
1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ( 

25 "Registry", 

26) 

27 

28from collections import defaultdict 

29import contextlib 

30import logging 

31from typing import ( 

32 Any, 

33 Dict, 

34 Iterable, 

35 Iterator, 

36 List, 

37 Mapping, 

38 Optional, 

39 Set, 

40 Type, 

41 TYPE_CHECKING, 

42 Union, 

43) 

44 

45import sqlalchemy 

46 

47from ..core import ( 

48 Config, 

49 DataCoordinate, 

50 DataCoordinateIterable, 

51 DataId, 

52 DatasetRef, 

53 DatasetType, 

54 ddl, 

55 Dimension, 

56 DimensionElement, 

57 DimensionGraph, 

58 DimensionRecord, 

59 DimensionUniverse, 

60 NamedKeyMapping, 

61 NameLookupMapping, 

62 StorageClassFactory, 

63) 

64from . import queries 

65from ..core.utils import doImport, iterable, transactional 

66from ._config import RegistryConfig 

67from ._collectionType import CollectionType 

68from ._exceptions import ConflictingDefinitionError, InconsistentDataIdError, OrphanedRecordError 

69from .wildcards import CategorizedWildcard, CollectionQuery, CollectionSearch, Ellipsis 

70from .interfaces import ChainedCollectionRecord, RunRecord 

71from .versions import ButlerVersionsManager, DigestMismatchError 

72 

73if TYPE_CHECKING: 

74 from ..butlerConfig import ButlerConfig 

75 from .interfaces import ( 

76 ButlerAttributeManager, 

77 CollectionManager, 

78 Database, 

79 OpaqueTableStorageManager, 

80 DimensionRecordStorageManager, 

81 DatasetRecordStorageManager, 

82 DatastoreRegistryBridgeManager, 

83 ) 

84 

85 

86_LOG = logging.getLogger(__name__) 

87 

88 

89class Registry: 

90 """Registry interface. 

91 

92 Parameters 

93 ---------- 

94 database : `Database` 

95 Database instance in which to store Registry data. 

96 universe : `DimensionUniverse` 

97 Full set of dimensions for Registry. 

98 attributes : `type` 

99 Manager class implementing `ButlerAttributeManager`. 

100 opaque : `type` 

101 Manager class implementing `OpaqueTableStorageManager`. 

102 dimensions : `type` 

103 Manager class implementing `DimensionRecordStorageManager`. 

104 collections : `type` 

105 Manager class implementing `CollectionManager`. 

106 datasets : `type` 

107 Manager class implementing `DatasetRecordStorageManager`. 

108 datastoreBridges : `type` 

109 Manager class implementing `DatastoreRegistryBridgeManager`. 

110 writeable : `bool`, optional 

111 If True then Registry will support write operations. 

112 create : `bool`, optional 

113 If `True` then the database schema will be initialized; the database 

114 must be empty before the Registry is instantiated. 

115 """ 

116 

117 defaultConfigFile: Optional[str] = None 

118 """Path to configuration defaults. Accessed within the ``config`` resource 

119 or relative to a search path. Can be `None` if no defaults are specified. 

120 """ 

121 

122 @classmethod 

123 def fromConfig(cls, config: Union[ButlerConfig, RegistryConfig, Config, str], create: bool = False, 

124 butlerRoot: Optional[str] = None, writeable: bool = True) -> Registry: 

125 """Create `Registry` subclass instance from `config`. 

126 

127 Uses ``registry.cls`` from `config` to determine which subclass to 

128 instantiate. 

129 

130 Parameters 

131 ---------- 

132 config : `ButlerConfig`, `RegistryConfig`, `Config` or `str` 

133 Registry configuration 

134 create : `bool`, optional 

135 Assume empty Registry and create a new one. 

136 butlerRoot : `str`, optional 

137 Path to the repository root this `Registry` will manage. 

138 writeable : `bool`, optional 

139 If `True` (default) create a read-write connection to the database. 

140 

141 Returns 

142 ------- 

143 registry : `Registry` (subclass) 

144 A new `Registry` subclass instance. 

145 """ 

146 if not isinstance(config, RegistryConfig): 

147 if isinstance(config, str) or isinstance(config, Config): 

148 config = RegistryConfig(config) 

149 else: 

150 raise ValueError("Incompatible Registry configuration: {}".format(config)) 

151 config.replaceRoot(butlerRoot) 

152 DatabaseClass = config.getDatabaseClass() 

153 database = DatabaseClass.fromUri(str(config.connectionString), origin=config.get("origin", 0), 

154 namespace=config.get("namespace"), writeable=writeable) 

155 universe = DimensionUniverse(config) 

156 attributes = doImport(config["managers", "attributes"]) 

157 opaque = doImport(config["managers", "opaque"]) 

158 dimensions = doImport(config["managers", "dimensions"]) 

159 collections = doImport(config["managers", "collections"]) 

160 datasets = doImport(config["managers", "datasets"]) 

161 datastoreBridges = doImport(config["managers", "datastores"]) 

162 

163 return cls(database, universe, dimensions=dimensions, attributes=attributes, opaque=opaque, 

164 collections=collections, datasets=datasets, datastoreBridges=datastoreBridges, 

165 writeable=writeable, create=create) 

166 

167 def __init__(self, database: Database, universe: DimensionUniverse, *, 

168 attributes: Type[ButlerAttributeManager], 

169 opaque: Type[OpaqueTableStorageManager], 

170 dimensions: Type[DimensionRecordStorageManager], 

171 collections: Type[CollectionManager], 

172 datasets: Type[DatasetRecordStorageManager], 

173 datastoreBridges: Type[DatastoreRegistryBridgeManager], 

174 writeable: bool = True, 

175 create: bool = False): 

176 self._db = database 

177 self.storageClasses = StorageClassFactory() 

178 with self._db.declareStaticTables(create=create) as context: 

179 self._attributes = attributes.initialize(self._db, context) 

180 self._dimensions = dimensions.initialize(self._db, context, universe=universe) 

181 self._collections = collections.initialize(self._db, context) 

182 self._datasets = datasets.initialize(self._db, context, 

183 collections=self._collections, 

184 universe=self.dimensions) 

185 self._opaque = opaque.initialize(self._db, context) 

186 self._datastoreBridges = datastoreBridges.initialize(self._db, context, 

187 opaque=self._opaque, 

188 datasets=datasets, 

189 universe=self.dimensions) 

190 versions = ButlerVersionsManager( 

191 self._attributes, 

192 dict( 

193 attributes=self._attributes, 

194 opaque=self._opaque, 

195 dimensions=self._dimensions, 

196 collections=self._collections, 

197 datasets=self._datasets, 

198 datastores=self._datastoreBridges, 

199 ) 

200 ) 

201 # store managers and their versions in attributes table 

202 context.addInitializer(lambda db: versions.storeManagersConfig()) 

203 context.addInitializer(lambda db: versions.storeManagersVersions()) 

204 

205 if not create: 

206 # verify that configured versions are compatible with schema 

207 versions.checkManagersConfig() 

208 versions.checkManagersVersions(writeable) 

209 try: 

210 versions.checkManagersDigests() 

211 except DigestMismatchError as exc: 

212 # potentially digest mismatch is a serious error but during 

213 # development it could be benign, treat this as warning for 

214 # now. 

215 _LOG.warning(f"Registry schema digest mismatch: {exc}") 

216 

217 self._collections.refresh() 

218 self._datasets.refresh(universe=self._dimensions.universe) 

219 

220 def __str__(self) -> str: 

221 return str(self._db) 

222 

223 def __repr__(self) -> str: 

224 return f"Registry({self._db!r}, {self.dimensions!r})" 

225 

226 def isWriteable(self) -> bool: 

227 """Return `True` if this registry allows write operations, and `False` 

228 otherwise. 

229 """ 

230 return self._db.isWriteable() 

231 

232 @property 

233 def dimensions(self) -> DimensionUniverse: 

234 """All dimensions recognized by this `Registry` (`DimensionUniverse`). 

235 """ 

236 return self._dimensions.universe 

237 

238 @contextlib.contextmanager 

239 def transaction(self) -> Iterator[None]: 

240 """Return a context manager that represents a transaction. 

241 """ 

242 # TODO make savepoint=False the default. 

243 try: 

244 with self._db.transaction(): 

245 yield 

246 except BaseException: 

247 # TODO: this clears the caches sometimes when we wouldn't actually 

248 # need to. Can we avoid that? 

249 self._dimensions.clearCaches() 

250 raise 

251 

252 def registerOpaqueTable(self, tableName: str, spec: ddl.TableSpec) -> None: 

253 """Add an opaque (to the `Registry`) table for use by a `Datastore` or 

254 other data repository client. 

255 

256 Opaque table records can be added via `insertOpaqueData`, retrieved via 

257 `fetchOpaqueData`, and removed via `deleteOpaqueData`. 

258 

259 Parameters 

260 ---------- 

261 tableName : `str` 

262 Logical name of the opaque table. This may differ from the 

263 actual name used in the database by a prefix and/or suffix. 

264 spec : `ddl.TableSpec` 

265 Specification for the table to be added. 
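
Examples
--------
A minimal usage sketch; the table and field names are illustrative
assumptions, and the field specifications rely on the ``ddl`` and
``sqlalchemy`` symbols imported at the top of this module::

    spec = ddl.TableSpec(
        fields=[
            # Illustrative columns for a hypothetical datastore-owned table.
            ddl.FieldSpec(name="dataset_id", dtype=sqlalchemy.BigInteger, primaryKey=True),
            ddl.FieldSpec(name="path", dtype=sqlalchemy.String, length=256),
        ]
    )
    registry.registerOpaqueTable("my_datastore_records", spec)
    registry.insertOpaqueData("my_datastore_records", {"dataset_id": 1, "path": "a/b.fits"})
    rows = list(registry.fetchOpaqueData("my_datastore_records", dataset_id=1))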

266 """ 

267 self._opaque.register(tableName, spec) 

268 

269 @transactional 

270 def insertOpaqueData(self, tableName: str, *data: dict) -> None: 

271 """Insert records into an opaque table. 

272 

273 Parameters 

274 ---------- 

275 tableName : `str` 

276 Logical name of the opaque table. Must match the name used in a 

277 previous call to `registerOpaqueTable`. 

278 data 

279 Each additional positional argument is a dictionary that represents 

280 a single row to be added. 

281 """ 

282 self._opaque[tableName].insert(*data) 

283 

284 def fetchOpaqueData(self, tableName: str, **where: Any) -> Iterator[dict]: 

285 """Retrieve records from an opaque table. 

286 

287 Parameters 

288 ---------- 

289 tableName : `str` 

290 Logical name of the opaque table. Must match the name used in a 

291 previous call to `registerOpaqueTable`. 

292 where 

293 Additional keyword arguments are interpreted as equality 

294 constraints that restrict the returned rows (combined with AND); 

295 keyword arguments are column names and values are the values they 

296 must have. 

297 

298 Yields 

299 ------ 

300 row : `dict` 

301 A dictionary representing a single result row. 

302 """ 

303 yield from self._opaque[tableName].fetch(**where) 

304 

305 @transactional 

306 def deleteOpaqueData(self, tableName: str, **where: Any) -> None: 

307 """Remove records from an opaque table. 

308 

309 Parameters 

310 ---------- 

311 tableName : `str` 

312 Logical name of the opaque table. Must match the name used in a 

313 previous call to `registerOpaqueTable`. 

314 where 

315 Additional keyword arguments are interpreted as equality 

316 constraints that restrict the deleted rows (combined with AND); 

317 keyword arguments are column names and values are the values they 

318 must have. 

319 """ 

320 self._opaque[tableName].delete(**where) 

321 

322 def registerCollection(self, name: str, type: CollectionType = CollectionType.TAGGED) -> None: 

323 """Add a new collection if one with the given name does not exist. 

324 

325 Parameters 

326 ---------- 

327 name : `str` 

328 The name of the collection to create. 

329 type : `CollectionType` 

330 Enum value indicating the type of collection to create. 

331 

332 Notes 

333 ----- 

334 This method cannot be called within transactions, as it needs to be 

335 able to perform its own transaction to be concurrent. 
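
Examples
--------
A short usage sketch; the collection names are illustrative and
``registry`` is assumed to be a writeable instance of this class::

    registry.registerCollection("my/tagged", CollectionType.TAGGED)
    registry.registerRun("my/run/20200101")
    registry.getCollectionType("my/run/20200101")  # CollectionType.RUN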

336 """ 

337 self._collections.register(name, type) 

338 

339 def getCollectionType(self, name: str) -> CollectionType: 

340 """Return an enumeration value indicating the type of the given 

341 collection. 

342 

343 Parameters 

344 ---------- 

345 name : `str` 

346 The name of the collection. 

347 

348 Returns 

349 ------- 

350 type : `CollectionType` 

351 Enum value indicating the type of this collection. 

352 

353 Raises 

354 ------ 

355 MissingCollectionError 

356 Raised if no collection with the given name exists. 

357 """ 

358 return self._collections.find(name).type 

359 

360 def registerRun(self, name: str) -> None: 

361 """Add a new run if one with the given name does not exist. 

362 

363 Parameters 

364 ---------- 

365 name : `str` 

366 The name of the run to create. 

367 

368 Notes 

369 ----- 

370 This method cannot be called within transactions, as it needs to be 

371 able to perform its own transaction to be concurrent. 

372 """ 

373 self._collections.register(name, CollectionType.RUN) 

374 

375 @transactional 

376 def removeCollection(self, name: str) -> None: 

377 """Completely remove the given collection. 

378 

379 Parameters 

380 ---------- 

381 name : `str` 

382 The name of the collection to remove. 

383 

384 Raises 

385 ------ 

386 MissingCollectionError 

387 Raised if no collection with the given name exists. 

388 

389 Notes 

390 ----- 

391 If this is a `~CollectionType.RUN` collection, all datasets and quanta 

392 in it are also fully removed. This requires that those datasets be 

393 removed (or at least trashed) from any datastores that hold them first. 

394 

395 A collection may not be deleted as long as it is referenced by a 

396 `~CollectionType.CHAINED` collection; the ``CHAINED`` collection must 

397 be deleted or redefined first. 

398 """ 

399 self._collections.remove(name) 

400 

401 def getCollectionChain(self, parent: str) -> CollectionSearch: 

402 """Return the child collections in a `~CollectionType.CHAINED` 

403 collection. 

404 

405 Parameters 

406 ---------- 

407 parent : `str` 

408 Name of the chained collection. Must have already been added via 

409 a call to `Registry.registerCollection`. 

410 

411 Returns 

412 ------- 

413 children : `CollectionSearch` 

414 An object that defines the search path of the collection. 

415 See :ref:`daf_butler_collection_expressions` for more information. 

416 

417 Raises 

418 ------ 

419 MissingCollectionError 

420 Raised if ``parent`` does not exist in the `Registry`. 

421 TypeError 

422 Raised if ``parent`` does not correspond to a 

423 `~CollectionType.CHAINED` collection. 

424 """ 

425 record = self._collections.find(parent) 

426 if record.type is not CollectionType.CHAINED: 

427 raise TypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.") 

428 assert isinstance(record, ChainedCollectionRecord) 

429 return record.children 

430 

431 @transactional 

432 def setCollectionChain(self, parent: str, children: Any) -> None: 

433 """Define or redefine a `~CollectionType.CHAINED` collection. 

434 

435 Parameters 

436 ---------- 

437 parent : `str` 

438 Name of the chained collection. Must have already been added via 

439 a call to `Registry.registerCollection`. 

440 children : `Any` 

441 An expression defining an ordered search of child collections, 

442 generally an iterable of `str`. Restrictions on the dataset types 

443 to be searched can also be included, by passing mapping or an 

444 iterable containing tuples; see 

445 :ref:`daf_butler_collection_expressions` for more information. 

446 

447 Raises 

448 ------ 

449 MissingCollectionError 

450 Raised when any of the given collections do not exist in the 

451 `Registry`. 

452 TypeError 

453 Raised if ``parent`` does not correspond to a 

454 `~CollectionType.CHAINED` collection. 

455 ValueError 

456 Raised if the given collections contain a cycle. 
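
Examples
--------
A short sketch; the parent and child collection names are illustrative
and the children must already exist::

    registry.registerCollection("my/defaults", CollectionType.CHAINED)
    registry.setCollectionChain("my/defaults", ["my/run/20200101", "my/calib"])
    registry.getCollectionChain("my/defaults")  # ordered search path over children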

457 """ 

458 record = self._collections.find(parent) 

459 if record.type is not CollectionType.CHAINED: 

460 raise TypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.") 

461 assert isinstance(record, ChainedCollectionRecord) 

462 children = CollectionSearch.fromExpression(children) 

463 if children != record.children: 

464 record.update(self._collections, children) 

465 

466 def registerDatasetType(self, datasetType: DatasetType) -> bool: 

467 """ 

468 Add a new `DatasetType` to the Registry. 

469 

470 It is not an error to register the same `DatasetType` twice. 

471 

472 Parameters 

473 ---------- 

474 datasetType : `DatasetType` 

475 The `DatasetType` to be added. 

476 

477 Returns 

478 ------- 

479 inserted : `bool` 

480 `True` if ``datasetType`` was inserted, `False` if an identical 

481 existing `DatasetType` was found. Note that in either case the 

482 DatasetType is guaranteed to be defined in the Registry 

483 consistently with the given definition. 

484 

485 Raises 

486 ------ 

487 ValueError 

488 Raised if the dimensions or storage class are invalid. 

489 ConflictingDefinitionError 

490 Raised if this DatasetType is already registered with a different 

491 definition. 

492 

493 Notes 

494 ----- 

495 This method cannot be called within transactions, as it needs to be 

496 able to perform its own transaction to be concurrent. 
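
Examples
--------
A sketch; the dataset type, dimension, and storage-class names are
illustrative and must exist in the repository configuration::

    datasetType = DatasetType(
        "flat",
        dimensions=("instrument", "detector", "physical_filter"),
        storageClass="ExposureF",
        universe=registry.dimensions,
    )
    registry.registerDatasetType(datasetType)  # True on first registration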

497 """ 

498 _, inserted = self._datasets.register(datasetType) 

499 return inserted 

500 

501 def getDatasetType(self, name: str) -> DatasetType: 

502 """Get the `DatasetType`. 

503 

504 Parameters 

505 ---------- 

506 name : `str` 

507 Name of the type. 

508 

509 Returns 

510 ------- 

511 type : `DatasetType` 

512 The `DatasetType` associated with the given name. 

513 

514 Raises 

515 ------ 

516 KeyError 

517 Requested named DatasetType could not be found in registry. 

518 """ 

519 return self._datasets[name].datasetType 

520 

521 def findDataset(self, datasetType: Union[DatasetType, str], dataId: Optional[DataId] = None, *, 

522 collections: Any, **kwargs: Any) -> Optional[DatasetRef]: 

523 """Find a dataset given its `DatasetType` and data ID. 

524 

525 This can be used to obtain a `DatasetRef` that permits the dataset to 

526 be read from a `Datastore`. If the dataset is a component and can not 

527 be found using the provided dataset type, a dataset ref for the parent 

528 will be returned instead but with the correct dataset type. 

529 

530 Parameters 

531 ---------- 

532 datasetType : `DatasetType` or `str` 

533 A `DatasetType` or the name of one. 

534 dataId : `dict` or `DataCoordinate`, optional 

535 A `dict`-like object containing the `Dimension` links that identify 

536 the dataset within a collection. 

537 collections 

538 An expression that fully or partially identifies the collections 

539 to search for the dataset, such as a `str`, `DatasetType`, or 

540 iterable thereof. See :ref:`daf_butler_collection_expressions` 

541 for more information. 

542 **kwargs 

543 Additional keyword arguments passed to 

544 `DataCoordinate.standardize` to convert ``dataId`` to a true 

545 `DataCoordinate` or augment an existing one. 

546 

547 Returns 

548 ------- 

549 ref : `DatasetRef` or `None` 

550 A reference to the dataset, or `None` if no matching Dataset 

551 was found. 

552 

553 Raises 

554 ------ 

555 LookupError 

556 Raised if one or more data ID keys are missing. 

557 KeyError 

558 Raised if the dataset type does not exist. 

559 MissingCollectionError 

560 Raised if any of ``collections`` does not exist in the registry. 
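
Examples
--------
A usage sketch; the dataset type name, data ID values, and collection
name are illustrative assumptions::

    ref = registry.findDataset(
        "flat",
        instrument="DummyCam", detector=1, physical_filter="d-r",
        collections="my/calib",
    )
    if ref is not None:
        print(ref.id, ref.dataId)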

561 """ 

562 if isinstance(datasetType, DatasetType): 

563 storage = self._datasets[datasetType.name] 

564 else: 

565 storage = self._datasets[datasetType] 

566 dataId = DataCoordinate.standardize(dataId, graph=storage.datasetType.dimensions, 

567 universe=self.dimensions, **kwargs) 

568 collections = CollectionSearch.fromExpression(collections) 

569 for collectionRecord in collections.iter(self._collections, datasetType=storage.datasetType): 

570 result = storage.find(collectionRecord, dataId) 

571 if result is not None: 

572 return result 

573 

574 return None 

575 

576 @transactional 

577 def insertDatasets(self, datasetType: Union[DatasetType, str], dataIds: Iterable[DataId], 

578 run: str) -> List[DatasetRef]: 

579 """Insert one or more datasets into the `Registry` 

580 

581 This always adds new datasets; to associate existing datasets with 

582 a new collection, use ``associate``. 

583 

584 Parameters 

585 ---------- 

586 datasetType : `DatasetType` or `str` 

587 A `DatasetType` or the name of one. 

588 dataIds : `~collections.abc.Iterable` of `dict` or `DataCoordinate` 

589 Dimension-based identifiers for the new datasets. 

590 run : `str` 

591 The name of the run that produced the datasets. 

592 

593 Returns 

594 ------- 

595 refs : `list` of `DatasetRef` 

596 Resolved `DatasetRef` instances for all given data IDs (in the same 

597 order). 

598 

599 Raises 

600 ------ 

601 ConflictingDefinitionError 

602 Raised if a dataset with the same dataset type and data ID as one 

603 of those given already exists in ``run``. 

604 MissingCollectionError 

605 Raised if ``run`` does not exist in the registry. 
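
Examples
--------
A sketch; it assumes an already-registered "flat" dataset type (see
`registerDatasetType`) and uses illustrative data ID and run names::

    registry.registerRun("my/run/20200101")
    (ref,) = registry.insertDatasets(
        "flat",
        dataIds=[{"instrument": "DummyCam", "detector": 1, "physical_filter": "d-r"}],
        run="my/run/20200101",
    )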

606 """ 

607 if isinstance(datasetType, DatasetType): 

608 storage = self._datasets.find(datasetType.name) 

609 if storage is None: 

610 raise LookupError(f"DatasetType '{datasetType}' has not been registered.") 

611 else: 

612 storage = self._datasets.find(datasetType) 

613 if storage is None: 

614 raise LookupError(f"DatasetType with name '{datasetType}' has not been registered.") 

615 runRecord = self._collections.find(run) 

616 if runRecord.type is not CollectionType.RUN: 

617 raise TypeError(f"Given collection is of type {runRecord.type.name}; RUN collection required.") 

618 assert isinstance(runRecord, RunRecord) 

619 expandedDataIds = [self.expandDataId(dataId, graph=storage.datasetType.dimensions) 

620 for dataId in dataIds] 

621 try: 

622 refs = list(storage.insert(runRecord, expandedDataIds)) 

623 except sqlalchemy.exc.IntegrityError as err: 

624 raise ConflictingDefinitionError(f"A database constraint failure was triggered by inserting " 

625 f"one or more datasets of type {storage.datasetType} into " 

626 f"collection '{run}'. " 

627 f"This probably means a dataset with the same data ID " 

628 f"and dataset type already exists, but it may also mean a " 

629 f"dimension row is missing.") from err 

630 return refs 

631 

632 def getDataset(self, id: int) -> Optional[DatasetRef]: 

633 """Retrieve a Dataset entry. 

634 

635 Parameters 

636 ---------- 

637 id : `int` 

638 The unique identifier for the dataset. 

639 

640 Returns 

641 ------- 

642 ref : `DatasetRef` or `None` 

643 A ref to the Dataset, or `None` if no matching Dataset 

644 was found. 

645 """ 

646 ref = self._datasets.getDatasetRef(id, universe=self.dimensions) 

647 if ref is None: 

648 return None 

649 return ref 

650 

651 @transactional 

652 def removeDatasets(self, refs: Iterable[DatasetRef]) -> None: 

653 """Remove datasets from the Registry. 

654 

655 The datasets will be removed unconditionally from all collections, and 

656 any `Quantum` that consumed this dataset will instead be marked as 

657 having a NULL input. 

658 caller is responsible for ensuring that the dataset has already been 

659 removed from all Datastores. 

660 

661 Parameters 

662 ---------- 

663 refs : `Iterable` of `DatasetRef` 

664 References to the datasets to be removed. Must include a valid 

665 ``id`` attribute, and should be considered invalidated upon return. 

666 

667 Raises 

668 ------ 

669 AmbiguousDatasetError 

670 Raised if any ``ref.id`` is `None`. 

671 OrphanedRecordError 

672 Raised if any dataset is still present in any `Datastore`. 

673 """ 

674 for datasetType, refsForType in DatasetRef.groupByType(refs).items(): 

675 storage = self._datasets.find(datasetType.name) 

676 assert storage is not None 

677 try: 

678 storage.delete(refsForType) 

679 except sqlalchemy.exc.IntegrityError as err: 

680 raise OrphanedRecordError("One or more datasets is still " 

681 "present in one or more Datastores.") from err 

682 

683 @transactional 

684 def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

685 """Add existing datasets to a `~CollectionType.TAGGED` collection. 

686 

687 If a DatasetRef with the same exact integer ID is already in a 

688 collection nothing is changed. If a `DatasetRef` with the same 

689 `DatasetType` and data ID but with different integer ID 

690 exists in the collection, `ConflictingDefinitionError` is raised. 

691 

692 Parameters 

693 ---------- 

694 collection : `str` 

695 Indicates the collection the datasets should be associated with. 

696 refs : `Iterable` [ `DatasetRef` ] 

697 An iterable of resolved `DatasetRef` instances that already exist 

698 in this `Registry`. 

699 

700 Raises 

701 ------ 

702 ConflictingDefinitionError 

703 If a Dataset with the given `DatasetRef` already exists in the 

704 given collection. 

705 AmbiguousDatasetError 

706 Raised if ``any(ref.id is None for ref in refs)``. 

707 MissingCollectionError 

708 Raised if ``collection`` does not exist in the registry. 

709 TypeError 

710 Raised if adding new datasets to the given ``collection`` is not 

711 allowed. 
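
Examples
--------
A sketch; it assumes ``refs`` holds resolved `DatasetRef` instances
(e.g. from `insertDatasets` or `queryDatasets`) and uses an illustrative
collection name::

    registry.registerCollection("my/tagged", CollectionType.TAGGED)
    registry.associate("my/tagged", refs)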

712 """ 

713 collectionRecord = self._collections.find(collection) 

714 if collectionRecord.type is not CollectionType.TAGGED: 

715 raise TypeError(f"Collection '{collection}' has type {collectionRecord.type.name}, not TAGGED.") 

716 for datasetType, refsForType in DatasetRef.groupByType(refs).items(): 

717 storage = self._datasets.find(datasetType.name) 

718 assert storage is not None 

719 try: 

720 storage.associate(collectionRecord, refsForType) 

721 except sqlalchemy.exc.IntegrityError as err: 

722 raise ConflictingDefinitionError( 

723 f"Constraint violation while associating dataset of type {datasetType.name} with " 

724 f"collection {collection}. This probably means that one or more datasets with the same " 

725 f"dataset type and data ID already exist in the collection, but it may also indicate " 

726 f"that the datasets do not exist." 

727 ) from err 

728 

729 @transactional 

730 def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

731 """Remove existing datasets from a `~CollectionType.TAGGED` collection. 

732 

733 ``collection`` and ``ref`` combinations that are not currently 

734 associated are silently ignored. 

735 

736 Parameters 

737 ---------- 

738 collection : `str` 

739 The collection the datasets should no longer be associated with. 

740 refs : `Iterable` [ `DatasetRef` ] 

741 An iterable of resolved `DatasetRef` instances that already exist 

742 in this `Registry`. 

743 

744 Raises 

745 ------ 

746 AmbiguousDatasetError 

747 Raised if any of the given dataset references is unresolved. 

748 MissingCollectionError 

749 Raised if ``collection`` does not exist in the registry. 

750 TypeError 

751 Raised if removing datasets from the given ``collection`` is not 

752 allowed. 

753 """ 

754 collectionRecord = self._collections.find(collection) 

755 if collectionRecord.type is not CollectionType.TAGGED: 

756 raise TypeError(f"Collection '{collection}' has type {collectionRecord.type.name}; " 

757 "expected TAGGED.") 

758 for datasetType, refsForType in DatasetRef.groupByType(refs).items(): 

759 storage = self._datasets.find(datasetType.name) 

760 assert storage is not None 

761 storage.disassociate(collectionRecord, refsForType) 

762 

763 def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager: 

764 """Return an object that allows a new `Datastore` instance to 

765 communicate with this `Registry`. 

766 

767 Returns 

768 ------- 

769 manager : `DatastoreRegistryBridgeManager` 

770 Object that mediates communication between this `Registry` and its 

771 associated datastores. 

772 """ 

773 return self._datastoreBridges 

774 

775 def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]: 

776 """Retrieve datastore locations for a given dataset. 

777 

778 Parameters 

779 ---------- 

780 ref : `DatasetRef` 

781 A reference to the dataset for which to retrieve storage 

782 information. 

783 

784 Returns 

785 ------- 

786 datastores : `Iterable` [ `str` ] 

787 All the matching datastores holding this dataset. 

788 

789 Raises 

790 ------ 

791 AmbiguousDatasetError 

792 Raised if ``ref.id`` is `None`. 

793 """ 

794 return self._datastoreBridges.findDatastores(ref) 

795 

796 def expandDataId(self, dataId: Optional[DataId] = None, *, graph: Optional[DimensionGraph] = None, 

797 records: Optional[NameLookupMapping[DimensionElement, Optional[DimensionRecord]]] = None, 

798 **kwargs: Any) -> DataCoordinate: 

799 """Expand a dimension-based data ID to include additional information. 

800 

801 Parameters 

802 ---------- 

803 dataId : `DataCoordinate` or `dict`, optional 

804 Data ID to be expanded; augmented and overridden by ``kwds``. 

805 graph : `DimensionGraph`, optional 

806 Set of dimensions for the expanded ID. If `None`, the dimensions 

807 will be inferred from the keys of ``dataId`` and ``kwds``. 

808 Dimensions that are in ``dataId`` or ``kwds`` but not in ``graph`` 

809 are silently ignored, providing a way to extract and expand a 

810 subset of a data ID. 

811 records : `Mapping` [`str`, `DimensionRecord`], optional 

812 Dimension record data to use before querying the database for that 

813 data, keyed by element name. 

814 **kwargs 

815 Additional keywords are treated like additional key-value pairs for 

816 ``dataId``, extending and overriding it. 

817 

818 Returns 

819 ------- 

820 expanded : `DataCoordinate` 

821 A data ID that includes full metadata for all of the dimensions it 

822 identifies, i.e. guarantees that ``expanded.hasRecords()`` and 

823 ``expanded.hasFull()`` both return `True`. 
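
Examples
--------
A sketch; the instrument and detector values are illustrative and must
already have dimension records in the registry::

    expanded = registry.expandDataId(instrument="DummyCam", detector=1)
    expanded.hasRecords()  # True
    expanded.hasFull()     # True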

824 """ 

825 standardized = DataCoordinate.standardize(dataId, graph=graph, universe=self.dimensions, **kwargs) 

826 if standardized.hasRecords(): 

827 return standardized 

828 if records is None: 

829 records = {} 

830 elif isinstance(records, NamedKeyMapping): 

831 records = records.byName() 

832 else: 

833 records = dict(records) 

834 if isinstance(dataId, DataCoordinate) and dataId.hasRecords(): 

835 records.update(dataId.records.byName()) 

836 keys = standardized.byName() 

837 for element in standardized.graph.primaryKeyTraversalOrder: 

838 record = records.get(element.name, ...) # Use ... to mean not found; None might mean NULL 

839 if record is ...: 

840 if isinstance(element, Dimension) and keys.get(element.name) is None: 

841 if element in standardized.graph.required: 

842 raise LookupError( 

843 f"No value or null value for required dimension {element.name}." 

844 ) 

845 keys[element.name] = None 

846 record = None 

847 else: 

848 storage = self._dimensions[element] 

849 dataIdSet = DataCoordinateIterable.fromScalar( 

850 DataCoordinate.standardize(keys, graph=element.graph) 

851 ) 

852 fetched = tuple(storage.fetch(dataIdSet)) 

853 try: 

854 (record,) = fetched 

855 except ValueError: 

856 record = None 

857 records[element.name] = record 

858 if record is not None: 

859 for d in element.implied: 

860 value = getattr(record, d.name) 

861 if keys.setdefault(d.name, value) != value: 

862 raise InconsistentDataIdError( 

863 f"Data ID {standardized} has {d.name}={keys[d.name]!r}, " 

864 f"but {element.name} implies {d.name}={value!r}." 

865 ) 

866 else: 

867 if element in standardized.graph.required: 

868 raise LookupError( 

869 f"Could not fetch record for required dimension {element.name} via keys {keys}." 

870 ) 

871 if element.alwaysJoin: 

872 raise InconsistentDataIdError( 

873 f"Could not fetch record for element {element.name} via keys {keys}, ", 

874 "but it is marked alwaysJoin=True; this means one or more dimensions are not " 

875 "related." 

876 ) 

877 for d in element.implied: 

878 keys.setdefault(d.name, None) 

879 records.setdefault(d.name, None) 

880 return DataCoordinate.standardize(keys, graph=standardized.graph).expanded(records=records) 

881 

882 def insertDimensionData(self, element: Union[DimensionElement, str], 

883 *data: Union[Mapping[str, Any], DimensionRecord], 

884 conform: bool = True) -> None: 

885 """Insert one or more dimension records into the database. 

886 

887 Parameters 

888 ---------- 

889 element : `DimensionElement` or `str` 

890 The `DimensionElement` or name thereof that identifies the table 

891 records will be inserted into. 

892 data : `dict` or `DimensionRecord` (variadic) 

893 One or more records to insert. 

894 conform : `bool`, optional 

895 If `False` (`True` is default) perform no checking or conversions, 

896 and assume that ``element`` is a `DimensionElement` instance and 

897 ``data`` is one or more `DimensionRecord` instances of the 

898 appropriate subclass. 
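
Examples
--------
A sketch; the record values are illustrative, and the field names assume
the default dimension configuration::

    registry.insertDimensionData(
        "instrument",
        {"name": "DummyCam", "visit_max": 1024, "exposure_max": 512, "detector_max": 4},
    )
    registry.insertDimensionData(
        "physical_filter",
        {"instrument": "DummyCam", "name": "d-r", "abstract_filter": "r"},
    )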

899 """ 

900 if conform: 

901 if isinstance(element, str): 

902 element = self.dimensions[element] 

903 records = [row if isinstance(row, DimensionRecord) else element.RecordClass.fromDict(row) 

904 for row in data] 

905 else: 

906 # Ignore typing since caller said to trust them with conform=False. 

907 records = data # type: ignore 

908 storage = self._dimensions[element] # type: ignore 

909 storage.insert(*records) 

910 

911 def syncDimensionData(self, element: Union[DimensionElement, str], 

912 row: Union[Mapping[str, Any], DimensionRecord], 

913 conform: bool = True) -> bool: 

914 """Synchronize the given dimension record with the database, inserting 

915 if it does not already exist and comparing values if it does. 

916 

917 Parameters 

918 ---------- 

919 element : `DimensionElement` or `str` 

920 The `DimensionElement` or name thereof that identifies the table 

921 records will be inserted into. 

922 row : `dict` or `DimensionRecord` 

923 The record to insert. 

924 conform : `bool`, optional 

925 If `False` (`True` is default) perform no checking or conversions, 

926 and assume that ``element`` is a `DimensionElement` instance and 

927 ``row`` is a `DimensionRecord` instance of the 

928 appropriate subclass. 

929 

930 Returns 

931 ------- 

932 inserted : `bool` 

933 `True` if a new row was inserted, `False` otherwise. 

934 

935 Raises 

936 ------ 

937 ConflictingDefinitionError 

938 Raised if the record exists in the database (according to primary 

939 key lookup) but is inconsistent with the given one. 

940 

941 Notes 

942 ----- 

943 This method cannot be called within transactions, as it needs to be 

944 able to perform its own transaction to be concurrent. 

945 """ 

946 if conform: 

947 if isinstance(element, str): 

948 element = self.dimensions[element] 

949 record = row if isinstance(row, DimensionRecord) else element.RecordClass.fromDict(row) 

950 else: 

951 # Ignore typing since caller said to trust them with conform=False. 

952 record = row # type: ignore 

953 storage = self._dimensions[element] # type: ignore 

954 return storage.sync(record) 

955 

956 def queryDatasetTypes(self, expression: Any = ..., *, components: Optional[bool] = None 

957 ) -> Iterator[DatasetType]: 

958 """Iterate over the dataset types whose names match an expression. 

959 

960 Parameters 

961 ---------- 

962 expression : `Any`, optional 

963 An expression that fully or partially identifies the dataset types 

964 to return, such as a `str`, `re.Pattern`, or iterable thereof. 

965 `...` can be used to return all dataset types, and is the default. 

966 See :ref:`daf_butler_dataset_type_expressions` for more 

967 information. 

968 components : `bool`, optional 

969 If `True`, apply all expression patterns to component dataset type 

970 names as well. If `False`, never apply patterns to components. 

971 If `None` (default), apply patterns to components only if their 

972 parent datasets were not matched by the expression. 

973 Fully-specified component datasets (`str` or `DatasetType` 

974 instances) are always included. 

975 

976 Yields 

977 ------ 

978 datasetType : `DatasetType` 

979 A `DatasetType` instance whose name matches ``expression``. 
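
Examples
--------
A sketch; the pattern is illustrative::

    import re

    all_types = list(registry.queryDatasetTypes())  # `...` (all) is the default
    flats = list(registry.queryDatasetTypes(re.compile(r"flat.*")))
    no_components = list(registry.queryDatasetTypes(..., components=False))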

980 """ 

981 wildcard = CategorizedWildcard.fromExpression(expression, coerceUnrecognized=lambda d: d.name) 

982 if wildcard is Ellipsis: 

983 for datasetType in self._datasets: 

984 # The dataset type can no longer be a component 

985 yield datasetType 

986 if components and datasetType.isComposite(): 

987 # Automatically create the component dataset types 

988 for component in datasetType.makeAllComponentDatasetTypes(): 

989 yield component 

990 return 

991 done: Set[str] = set() 

992 for name in wildcard.strings: 

993 storage = self._datasets.find(name) 

994 if storage is not None: 

995 done.add(storage.datasetType.name) 

996 yield storage.datasetType 

997 if wildcard.patterns: 

998 # If components (the argument) is None, we'll save component 

999 # datasets that we might want to match, but only if their parents 

1000 # didn't get included. 

1001 componentsForLater = [] 

1002 for registeredDatasetType in self._datasets: 

1003 # Components are not stored in registry so expand them here 

1004 allDatasetTypes = [registeredDatasetType] \ 

1005 + registeredDatasetType.makeAllComponentDatasetTypes() 

1006 for datasetType in allDatasetTypes: 

1007 if datasetType.name in done: 

1008 continue 

1009 parentName, componentName = datasetType.nameAndComponent() 

1010 if componentName is not None and not components: 

1011 if components is None and parentName not in done: 

1012 componentsForLater.append(datasetType) 

1013 continue 

1014 if any(p.fullmatch(datasetType.name) for p in wildcard.patterns): 

1015 done.add(datasetType.name) 

1016 yield datasetType 

1017 # Go back and try to match saved components. 

1018 for datasetType in componentsForLater: 

1019 parentName, _ = datasetType.nameAndComponent() 

1020 if parentName not in done and any(p.fullmatch(datasetType.name) for p in wildcard.patterns): 

1021 yield datasetType 

1022 

1023 def queryCollections(self, expression: Any = ..., 

1024 datasetType: Optional[DatasetType] = None, 

1025 collectionType: Optional[CollectionType] = None, 

1026 flattenChains: bool = False, 

1027 includeChains: Optional[bool] = None) -> Iterator[str]: 

1028 """Iterate over the collections whose names match an expression. 

1029 

1030 Parameters 

1031 ---------- 

1032 expression : `Any`, optional 

1033 An expression that fully or partially identifies the collections 

1034 to return, such as a `str`, `re.Pattern`, or iterable thereof. 

1035 `...` can be used to return all collections, and is the default. 

1036 See :ref:`daf_butler_collection_expressions` for more 

1037 information. 

1038 datasetType : `DatasetType`, optional 

1039 If provided, only yield collections that should be searched for 

1040 this dataset type according to ``expression``. If this is 

1041 not provided, any dataset type restrictions in ``expression`` are 

1042 ignored. 

1043 collectionType : `CollectionType`, optional 

1044 If provided, only yield collections of this type. 

1045 flattenChains : `bool`, optional 

1046 If `True` (`False` is default), recursively yield the child 

1047 collections of matching `~CollectionType.CHAINED` collections. 

1048 includeChains : `bool`, optional 

1049 If `True`, yield records for matching `~CollectionType.CHAINED` 

1050 collections. Default is the opposite of ``flattenChains``: include 

1051 either CHAINED collections or their children, but not both. 

1052 

1053 Yields 

1054 ------ 

1055 collection : `str` 

1056 The name of a collection that matches ``expression``. 
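
Examples
--------
A sketch; the collection-name pattern is illustrative::

    import re

    runs = list(registry.queryCollections(re.compile(r"my/run/.*"),
                                          collectionType=CollectionType.RUN))
    everything = list(registry.queryCollections(flattenChains=True))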

1057 """ 

1058 query = CollectionQuery.fromExpression(expression) 

1059 for record in query.iter(self._collections, datasetType=datasetType, collectionType=collectionType, 

1060 flattenChains=flattenChains, includeChains=includeChains): 

1061 yield record.name 

1062 

1063 def makeQueryBuilder(self, summary: queries.QuerySummary) -> queries.QueryBuilder: 

1064 """Return a `QueryBuilder` instance capable of constructing and 

1065 managing more complex queries than those obtainable via `Registry` 

1066 interfaces. 

1067 

1068 This is an advanced interface; downstream code should prefer 

1069 `Registry.queryDataIds` and `Registry.queryDatasets` whenever those 

1070 are sufficient. 

1071 

1072 Parameters 

1073 ---------- 

1074 summary : `queries.QuerySummary` 

1075 Object describing and categorizing the full set of dimensions that 

1076 will be included in the query. 

1077 

1078 Returns 

1079 ------- 

1080 builder : `queries.QueryBuilder` 

1081 Object that can be used to construct and perform advanced queries. 

1082 """ 

1083 return queries.QueryBuilder( 

1084 summary, 

1085 queries.RegistryManagers( 

1086 collections=self._collections, 

1087 dimensions=self._dimensions, 

1088 datasets=self._datasets 

1089 ) 

1090 ) 

1091 

1092 def queryDatasets(self, datasetType: Any, *, 

1093 collections: Any, 

1094 dimensions: Optional[Iterable[Union[Dimension, str]]] = None, 

1095 dataId: Optional[DataId] = None, 

1096 where: Optional[str] = None, 

1097 deduplicate: bool = False, 

1098 components: Optional[bool] = None, 

1099 **kwargs: Any) -> queries.DatasetQueryResults: 

1100 """Query for and iterate over dataset references matching user-provided 

1101 criteria. 

1102 

1103 Parameters 

1104 ---------- 

1105 datasetType 

1106 An expression that fully or partially identifies the dataset types 

1107 to be queried. Allowed types include `DatasetType`, `str`, 

1108 `re.Pattern`, and iterables thereof. The special value `...` can 

1109 be used to query all dataset types. See 

1110 :ref:`daf_butler_dataset_type_expressions` for more information. 

1111 collections 

1112 An expression that fully or partially identifies the collections 

1113 to search for datasets, such as a `str`, `re.Pattern`, or iterable 

1114 thereof. `...` can be used to return all collections. See 

1115 :ref:`daf_butler_collection_expressions` for more information. 

1116 dimensions : `~collections.abc.Iterable` of `Dimension` or `str` 

1117 Dimensions to include in the query (in addition to those used 

1118 to identify the queried dataset type(s)), either to constrain 

1119 the resulting datasets to those for which a matching dimension 

1120 exists, or to relate the dataset type's dimensions to dimensions 

1121 referenced by the ``dataId`` or ``where`` arguments. 

1122 dataId : `dict` or `DataCoordinate`, optional 

1123 A data ID whose key-value pairs are used as equality constraints 

1124 in the query. 

1125 where : `str`, optional 

1126 A string expression similar to a SQL WHERE clause. May involve 

1127 any column of a dimension table or (as a shortcut for the primary 

1128 key column of a dimension table) dimension name. See 

1129 :ref:`daf_butler_dimension_expressions` for more information. 

1130 deduplicate : `bool`, optional 

1131 If `True` (`False` is default), for each result data ID, only 

1132 yield one `DatasetRef` of each `DatasetType`, from the first 

1133 collection in which a dataset of that dataset type appears 

1134 (according to the order of ``collections`` passed in). If `True`, 

1135 ``collections`` must not contain regular expressions and may not 

1136 be `...`. 

1137 components : `bool`, optional 

1138 If `True`, apply all dataset expression patterns to component 

1139 dataset type names as well. If `False`, never apply patterns to 

1140 components. If `None` (default), apply patterns to components only 

1141 if their parent datasets were not matched by the expression. 

1142 Fully-specified component datasets (`str` or `DatasetType` 

1143 instances) are always included. 

1144 **kwargs 

1145 Additional keyword arguments are forwarded to 

1146 `DataCoordinate.standardize` when processing the ``dataId`` 

1147 argument (and may be used to provide a constraining data ID even 

1148 when the ``dataId`` argument is `None`). 

1149 

1150 Returns 

1151 ------- 

1152 refs : `queries.DatasetQueryResults` 

1153 Dataset references matching the given query criteria. 

1154 

1155 Raises 

1156 ------ 

1157 TypeError 

1158 Raised when the arguments are incompatible, such as when a 

1159 collection wildcard is passed when ``deduplicate`` is `True`. 

1160 

1161 Notes 

1162 ----- 

1163 When multiple dataset types are queried in a single call, the 

1164 results of this operation are equivalent to querying for each dataset 

1165 type separately in turn, and no information about the relationships 

1166 between datasets of different types is included. In contexts where 

1167 that kind of information is important, the recommended pattern is to 

1168 use `queryDataIds` to first obtain data IDs (possibly with the 

1169 desired dataset types and collections passed as constraints to the 

1170 query), and then use multiple (generally much simpler) calls to 

1171 `queryDatasets` with the returned data IDs passed as constraints. 
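
Examples
--------
A sketch; the dataset type, collection names, and ``where`` expression
are illustrative::

    refs = registry.queryDatasets(
        "flat",
        collections=["my/calib", "my/run/20200101"],
        where="detector = 1 AND physical_filter = 'd-r'",
        instrument="DummyCam",
    )
    for ref in refs:
        print(ref.dataId)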

1172 """ 

1173 # Standardize the collections expression. 

1174 if deduplicate: 

1175 collections = CollectionSearch.fromExpression(collections) 

1176 else: 

1177 collections = CollectionQuery.fromExpression(collections) 

1178 # Standardize and expand the data ID provided as a constraint. 

1179 standardizedDataId = self.expandDataId(dataId, **kwargs) 

1180 

1181 # We can only query directly if given a non-component DatasetType 

1182 # instance. If we were given an expression or str or a component 

1183 # DatasetType instance, we'll populate this dict, recurse, and return. 

1184 # If we already have a non-component DatasetType, it will remain None 

1185 # and we'll run the query directly. 

1186 composition: Optional[ 

1187 Dict[ 

1188 DatasetType, # parent dataset type 

1189 List[Optional[str]] # component name, or None for parent 

1190 ] 

1191 ] = None 

1192 if not isinstance(datasetType, DatasetType): 

1193 # We were given a dataset type expression (which may be as simple 

1194 # as a str). Loop over all matching datasets, delegating handling 

1195 # of the `components` argument to queryDatasetTypes, as we populate 

1196 # the composition dict. 

1197 composition = defaultdict(list) 

1198 for trueDatasetType in self.queryDatasetTypes(datasetType, components=components): 

1199 parentName, componentName = trueDatasetType.nameAndComponent() 

1200 if componentName is not None: 

1201 parentDatasetType = self.getDatasetType(parentName) 

1202 composition.setdefault(parentDatasetType, []).append(componentName) 

1203 else: 

1204 composition.setdefault(trueDatasetType, []).append(None) 

1205 elif datasetType.isComponent(): 

1206 # We were given a true DatasetType instance, but it's a component. 

1207 # the composition dict will have exactly one item. 

1208 parentName, componentName = datasetType.nameAndComponent() 

1209 parentDatasetType = self.getDatasetType(parentName) 

1210 composition = {parentDatasetType: [componentName]} 

1211 if composition is not None: 

1212 # We need to recurse. Do that once for each parent dataset type. 

1213 chain = [] 

1214 for parentDatasetType, componentNames in composition.items(): 

1215 parentResults = self.queryDatasets( 

1216 parentDatasetType, 

1217 collections=collections, 

1218 dimensions=dimensions, 

1219 dataId=standardizedDataId, 

1220 where=where, 

1221 deduplicate=deduplicate 

1222 ) 

1223 if isinstance(parentResults, queries.ParentDatasetQueryResults): 

1224 chain.append( 

1225 parentResults.withComponents(componentNames) 

1226 ) 

1227 else: 

1228 # Should only happen if we know there would be no results. 

1229 assert isinstance(parentResults, queries.ChainedDatasetQueryResults) \ 

1230 and not parentResults._chain 

1231 return queries.ChainedDatasetQueryResults(chain) 

1232 # If we get here, there's no need to recurse (or we are already 

1233 # recursing; there can only ever be one level of recursion). 

1234 

1235 # The full set of dimensions in the query is the combination of those 

1236 # needed for the DatasetType and those explicitly requested, if any. 

1237 requestedDimensionNames = set(datasetType.dimensions.names) 

1238 if dimensions is not None: 

1239 requestedDimensionNames.update(self.dimensions.extract(dimensions).names) 

1240 # Construct the summary structure needed to construct a QueryBuilder. 

1241 summary = queries.QuerySummary( 

1242 requested=DimensionGraph(self.dimensions, names=requestedDimensionNames), 

1243 dataId=standardizedDataId, 

1244 expression=where, 

1245 ) 

1246 builder = self.makeQueryBuilder(summary) 

1247 # Add the dataset subquery to the query, telling the QueryBuilder to 

1248 # include the rank of the selected collection in the results only if we 

1249 # need to deduplicate. Note that if any of the collections are 

1250 # actually wildcard expressions, and we've asked for deduplication, 

1251 # this will raise TypeError for us. 

1252 if not builder.joinDataset(datasetType, collections, isResult=True, deduplicate=deduplicate): 

1253 return queries.ChainedDatasetQueryResults(()) 

1254 query = builder.finish() 

1255 return queries.ParentDatasetQueryResults(self._db, query, components=[None]) 

1256 

1257 def queryDataIds(self, dimensions: Union[Iterable[Union[Dimension, str]], Dimension, str], *, 

1258 dataId: Optional[DataId] = None, 

1259 datasets: Any = None, 

1260 collections: Any = None, 

1261 where: Optional[str] = None, 

1262 components: Optional[bool] = None, 

1263 **kwargs: Any) -> queries.DataCoordinateQueryResults: 

1264 """Query for data IDs matching user-provided criteria. 

1265 

1266 Parameters 

1267 ---------- 

1268 dimensions : `Dimension` or `str`, or iterable thereof 

1269 The dimensions of the data IDs to yield, as either `Dimension` 

1270 instances or `str`. Will be automatically expanded to a complete 

1271 `DimensionGraph`. 

1272 dataId : `dict` or `DataCoordinate`, optional 

1273 A data ID whose key-value pairs are used as equality constraints 

1274 in the query. 

1275 datasets : `Any`, optional 

1276 An expression that fully or partially identifies dataset types 

1277 that should constrain the yielded data IDs. For example, including 

1278 "raw" here would constrain the yielded ``instrument``, 

1279 ``exposure``, ``detector``, and ``physical_filter`` values to only 

1280 those for which at least one "raw" dataset exists in 

1281 ``collections``. Allowed types include `DatasetType`, `str`, 

1282 `re.Pattern`, and iterables thereof. Unlike other dataset type 

1283 expressions, ``...`` is not permitted - it doesn't make sense to 

1284 constrain data IDs on the existence of *all* datasets. 

1285 See :ref:`daf_butler_dataset_type_expressions` for more 

1286 information. 

1287 collections : `Any`, optional 

1288 An expression that fully or partially identifies the collections 

1289 to search for datasets, such as a `str`, `re.Pattern`, or iterable 

1290 thereof. `...` can be used to return all collections. Must be 

1291 provided if ``datasets`` is, and is ignored if it is not. See 

1292 :ref:`daf_butler_collection_expressions` for more information. 

1293 where : `str`, optional 

1294 A string expression similar to a SQL WHERE clause. May involve 

1295 any column of a dimension table or (as a shortcut for the primary 

1296 key column of a dimension table) dimension name. See 

1297 :ref:`daf_butler_dimension_expressions` for more information. 

1298 components : `bool`, optional 

1299 If `True`, apply all dataset expression patterns to component 

1300 dataset type names as well. If `False`, never apply patterns to 

1301 components. If `None` (default), apply patterns to components only 

1302 if their parent datasets were not matched by the expression. 

1303 Fully-specified component datasets (`str` or `DatasetType` 

1304 instances) are always included. 

1305 **kwargs 

1306 Additional keyword arguments are forwarded to 

1307 `DataCoordinate.standardize` when processing the ``dataId`` 

1308 argument (and may be used to provide a constraining data ID even 

1309 when the ``dataId`` argument is `None`). 

1310 

1311 Returns 

1312 ------- 

1313 dataIds : `DataCoordinateQueryResults` 

1314 Data IDs matching the given query parameters. These are guaranteed 

1315 to identify all dimensions (`DataCoordinate.hasFull` returns 

1316 `True`), but will not contain `DimensionRecord` objects 

1317 (`DataCoordinate.hasRecords` returns `False`). Call 

1318 `DataCoordinateQueryResults.expanded` on the returned object to 

1319 fetch those (and consider using 

1320 `DataCoordinateQueryResults.materialize` on the returned object 

1321 first if the expected number of rows is very large). See 

1322 documentation for those methods for additional information. 
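
Examples
--------
A sketch; the dimension, dataset type, and collection names are
illustrative::

    dataIds = registry.queryDataIds(
        ["detector", "physical_filter"],
        datasets="flat",
        collections="my/run/20200101",
        instrument="DummyCam",
    )
    expanded = dataIds.expanded()  # attach DimensionRecords to each data ID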

1323 """ 

1324 dimensions = iterable(dimensions) 

1325 standardizedDataId = self.expandDataId(dataId, **kwargs) 

1326 standardizedDatasetTypes = set() 

1327 requestedDimensions = self.dimensions.extract(dimensions) 

1328 queryDimensionNames = set(requestedDimensions.names) 

1329 if datasets is not None: 

1330 if collections is None: 

1331 raise TypeError("Cannot pass 'datasets' without 'collections'.") 

1332 for datasetType in self.queryDatasetTypes(datasets, components=components): 

1333 queryDimensionNames.update(datasetType.dimensions.names) 

1334 # If any matched dataset type is a component, just operate on 

1335 # its parent instead, because Registry doesn't know anything 

1336 # about what components exist, and here (unlike queryDatasets) 

1337 # we don't care about returning them. 

1338 parentDatasetTypeName, componentName = datasetType.nameAndComponent() 

1339 if componentName is not None: 

1340 datasetType = self.getDatasetType(parentDatasetTypeName) 

1341 standardizedDatasetTypes.add(datasetType) 

1342 # Preprocess collections expression in case the original included 

1343 # single-pass iterators (we'll want to use it multiple times 

1344 # below). 

1345 collections = CollectionQuery.fromExpression(collections) 

1346 

1347 summary = queries.QuerySummary( 

1348 requested=DimensionGraph(self.dimensions, names=queryDimensionNames), 

1349 dataId=standardizedDataId, 

1350 expression=where, 

1351 ) 

1352 builder = self.makeQueryBuilder(summary) 

1353 for datasetType in standardizedDatasetTypes: 

1354 builder.joinDataset(datasetType, collections, isResult=False) 

1355 query = builder.finish() 

1356 return queries.DataCoordinateQueryResults(self._db, query) 

1357 

1358 def queryDimensionRecords(self, element: Union[DimensionElement, str], *, 

1359 dataId: Optional[DataId] = None, 

1360 datasets: Any = None, 

1361 collections: Any = None, 

1362 where: Optional[str] = None, 

1363 components: Optional[bool] = None, 

1364 **kwargs: Any) -> Iterator[DimensionRecord]: 

1365 """Query for dimension information matching user-provided criteria. 

1366 

1367 Parameters 

1368 ---------- 

1369 element : `DimensionElement` or `str` 

1370 The dimension element to obtain records for. 

1371 dataId : `dict` or `DataCoordinate`, optional 

1372 A data ID whose key-value pairs are used as equality constraints 

1373 in the query. 

1374 datasets : `Any`, optional 

1375 An expression that fully or partially identifies dataset types 

1376 that should constrain the yielded records. See `queryDataIds` and 

1377 :ref:`daf_butler_dataset_type_expressions` for more information. 

1378 collections : `Any`, optional 

1379 An expression that fully or partially identifies the collections 

1380 to search for datasets. See `queryDataIds` and 

1381 :ref:`daf_butler_collection_expressions` for more information. 

1382 where : `str`, optional 

1383 A string expression similar to a SQL WHERE clause. See 

1384 `queryDataIds` and :ref:`daf_butler_dimension_expressions` for more 

1385 information. 

1386 components : `bool`, optional 

1387 Whether to apply dataset expressions to components as well. 

1388 See `queryDataIds` for more information. 

1389 **kwargs 

1390 Additional keyword arguments are forwarded to 

1391 `DataCoordinate.standardize` when processing the ``dataId`` 

1392 argument (and may be used to provide a constraining data ID even 

1393 when the ``dataId`` argument is `None`). 

1394 

1395 Returns 

1396 ------- 

1397 records : `Iterator` [ `DimensionRecord` ] 

1398 Dimension records matching the given query parameters. 
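
Examples
--------
A sketch; the element name and data ID value are illustrative, and the
``full_name`` field assumes the default detector dimension definition::

    for record in registry.queryDimensionRecords("detector", instrument="DummyCam"):
        print(record.id, record.full_name)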

1399 """ 

1400 if not isinstance(element, DimensionElement): 

1401 element = self.dimensions[element] 

1402 dataIds = self.queryDataIds(element.graph, dataId=dataId, datasets=datasets, collections=collections, 

1403 where=where, components=components, **kwargs) 

1404 return iter(self._dimensions[element].fetch(dataIds)) 

1405 

1406 storageClasses: StorageClassFactory 

1407 """All storage classes known to the registry (`StorageClassFactory`). 

1408 """