
1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ( 

25 "Registry", 

26) 

27 

28from collections import defaultdict 

29import contextlib 

30import logging 

31from typing import ( 

32 Any, 

33 Dict, 

34 Iterable, 

35 Iterator, 

36 List, 

37 Mapping, 

38 Optional, 

39 Set, 

40 Type, 

41 TYPE_CHECKING, 

42 Union, 

43) 

44 

45import sqlalchemy 

46 

47from ..core import ( 

48 Config, 

49 DataCoordinate, 

50 DataCoordinateIterable, 

51 DataId, 

52 DatasetRef, 

53 DatasetType, 

54 ddl, 

55 Dimension, 

56 DimensionElement, 

57 DimensionGraph, 

58 DimensionRecord, 

59 DimensionUniverse, 

60 NamedKeyMapping, 

61 NameLookupMapping, 

62 StorageClassFactory, 

63) 

64from . import queries 

65from ..core.utils import doImport, iterable, transactional 

66from ._config import RegistryConfig 

67from ._collectionType import CollectionType 

68from ._exceptions import ConflictingDefinitionError, InconsistentDataIdError, OrphanedRecordError 

69from .wildcards import CategorizedWildcard, CollectionQuery, CollectionSearch, Ellipsis 

70from .interfaces import ChainedCollectionRecord, RunRecord 

71from .versions import ButlerVersionsManager, DigestMismatchError 

72 

73if TYPE_CHECKING: 

74 from ..butlerConfig import ButlerConfig 

75 from .interfaces import ( 

76 ButlerAttributeManager, 

77 CollectionManager, 

78 Database, 

79 OpaqueTableStorageManager, 

80 DimensionRecordStorageManager, 

81 DatasetRecordStorageManager, 

82 DatastoreRegistryBridgeManager, 

83 ) 

84 

85 

86_LOG = logging.getLogger(__name__) 

87 

88 

89class Registry: 

90 """Registry interface. 

91 

92 Parameters 

93 ---------- 

94 database : `Database` 

95 Database instance used to store the Registry data. 

96 universe : `DimensionUniverse` 

97 Full set of dimensions for Registry. 

98 attributes : `type` 

99 Manager class implementing `ButlerAttributeManager`. 

100 opaque : `type` 

101 Manager class implementing `OpaqueTableStorageManager`. 

102 dimensions : `type` 

103 Manager class implementing `DimensionRecordStorageManager`. 

104 collections : `type` 

105 Manager class implementing `CollectionManager`. 

106 datasets : `type` 

107 Manager class implementing `DatasetRecordStorageManager`. 

108 datastoreBridges : `type` 

109 Manager class implementing `DatastoreRegistryBridgeManager`. 

110 writeable : `bool`, optional 

111 If `True` then the Registry will support write operations. 

112 create : `bool`, optional 

113 If `True` then the database schema will be initialized; it must be 

114 empty before instantiating the Registry. 

115 """ 

116 

117 defaultConfigFile: Optional[str] = None 

118 """Path to configuration defaults. Accessed within the ``config`` resource 

119 or relative to a search path. Can be `None` if no defaults are specified. 

120 """ 

121 

122 @classmethod 

123 def fromConfig(cls, config: Union[ButlerConfig, RegistryConfig, Config, str], create: bool = False, 

124 butlerRoot: Optional[str] = None, writeable: bool = True) -> Registry: 

125 """Create `Registry` subclass instance from `config`. 

126 

127 Uses ``registry.cls`` from `config` to determine which subclass to 

128 instantiate. 

129 

130 Parameters 

131 ---------- 

132 config : `ButlerConfig`, `RegistryConfig`, `Config` or `str` 

133 Registry configuration. 

134 create : `bool`, optional 

135 Assume empty Registry and create a new one. 

136 butlerRoot : `str`, optional 

137 Path to the repository root this `Registry` will manage. 

138 writeable : `bool`, optional 

139 If `True` (default) create a read-write connection to the database. 

140 

141 Returns 

142 ------- 

143 registry : `Registry` (subclass) 

144 A new `Registry` subclass instance. 
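
Examples
--------
A minimal sketch of opening an existing repository read-only; the
``butler.yaml`` path is hypothetical and must point at a configuration
containing a ``registry`` section::

    registry = Registry.fromConfig("butler.yaml", writeable=False)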

145 """ 

146 if not isinstance(config, RegistryConfig): 

147 if isinstance(config, (str, Config)): 

148 config = RegistryConfig(config) 

149 else: 

150 raise ValueError("Incompatible Registry configuration: {}".format(config)) 

151 config.replaceRoot(butlerRoot) 

152 DatabaseClass = config.getDatabaseClass() 

153 database = DatabaseClass.fromUri(str(config.connectionString), origin=config.get("origin", 0), 

154 namespace=config.get("namespace"), writeable=writeable) 

155 universe = DimensionUniverse(config) 

156 attributes = doImport(config["managers", "attributes"]) 

157 opaque = doImport(config["managers", "opaque"]) 

158 dimensions = doImport(config["managers", "dimensions"]) 

159 collections = doImport(config["managers", "collections"]) 

160 datasets = doImport(config["managers", "datasets"]) 

161 datastoreBridges = doImport(config["managers", "datastores"]) 

162 

163 return cls(database, universe, dimensions=dimensions, attributes=attributes, opaque=opaque, 

164 collections=collections, datasets=datasets, datastoreBridges=datastoreBridges, 

165 writeable=writeable, create=create) 

166 

167 def __init__(self, database: Database, universe: DimensionUniverse, *, 

168 attributes: Type[ButlerAttributeManager], 

169 opaque: Type[OpaqueTableStorageManager], 

170 dimensions: Type[DimensionRecordStorageManager], 

171 collections: Type[CollectionManager], 

172 datasets: Type[DatasetRecordStorageManager], 

173 datastoreBridges: Type[DatastoreRegistryBridgeManager], 

174 writeable: bool = True, 

175 create: bool = False): 

176 self._db = database 

177 self.storageClasses = StorageClassFactory() 

178 with self._db.declareStaticTables(create=create) as context: 

179 self._attributes = attributes.initialize(self._db, context) 

180 self._dimensions = dimensions.initialize(self._db, context, universe=universe) 

181 self._collections = collections.initialize(self._db, context) 

182 self._datasets = datasets.initialize(self._db, context, 

183 collections=self._collections, 

184 universe=self.dimensions) 

185 self._opaque = opaque.initialize(self._db, context) 

186 self._datastoreBridges = datastoreBridges.initialize(self._db, context, 

187 opaque=self._opaque, 

188 datasets=datasets, 

189 universe=self.dimensions) 

190 versions = ButlerVersionsManager( 

191 self._attributes, 

192 dict( 

193 attributes=self._attributes, 

194 opaque=self._opaque, 

195 dimensions=self._dimensions, 

196 collections=self._collections, 

197 datasets=self._datasets, 

198 datastores=self._datastoreBridges, 

199 ) 

200 ) 

201 # store managers and their versions in attributes table 

202 context.addInitializer(lambda db: versions.storeManagersConfig()) 

203 context.addInitializer(lambda db: versions.storeManagersVersions()) 

204 

205 if not create: 

206 # verify that configured versions are compatible with schema 

207 versions.checkManagersConfig() 

208 versions.checkManagersVersions(writeable) 

209 try: 

210 versions.checkManagersDigests() 

211 except DigestMismatchError as exc: 

212 # potentially digest mismatch is a serious error but during 

213 # development it could be benign, treat this as warning for 

214 # now. 

215 _LOG.warning(f"Registry schema digest mismatch: {exc}") 

216 

217 self._collections.refresh() 

218 self._datasets.refresh(universe=self._dimensions.universe) 

219 

220 def __str__(self) -> str: 

221 return str(self._db) 

222 

223 def __repr__(self) -> str: 

224 return f"Registry({self._db!r}, {self.dimensions!r})" 

225 

226 def isWriteable(self) -> bool: 

227 """Return `True` if this registry allows write operations, and `False` 

228 otherwise. 

229 """ 

230 return self._db.isWriteable() 

231 

232 @property 

233 def dimensions(self) -> DimensionUniverse: 

234 """All dimensions recognized by this `Registry` (`DimensionUniverse`). 

235 """ 

236 return self._dimensions.universe 

237 

238 @contextlib.contextmanager 

239 def transaction(self, *, savepoint: bool = False) -> Iterator[None]: 

240 """Return a context manager that represents a transaction. 

241 """ 

242 try: 

243 with self._db.transaction(savepoint=savepoint): 

244 yield 

245 except BaseException: 

246 # TODO: this clears the caches sometimes when we wouldn't actually 

247 # need to. Can we avoid that? 

248 self._dimensions.clearCaches() 

249 raise 

250 

251 def registerOpaqueTable(self, tableName: str, spec: ddl.TableSpec) -> None: 

252 """Add an opaque (to the `Registry`) table for use by a `Datastore` or 

253 other data repository client. 

254 

255 Opaque table records can be added via `insertOpaqueData`, retrieved via 

256 `fetchOpaqueData`, and removed via `deleteOpaqueData`. 

257 

258 Parameters 

259 ---------- 

260 tableName : `str` 

261 Logical name of the opaque table. This may differ from the 

262 actual name used in the database by a prefix and/or suffix. 

263 spec : `ddl.TableSpec` 

264 Specification for the table to be added. 
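
Examples
--------
A minimal sketch of the register/insert/fetch cycle, assuming
``registry`` is writeable and using ``ddl`` and ``sqlalchemy`` as
imported at the top of this module; the table name, column names, and
the exact `ddl.FieldSpec` arguments shown are illustrative assumptions::

    spec = ddl.TableSpec(fields=[
        ddl.FieldSpec("dataset_id", dtype=sqlalchemy.BigInteger, primaryKey=True),
        ddl.FieldSpec("checksum", dtype=sqlalchemy.String, length=32),
    ])
    registry.registerOpaqueTable("hypothetical_datastore_records", spec)
    registry.insertOpaqueData("hypothetical_datastore_records",
                              {"dataset_id": 1, "checksum": "abc123"})
    rows = list(registry.fetchOpaqueData("hypothetical_datastore_records",
                                         dataset_id=1))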

265 """ 

266 self._opaque.register(tableName, spec) 

267 

268 @transactional 

269 def insertOpaqueData(self, tableName: str, *data: dict) -> None: 

270 """Insert records into an opaque table. 

271 

272 Parameters 

273 ---------- 

274 tableName : `str` 

275 Logical name of the opaque table. Must match the name used in a 

276 previous call to `registerOpaqueTable`. 

277 data 

278 Each additional positional argument is a dictionary that represents 

279 a single row to be added. 

280 """ 

281 self._opaque[tableName].insert(*data) 

282 

283 def fetchOpaqueData(self, tableName: str, **where: Any) -> Iterator[dict]: 

284 """Retrieve records from an opaque table. 

285 

286 Parameters 

287 ---------- 

288 tableName : `str` 

289 Logical name of the opaque table. Must match the name used in a 

290 previous call to `registerOpaqueTable`. 

291 where 

292 Additional keyword arguments are interpreted as equality 

293 constraints that restrict the returned rows (combined with AND); 

294 keyword arguments are column names and values are the values they 

295 must have. 

296 

297 Yields 

298 ------ 

299 row : `dict` 

300 A dictionary representing a single result row. 

301 """ 

302 yield from self._opaque[tableName].fetch(**where) 

303 

304 @transactional 

305 def deleteOpaqueData(self, tableName: str, **where: Any) -> None: 

306 """Remove records from an opaque table. 

307 

308 Parameters 

309 ---------- 

310 tableName : `str` 

311 Logical name of the opaque table. Must match the name used in a 

312 previous call to `registerOpaqueTable`. 

313 where 

314 Additional keyword arguments are interpreted as equality 

315 constraints that restrict the deleted rows (combined with AND); 

316 keyword arguments are column names and values are the values they 

317 must have. 

318 """ 

319 self._opaque[tableName].delete(**where) 

320 

321 def registerCollection(self, name: str, type: CollectionType = CollectionType.TAGGED) -> None: 

322 """Add a new collection if one with the given name does not exist. 

323 

324 Parameters 

325 ---------- 

326 name : `str` 

327 The name of the collection to create. 

328 type : `CollectionType` 

329 Enum value indicating the type of collection to create. 

330 

331 Notes 

332 ----- 

333 This method cannot be called within transactions, as it needs to be 

334 able to perform its own transaction to be concurrent. 

335 """ 

336 self._collections.register(name, type) 

337 

338 def getCollectionType(self, name: str) -> CollectionType: 

339 """Return an enumeration value indicating the type of the given 

340 collection. 

341 

342 Parameters 

343 ---------- 

344 name : `str` 

345 The name of the collection. 

346 

347 Returns 

348 ------- 

349 type : `CollectionType` 

350 Enum value indicating the type of this collection. 

351 

352 Raises 

353 ------ 

354 MissingCollectionError 

355 Raised if no collection with the given name exists. 

356 """ 

357 return self._collections.find(name).type 

358 

359 def registerRun(self, name: str) -> None: 

360 """Add a new run if one with the given name does not exist. 

361 

362 Parameters 

363 ---------- 

364 name : `str` 

365 The name of the run to create. 

366 

367 Notes 

368 ----- 

369 This method cannot be called within transactions, as it needs to be 

370 able to perform its own transaction to be concurrent. 

371 """ 

372 self._collections.register(name, CollectionType.RUN) 

373 

374 @transactional 

375 def removeCollection(self, name: str) -> None: 

376 """Completely remove the given collection. 

377 

378 Parameters 

379 ---------- 

380 name : `str` 

381 The name of the collection to remove. 

382 

383 Raises 

384 ------ 

385 MissingCollectionError 

386 Raised if no collection with the given name exists. 

387 

388 Notes 

389 ----- 

390 If this is a `~CollectionType.RUN` collection, all datasets and quanta 

391 in it are also fully removed. This requires that those datasets be 

392 removed (or at least trashed) from any datastores that hold them first. 

393 

394 A collection may not be deleted as long as it is referenced by a 

395 `~CollectionType.CHAINED` collection; the ``CHAINED`` collection must 

396 be deleted or redefined first. 

397 """ 

398 self._collections.remove(name) 

399 

400 def getCollectionChain(self, parent: str) -> CollectionSearch: 

401 """Return the child collections in a `~CollectionType.CHAINED` 

402 collection. 

403 

404 Parameters 

405 ---------- 

406 parent : `str` 

407 Name of the chained collection. Must have already been added via 

408 a call to `Registry.registerCollection`. 

409 

410 Returns 

411 ------- 

412 children : `CollectionSearch` 

413 An object that defines the search path of the collection. 

414 See :ref:`daf_butler_collection_expressions` for more information. 

415 

416 Raises 

417 ------ 

418 MissingCollectionError 

419 Raised if ``parent`` does not exist in the `Registry`. 

420 TypeError 

421 Raised if ``parent`` does not correspond to a 

422 `~CollectionType.CHAINED` collection. 

423 """ 

424 record = self._collections.find(parent) 

425 if record.type is not CollectionType.CHAINED: 

426 raise TypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.") 

427 assert isinstance(record, ChainedCollectionRecord) 

428 return record.children 

429 

430 @transactional 

431 def setCollectionChain(self, parent: str, children: Any) -> None: 

432 """Define or redefine a `~CollectionType.CHAINED` collection. 

433 

434 Parameters 

435 ---------- 

436 parent : `str` 

437 Name of the chained collection. Must have already been added via 

438 a call to `Registry.registerCollection`. 

439 children : `Any` 

440 An expression defining an ordered search of child collections, 

441 generally an iterable of `str`. Restrictions on the dataset types 

442 to be searched can also be included, by passing a mapping or an 

443 iterable containing tuples; see 

444 :ref:`daf_butler_collection_expressions` for more information. 

445 

446 Raises 

447 ------ 

448 MissingCollectionError 

449 Raised when any of the given collections do not exist in the 

450 `Registry`. 

451 TypeError 

452 Raised if ``parent`` does not correspond to a 

453 `~CollectionType.CHAINED` collection. 

454 ValueError 

455 Raised if the given collections contain a cycle. 
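
Examples
--------
A minimal sketch, assuming ``registry`` is writeable; the collection
names are hypothetical::

    registry.registerRun("hypothetical/run/1")
    registry.registerRun("hypothetical/run/2")
    registry.registerCollection("hypothetical/chain", CollectionType.CHAINED)
    # Children are searched in the order given.
    registry.setCollectionChain("hypothetical/chain",
                                ["hypothetical/run/2", "hypothetical/run/1"])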

456 """ 

457 record = self._collections.find(parent) 

458 if record.type is not CollectionType.CHAINED: 

459 raise TypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.") 

460 assert isinstance(record, ChainedCollectionRecord) 

461 children = CollectionSearch.fromExpression(children) 

462 if children != record.children: 

463 record.update(self._collections, children) 

464 

465 def registerDatasetType(self, datasetType: DatasetType) -> bool: 

466 """ 

467 Add a new `DatasetType` to the Registry. 

468 

469 It is not an error to register the same `DatasetType` twice. 

470 

471 Parameters 

472 ---------- 

473 datasetType : `DatasetType` 

474 The `DatasetType` to be added. 

475 

476 Returns 

477 ------- 

478 inserted : `bool` 

479 `True` if ``datasetType`` was inserted, `False` if an identical 

480 existing `DatasetType` was found. Note that in either case the 

481 DatasetType is guaranteed to be defined in the Registry 

482 consistently with the given definition. 

483 

484 Raises 

485 ------ 

486 ValueError 

487 Raised if the dimensions or storage class are invalid. 

488 ConflictingDefinitionError 

489 Raised if this DatasetType is already registered with a different 

490 definition. 

491 

492 Notes 

493 ----- 

494 This method cannot be called within transactions, as it needs to be 

495 able to perform its own transaction to be concurrent. 
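
Examples
--------
A minimal sketch, assuming ``registry`` is writeable; the dataset type
name is hypothetical and the storage class name must exist in the
repository's storage class definitions::

    datasetType = DatasetType("hypothetical_raw",
                              dimensions=("instrument", "exposure", "detector"),
                              storageClass="Exposure",
                              universe=registry.dimensions)
    inserted = registry.registerDatasetType(datasetType)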

496 """ 

497 _, inserted = self._datasets.register(datasetType) 

498 return inserted 

499 

500 def getDatasetType(self, name: str) -> DatasetType: 

501 """Get the `DatasetType`. 

502 

503 Parameters 

504 ---------- 

505 name : `str` 

506 Name of the type. 

507 

508 Returns 

509 ------- 

510 type : `DatasetType` 

511 The `DatasetType` associated with the given name. 

512 

513 Raises 

514 ------ 

515 KeyError 

516 Raised if the requested DatasetType could not be found in the registry. 

517 """ 

518 return self._datasets[name].datasetType 

519 

520 def findDataset(self, datasetType: Union[DatasetType, str], dataId: Optional[DataId] = None, *, 

521 collections: Any, **kwargs: Any) -> Optional[DatasetRef]: 

522 """Find a dataset given its `DatasetType` and data ID. 

523 

524 This can be used to obtain a `DatasetRef` that permits the dataset to 

525 be read from a `Datastore`. If the dataset is a component and can not 

526 be found using the provided dataset type, a dataset ref for the parent 

527 will be returned instead but with the correct dataset type. 

528 

529 Parameters 

530 ---------- 

531 datasetType : `DatasetType` or `str` 

532 A `DatasetType` or the name of one. 

533 dataId : `dict` or `DataCoordinate`, optional 

534 A `dict`-like object containing the `Dimension` links that identify 

535 the dataset within a collection. 

536 collections 

537 An expression that fully or partially identifies the collections 

538 to search for the dataset, such as a `str`, `DatasetType`, or 

539 iterable thereof. See :ref:`daf_butler_collection_expressions` 

540 for more information. 

541 **kwargs 

542 Additional keyword arguments passed to 

543 `DataCoordinate.standardize` to convert ``dataId`` to a true 

544 `DataCoordinate` or augment an existing one. 

545 

546 Returns 

547 ------- 

548 ref : `DatasetRef` 

549 A reference to the dataset, or `None` if no matching Dataset 

550 was found. 

551 

552 Raises 

553 ------ 

554 LookupError 

555 Raised if one or more data ID keys are missing. 

556 KeyError 

557 Raised if the dataset type does not exist. 

558 MissingCollectionError 

559 Raised if any of ``collections`` does not exist in the registry. 
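
Examples
--------
A minimal sketch; the dataset type, data ID values, and collection name
are hypothetical::

    ref = registry.findDataset("raw",
                               instrument="DummyCam", exposure=42, detector=1,
                               collections="hypothetical/run")
    if ref is not None:
        print(ref.id, ref.dataId)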

560 """ 

561 if isinstance(datasetType, DatasetType): 

562 storage = self._datasets[datasetType.name] 

563 else: 

564 storage = self._datasets[datasetType] 

565 dataId = DataCoordinate.standardize(dataId, graph=storage.datasetType.dimensions, 

566 universe=self.dimensions, **kwargs) 

567 collections = CollectionSearch.fromExpression(collections) 

568 for collectionRecord in collections.iter(self._collections, datasetType=storage.datasetType): 

569 result = storage.find(collectionRecord, dataId) 

570 if result is not None: 

571 return result 

572 

573 return None 

574 

575 @transactional 

576 def insertDatasets(self, datasetType: Union[DatasetType, str], dataIds: Iterable[DataId], 

577 run: str) -> List[DatasetRef]: 

578 """Insert one or more datasets into the `Registry` 

579 

580 This always adds new datasets; to associate existing datasets with 

581 a new collection, use ``associate``. 

582 

583 Parameters 

584 ---------- 

585 datasetType : `DatasetType` or `str` 

586 A `DatasetType` or the name of one. 

587 dataIds : `~collections.abc.Iterable` of `dict` or `DataCoordinate` 

588 Dimension-based identifiers for the new datasets. 

589 run : `str` 

590 The name of the run that produced the datasets. 

591 

592 Returns 

593 ------- 

594 refs : `list` of `DatasetRef` 

595 Resolved `DatasetRef` instances for all given data IDs (in the same 

596 order). 

597 

598 Raises 

599 ------ 

600 ConflictingDefinitionError 

601 If a dataset with the same dataset type and data ID as one of those 

602 given already exists in ``run``. 

603 MissingCollectionError 

604 Raised if ``run`` does not exist in the registry. 
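
Examples
--------
A minimal sketch, assuming the "raw" dataset type is already registered
and the referenced dimension rows exist; all names and values are
hypothetical::

    registry.registerRun("hypothetical/run")
    refs = registry.insertDatasets(
        "raw",
        dataIds=[{"instrument": "DummyCam", "exposure": 42, "detector": 1}],
        run="hypothetical/run",
    )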

605 """ 

606 if isinstance(datasetType, DatasetType): 

607 storage = self._datasets.find(datasetType.name) 

608 if storage is None: 

609 raise LookupError(f"DatasetType '{datasetType}' has not been registered.") 

610 else: 

611 storage = self._datasets.find(datasetType) 

612 if storage is None: 

613 raise LookupError(f"DatasetType with name '{datasetType}' has not been registered.") 

614 runRecord = self._collections.find(run) 

615 if runRecord.type is not CollectionType.RUN: 

616 raise TypeError("Given collection is of type {runRecord.type.name}; RUN collection required.") 

617 assert isinstance(runRecord, RunRecord) 

618 expandedDataIds = [self.expandDataId(dataId, graph=storage.datasetType.dimensions) 

619 for dataId in dataIds] 

620 try: 

621 refs = list(storage.insert(runRecord, expandedDataIds)) 

622 except sqlalchemy.exc.IntegrityError as err: 

623 raise ConflictingDefinitionError(f"A database constraint failure was triggered by inserting " 

624 f"one or more datasets of type {storage.datasetType} into " 

625 f"collection '{run}'. " 

626 f"This probably means a dataset with the same data ID " 

627 f"and dataset type already exists, but it may also mean a " 

628 f"dimension row is missing.") from err 

629 return refs 

630 

631 def getDataset(self, id: int) -> Optional[DatasetRef]: 

632 """Retrieve a Dataset entry. 

633 

634 Parameters 

635 ---------- 

636 id : `int` 

637 The unique identifier for the dataset. 

638 

639 Returns 

640 ------- 

641 ref : `DatasetRef` or `None` 

642 A ref to the Dataset, or `None` if no matching Dataset 

643 was found. 

644 """ 

645 return self._datasets.getDatasetRef(id, universe=self.dimensions) 

649 

650 @transactional 

651 def removeDatasets(self, refs: Iterable[DatasetRef]) -> None: 

652 """Remove datasets from the Registry. 

653 

654 The datasets will be removed unconditionally from all collections, and 

655 any `Quantum` that consumed this dataset will instead be marked with 

656 having a NULL input. `Datastore` records will *not* be deleted; the 

657 caller is responsible for ensuring that the dataset has already been 

658 removed from all Datastores. 

659 

660 Parameters 

661 ---------- 

662 refs : `Iterable` of `DatasetRef` 

663 References to the datasets to be removed. Must include a valid 

664 ``id`` attribute, and should be considered invalidated upon return. 

665 

666 Raises 

667 ------ 

668 AmbiguousDatasetError 

669 Raised if any ``ref.id`` is `None`. 

670 OrphanedRecordError 

671 Raised if any dataset is still present in any `Datastore`. 

672 """ 

673 for datasetType, refsForType in DatasetRef.groupByType(refs).items(): 

674 storage = self._datasets.find(datasetType.name) 

675 assert storage is not None 

676 try: 

677 storage.delete(refsForType) 

678 except sqlalchemy.exc.IntegrityError as err: 

679 raise OrphanedRecordError("One or more datasets is still " 

680 "present in one or more Datastores.") from err 

681 

682 @transactional 

683 def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

684 """Add existing datasets to a `~CollectionType.TAGGED` collection. 

685 

686 If a `DatasetRef` with the exact same integer ID is already in the 

687 collection, nothing is changed. If a `DatasetRef` with the same 

688 `DatasetType` and data ID but a different integer ID 

689 exists in the collection, `ConflictingDefinitionError` is raised. 

690 

691 Parameters 

692 ---------- 

693 collection : `str` 

694 Indicates the collection the datasets should be associated with. 

695 refs : `Iterable` [ `DatasetRef` ] 

696 An iterable of resolved `DatasetRef` instances that already exist 

697 in this `Registry`. 

698 

699 Raises 

700 ------ 

701 ConflictingDefinitionError 

702 If a Dataset with the given `DatasetRef` already exists in the 

703 given collection. 

704 AmbiguousDatasetError 

705 Raised if ``any(ref.id is None for ref in refs)``. 

706 MissingCollectionError 

707 Raised if ``collection`` does not exist in the registry. 

708 TypeError 

709 Raised if adding new datasets to the given ``collection`` is not 

710 allowed. 
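
Examples
--------
A minimal sketch, assuming the datasets already exist in
``hypothetical/run``; all collection and dataset type names are
hypothetical::

    registry.registerCollection("hypothetical/tagged", CollectionType.TAGGED)
    refs = list(registry.queryDatasets("raw", collections="hypothetical/run"))
    registry.associate("hypothetical/tagged", refs)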

711 """ 

712 collectionRecord = self._collections.find(collection) 

713 if collectionRecord.type is not CollectionType.TAGGED: 

714 raise TypeError(f"Collection '{collection}' has type {collectionRecord.type.name}, not TAGGED.") 

715 for datasetType, refsForType in DatasetRef.groupByType(refs).items(): 

716 storage = self._datasets.find(datasetType.name) 

717 assert storage is not None 

718 try: 

719 storage.associate(collectionRecord, refsForType) 

720 except sqlalchemy.exc.IntegrityError as err: 

721 raise ConflictingDefinitionError( 

722 f"Constraint violation while associating dataset of type {datasetType.name} with " 

723 f"collection {collection}. This probably means that one or more datasets with the same " 

724 f"dataset type and data ID already exist in the collection, but it may also indicate " 

725 f"that the datasets do not exist." 

726 ) from err 

727 

728 @transactional 

729 def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

730 """Remove existing datasets from a `~CollectionType.TAGGED` collection. 

731 

732 ``collection`` and ``ref`` combinations that are not currently 

733 associated are silently ignored. 

734 

735 Parameters 

736 ---------- 

737 collection : `str` 

738 The collection the datasets should no longer be associated with. 

739 refs : `Iterable` [ `DatasetRef` ] 

740 An iterable of resolved `DatasetRef` instances that already exist 

741 in this `Registry`. 

742 

743 Raises 

744 ------ 

745 AmbiguousDatasetError 

746 Raised if any of the given dataset references is unresolved. 

747 MissingCollectionError 

748 Raised if ``collection`` does not exist in the registry. 

749 TypeError 

750 Raised if removing datasets from the given ``collection`` is not 

751 allowed. 

752 """ 

753 collectionRecord = self._collections.find(collection) 

754 if collectionRecord.type is not CollectionType.TAGGED: 

755 raise TypeError(f"Collection '{collection}' has type {collectionRecord.type.name}; " 

756 "expected TAGGED.") 

757 for datasetType, refsForType in DatasetRef.groupByType(refs).items(): 

758 storage = self._datasets.find(datasetType.name) 

759 assert storage is not None 

760 storage.disassociate(collectionRecord, refsForType) 

761 

762 def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager: 

763 """Return an object that allows a new `Datastore` instance to 

764 communicate with this `Registry`. 

765 

766 Returns 

767 ------- 

768 manager : `DatastoreRegistryBridgeManager` 

769 Object that mediates communication between this `Registry` and its 

770 associated datastores. 

771 """ 

772 return self._datastoreBridges 

773 

774 def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]: 

775 """Retrieve datastore locations for a given dataset. 

776 

777 Parameters 

778 ---------- 

779 ref : `DatasetRef` 

780 A reference to the dataset for which to retrieve storage 

781 information. 

782 

783 Returns 

784 ------- 

785 datastores : `Iterable` [ `str` ] 

786 All the matching datastores holding this dataset. 

787 

788 Raises 

789 ------ 

790 AmbiguousDatasetError 

791 Raised if ``ref.id`` is `None`. 

792 """ 

793 return self._datastoreBridges.findDatastores(ref) 

794 

795 def expandDataId(self, dataId: Optional[DataId] = None, *, graph: Optional[DimensionGraph] = None, 

796 records: Optional[NameLookupMapping[DimensionElement, Optional[DimensionRecord]]] = None, 

797 **kwargs: Any) -> DataCoordinate: 

798 """Expand a dimension-based data ID to include additional information. 

799 

800 Parameters 

801 ---------- 

802 dataId : `DataCoordinate` or `dict`, optional 

803 Data ID to be expanded; augmented and overridden by ``kwargs``. 

804 graph : `DimensionGraph`, optional 

805 Set of dimensions for the expanded ID. If `None`, the dimensions 

806 will be inferred from the keys of ``dataId`` and ``kwargs``. 

807 Dimensions that are in ``dataId`` or ``kwargs`` but not in ``graph`` 

808 are silently ignored, providing a way to extract and expand a 

809 subset of a data ID. 

810 records : `Mapping` [`str`, `DimensionRecord`], optional 

811 Dimension record data to use before querying the database for that 

812 data, keyed by element name. 

813 **kwargs 

814 Additional keywords are treated like additional key-value pairs for 

815 ``dataId``, extending and overriding it. 

816 

817 Returns 

818 ------- 

819 expanded : `DataCoordinate` 

820 A data ID that includes full metadata for all of the dimensions it 

821 identifies, i.e. guarantees that ``expanded.hasRecords()`` and 

822 ``expanded.hasFull()`` both return `True`. 
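
Examples
--------
A minimal sketch; the dimension values are hypothetical and their
records must already exist in the `Registry`::

    expanded = registry.expandDataId(instrument="DummyCam", exposure=42, detector=1)
    assert expanded.hasFull() and expanded.hasRecords()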

823 """ 

824 standardized = DataCoordinate.standardize(dataId, graph=graph, universe=self.dimensions, **kwargs) 

825 if standardized.hasRecords(): 

826 return standardized 

827 if records is None: 

828 records = {} 

829 elif isinstance(records, NamedKeyMapping): 

830 records = records.byName() 

831 else: 

832 records = dict(records) 

833 if isinstance(dataId, DataCoordinate) and dataId.hasRecords(): 

834 records.update(dataId.records.byName()) 

835 keys = standardized.byName() 

836 for element in standardized.graph.primaryKeyTraversalOrder: 

837 record = records.get(element.name, ...) # Use ... to mean not found; None might mean NULL 

838 if record is ...: 

839 if isinstance(element, Dimension) and keys.get(element.name) is None: 

840 if element in standardized.graph.required: 

841 raise LookupError( 

842 f"No value or null value for required dimension {element.name}." 

843 ) 

844 keys[element.name] = None 

845 record = None 

846 else: 

847 storage = self._dimensions[element] 

848 dataIdSet = DataCoordinateIterable.fromScalar( 

849 DataCoordinate.standardize(keys, graph=element.graph) 

850 ) 

851 fetched = tuple(storage.fetch(dataIdSet)) 

852 try: 

853 (record,) = fetched 

854 except ValueError: 

855 record = None 

856 records[element.name] = record 

857 if record is not None: 

858 for d in element.implied: 

859 value = getattr(record, d.name) 

860 if keys.setdefault(d.name, value) != value: 

861 raise InconsistentDataIdError( 

862 f"Data ID {standardized} has {d.name}={keys[d.name]!r}, " 

863 f"but {element.name} implies {d.name}={value!r}." 

864 ) 

865 else: 

866 if element in standardized.graph.required: 

867 raise LookupError( 

868 f"Could not fetch record for required dimension {element.name} via keys {keys}." 

869 ) 

870 if element.alwaysJoin: 

871 raise InconsistentDataIdError( 

872 f"Could not fetch record for element {element.name} via keys {keys}, ", 

873 "but it is marked alwaysJoin=True; this means one or more dimensions are not " 

874 "related." 

875 ) 

876 for d in element.implied: 

877 keys.setdefault(d.name, None) 

878 records.setdefault(d.name, None) 

879 return DataCoordinate.standardize(keys, graph=standardized.graph).expanded(records=records) 

880 

881 def insertDimensionData(self, element: Union[DimensionElement, str], 

882 *data: Union[Mapping[str, Any], DimensionRecord], 

883 conform: bool = True) -> None: 

884 """Insert one or more dimension records into the database. 

885 

886 Parameters 

887 ---------- 

888 element : `DimensionElement` or `str` 

889 The `DimensionElement` or name thereof that identifies the table 

890 records will be inserted into. 

891 data : `dict` or `DimensionRecord` (variadic) 

892 One or more records to insert. 

893 conform : `bool`, optional 

894 If `False` (`True` is default) perform no checking or conversions, 

895 and assume that ``element`` is a `DimensionElement` instance and 

896 ``data`` contains one or more `DimensionRecord` instances of the 

897 appropriate subclass. 
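
Examples
--------
A minimal sketch, assuming ``registry`` is writeable; the record values
and the exact set of ``instrument`` fields shown are assumptions::

    registry.insertDimensionData(
        "instrument",
        {"name": "DummyCam", "visit_max": 25, "exposure_max": 300, "detector_max": 6},
    )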

898 """ 

899 if conform: 

900 if isinstance(element, str): 

901 element = self.dimensions[element] 

902 records = [row if isinstance(row, DimensionRecord) else element.RecordClass(**row) 

903 for row in data] 

904 else: 

905 # Ignore typing since caller said to trust them with conform=False. 

906 records = data # type: ignore 

907 storage = self._dimensions[element] # type: ignore 

908 storage.insert(*records) 

909 

910 def syncDimensionData(self, element: Union[DimensionElement, str], 

911 row: Union[Mapping[str, Any], DimensionRecord], 

912 conform: bool = True) -> bool: 

913 """Synchronize the given dimension record with the database, inserting 

914 if it does not already exist and comparing values if it does. 

915 

916 Parameters 

917 ---------- 

918 element : `DimensionElement` or `str` 

919 The `DimensionElement` or name thereof that identifies the table 

920 records will be inserted into. 

921 row : `dict` or `DimensionRecord` 

922 The record to insert. 

923 conform : `bool`, optional 

924 If `False` (`True` is default) perform no checking or conversions, 

925 and assume that ``element`` is a `DimensionElement` instance and 

926 ``row`` is a `DimensionRecord` instance of the 

927 appropriate subclass. 

928 

929 Returns 

930 ------- 

931 inserted : `bool` 

932 `True` if a new row was inserted, `False` otherwise. 

933 

934 Raises 

935 ------ 

936 ConflictingDefinitionError 

937 Raised if the record exists in the database (according to primary 

938 key lookup) but is inconsistent with the given one. 

939 

940 Notes 

941 ----- 

942 This method cannot be called within transactions, as it needs to be 

943 able to perform its own transaction to be concurrent. 
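
Examples
--------
A minimal sketch, assuming ``registry`` is writeable; field names and
values are assumptions::

    inserted = registry.syncDimensionData(
        "instrument",
        {"name": "DummyCam", "visit_max": 25, "exposure_max": 300, "detector_max": 6},
    )
    # inserted is True the first time, and False (after a consistency
    # check) on subsequent identical calls.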

944 """ 

945 if conform: 

946 if isinstance(element, str): 

947 element = self.dimensions[element] 

948 record = row if isinstance(row, DimensionRecord) else element.RecordClass(**row) 

949 else: 

950 # Ignore typing since caller said to trust them with conform=False. 

951 record = row # type: ignore 

952 storage = self._dimensions[element] # type: ignore 

953 return storage.sync(record) 

954 

955 def queryDatasetTypes(self, expression: Any = ..., *, components: Optional[bool] = None 

956 ) -> Iterator[DatasetType]: 

957 """Iterate over the dataset types whose names match an expression. 

958 

959 Parameters 

960 ---------- 

961 expression : `Any`, optional 

962 An expression that fully or partially identifies the dataset types 

963 to return, such as a `str`, `re.Pattern`, or iterable thereof. 

964 `...` can be used to return all dataset types, and is the default. 

965 See :ref:`daf_butler_dataset_type_expressions` for more 

966 information. 

967 components : `bool`, optional 

968 If `True`, apply all expression patterns to component dataset type 

969 names as well. If `False`, never apply patterns to components. 

970 If `None` (default), apply patterns to components only if their 

971 parent datasets were not matched by the expression. 

972 Fully-specified component datasets (`str` or `DatasetType` 

973 instances) are always included. 

974 

975 Yields 

976 ------ 

977 datasetType : `DatasetType` 

978 A `DatasetType` instance whose name matches ``expression``. 
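
Examples
--------
A minimal sketch; the name pattern is hypothetical::

    import re

    calibTypes = list(registry.queryDatasetTypes(re.compile("calib_.*")))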

979 """ 

980 wildcard = CategorizedWildcard.fromExpression(expression, coerceUnrecognized=lambda d: d.name) 

981 if wildcard is Ellipsis: 

982 for datasetType in self._datasets: 

983 # The dataset type can no longer be a component 

984 yield datasetType 

985 if components and datasetType.isComposite(): 

986 # Automatically create the component dataset types 

987 for component in datasetType.makeAllComponentDatasetTypes(): 

988 yield component 

989 return 

990 done: Set[str] = set() 

991 for name in wildcard.strings: 

992 storage = self._datasets.find(name) 

993 if storage is not None: 

994 done.add(storage.datasetType.name) 

995 yield storage.datasetType 

996 if wildcard.patterns: 

997 # If components (the argument) is None, we'll save component 

998 # dataset types that we might want to match, but only if their parents 

999 # didn't get included. 

1000 componentsForLater = [] 

1001 for registeredDatasetType in self._datasets: 

1002 # Components are not stored in registry so expand them here 

1003 allDatasetTypes = [registeredDatasetType] \ 

1004 + registeredDatasetType.makeAllComponentDatasetTypes() 

1005 for datasetType in allDatasetTypes: 

1006 if datasetType.name in done: 

1007 continue 

1008 parentName, componentName = datasetType.nameAndComponent() 

1009 if componentName is not None and not components: 

1010 if components is None and parentName not in done: 

1011 componentsForLater.append(datasetType) 

1012 continue 

1013 if any(p.fullmatch(datasetType.name) for p in wildcard.patterns): 

1014 done.add(datasetType.name) 

1015 yield datasetType 

1016 # Go back and try to match saved components. 

1017 for datasetType in componentsForLater: 

1018 parentName, _ = datasetType.nameAndComponent() 

1019 if parentName not in done and any(p.fullmatch(datasetType.name) for p in wildcard.patterns): 

1020 yield datasetType 

1021 

1022 def queryCollections(self, expression: Any = ..., 

1023 datasetType: Optional[DatasetType] = None, 

1024 collectionType: Optional[CollectionType] = None, 

1025 flattenChains: bool = False, 

1026 includeChains: Optional[bool] = None) -> Iterator[str]: 

1027 """Iterate over the collections whose names match an expression. 

1028 

1029 Parameters 

1030 ---------- 

1031 expression : `Any`, optional 

1032 An expression that fully or partially identifies the collections 

1033 to return, such as a `str`, `re.Pattern`, or iterable thereof. 

1034 `...` can be used to return all collections, and is the default. 

1035 See :ref:`daf_butler_collection_expressions` for more 

1036 information. 

1037 datasetType : `DatasetType`, optional 

1038 If provided, only yield collections that should be searched for 

1039 this dataset type according to ``expression``. If this is 

1040 not provided, any dataset type restrictions in ``expression`` are 

1041 ignored. 

1042 collectionType : `CollectionType`, optional 

1043 If provided, only yield collections of this type. 

1044 flattenChains : `bool`, optional 

1045 If `True` (`False` is default), recursively yield the child 

1046 collections of matching `~CollectionType.CHAINED` collections. 

1047 includeChains : `bool`, optional 

1048 If `True`, yield records for matching `~CollectionType.CHAINED` 

1049 collections. Default is the opposite of ``flattenChains``: include 

1050 either CHAINED collections or their children, but not both. 

1051 

1052 Yields 

1053 ------ 

1054 collection : `str` 

1055 The name of a collection that matches ``expression``. 
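
Examples
--------
A minimal sketch listing all RUN collections, flattening any CHAINED
collections into their children::

    runs = list(registry.queryCollections(..., collectionType=CollectionType.RUN,
                                          flattenChains=True))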

1056 """ 

1057 query = CollectionQuery.fromExpression(expression) 

1058 for record in query.iter(self._collections, datasetType=datasetType, collectionType=collectionType, 

1059 flattenChains=flattenChains, includeChains=includeChains): 

1060 yield record.name 

1061 

1062 def makeQueryBuilder(self, summary: queries.QuerySummary) -> queries.QueryBuilder: 

1063 """Return a `QueryBuilder` instance capable of constructing and 

1064 managing more complex queries than those obtainable via `Registry` 

1065 interfaces. 

1066 

1067 This is an advanced interface; downstream code should prefer 

1068 `Registry.queryDataIds` and `Registry.queryDatasets` whenever those 

1069 are sufficient. 

1070 

1071 Parameters 

1072 ---------- 

1073 summary : `queries.QuerySummary` 

1074 Object describing and categorizing the full set of dimensions that 

1075 will be included in the query. 

1076 

1077 Returns 

1078 ------- 

1079 builder : `queries.QueryBuilder` 

1080 Object that can be used to construct and perform advanced queries. 

1081 """ 

1082 return queries.QueryBuilder( 

1083 summary, 

1084 queries.RegistryManagers( 

1085 collections=self._collections, 

1086 dimensions=self._dimensions, 

1087 datasets=self._datasets 

1088 ) 

1089 ) 

1090 

1091 def queryDatasets(self, datasetType: Any, *, 

1092 collections: Any, 

1093 dimensions: Optional[Iterable[Union[Dimension, str]]] = None, 

1094 dataId: Optional[DataId] = None, 

1095 where: Optional[str] = None, 

1096 deduplicate: bool = False, 

1097 components: Optional[bool] = None, 

1098 **kwargs: Any) -> queries.DatasetQueryResults: 

1099 """Query for and iterate over dataset references matching user-provided 

1100 criteria. 

1101 

1102 Parameters 

1103 ---------- 

1104 datasetType 

1105 An expression that fully or partially identifies the dataset types 

1106 to be queried. Allowed types include `DatasetType`, `str`, 

1107 `re.Pattern`, and iterables thereof. The special value `...` can 

1108 be used to query all dataset types. See 

1109 :ref:`daf_butler_dataset_type_expressions` for more information. 

1110 collections 

1111 An expression that fully or partially identifies the collections 

1112 to search for datasets, such as a `str`, `re.Pattern`, or iterable 

1113 thereof. `...` can be used to return all collections. See 

1114 :ref:`daf_butler_collection_expressions` for more information. 

1115 dimensions : `~collections.abc.Iterable` of `Dimension` or `str` 

1116 Dimensions to include in the query (in addition to those used 

1117 to identify the queried dataset type(s)), either to constrain 

1118 the resulting datasets to those for which a matching dimension 

1119 exists, or to relate the dataset type's dimensions to dimensions 

1120 referenced by the ``dataId`` or ``where`` arguments. 

1121 dataId : `dict` or `DataCoordinate`, optional 

1122 A data ID whose key-value pairs are used as equality constraints 

1123 in the query. 

1124 where : `str`, optional 

1125 A string expression similar to a SQL WHERE clause. May involve 

1126 any column of a dimension table or (as a shortcut for the primary 

1127 key column of a dimension table) dimension name. See 

1128 :ref:`daf_butler_dimension_expressions` for more information. 

1129 deduplicate : `bool`, optional 

1130 If `True` (`False` is default), for each result data ID, only 

1131 yield one `DatasetRef` of each `DatasetType`, from the first 

1132 collection in which a dataset of that dataset type appears 

1133 (according to the order of ``collections`` passed in). If `True`, 

1134 ``collections`` must not contain regular expressions and may not 

1135 be `...`. 

1136 components : `bool`, optional 

1137 If `True`, apply all dataset expression patterns to component 

1138 dataset type names as well. If `False`, never apply patterns to 

1139 components. If `None` (default), apply patterns to components only 

1140 if their parent datasets were not matched by the expression. 

1141 Fully-specified component datasets (`str` or `DatasetType` 

1142 instances) are always included. 

1143 **kwargs 

1144 Additional keyword arguments are forwarded to 

1145 `DataCoordinate.standardize` when processing the ``dataId`` 

1146 argument (and may be used to provide a constraining data ID even 

1147 when the ``dataId`` argument is `None`). 

1148 

1149 Returns 

1150 ------- 

1151 refs : `queries.DatasetQueryResults` 

1152 Dataset references matching the given query criteria. 

1153 

1154 Raises 

1155 ------ 

1156 TypeError 

1157 Raised when the arguments are incompatible, such as when a 

1158 collection wildcard is passed when ``deduplicate`` is `True`. 

1159 

1160 Notes 

1161 ----- 

1162 When multiple dataset types are queried in a single call, the 

1163 results of this operation are equivalent to querying for each dataset 

1164 type separately in turn, and no information about the relationships 

1165 between datasets of different types is included. In contexts where 

1166 that kind of information is important, the recommended pattern is to 

1167 use `queryDataIds` to first obtain data IDs (possibly with the 

1168 desired dataset types and collections passed as constraints to the 

1169 query), and then use multiple (generally much simpler) calls to 

1170 `queryDatasets` with the returned data IDs passed as constraints. 
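
Examples
--------
A sketch of the pattern recommended in the notes above; the dataset
type and collection names are hypothetical::

    for dataId in registry.queryDataIds(["exposure", "detector"], datasets="raw",
                                        collections="hypothetical/run"):
        refs = list(registry.queryDatasets("calexp",
                                           collections="hypothetical/run",
                                           dataId=dataId))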

1171 """ 

1172 # Standardize the collections expression. 

1173 if deduplicate: 

1174 collections = CollectionSearch.fromExpression(collections) 

1175 else: 

1176 collections = CollectionQuery.fromExpression(collections) 

1177 # Standardize and expand the data ID provided as a constraint. 

1178 standardizedDataId = self.expandDataId(dataId, **kwargs) 

1179 

1180 # We can only query directly if given a non-component DatasetType 

1181 # instance. If we were given an expression or str or a component 

1182 # DatasetType instance, we'll populate this dict, recurse, and return. 

1183 # If we already have a non-component DatasetType, it will remain None 

1184 # and we'll run the query directly. 

1185 composition: Optional[ 

1186 Dict[ 

1187 DatasetType, # parent dataset type 

1188 List[Optional[str]] # component name, or None for parent 

1189 ] 

1190 ] = None 

1191 if not isinstance(datasetType, DatasetType): 

1192 # We were given a dataset type expression (which may be as simple 

1193 # as a str). Loop over all matching datasets, delegating handling 

1194 # of the `components` argument to queryDatasetTypes, as we populate 

1195 # the composition dict. 

1196 composition = defaultdict(list) 

1197 for trueDatasetType in self.queryDatasetTypes(datasetType, components=components): 

1198 parentName, componentName = trueDatasetType.nameAndComponent() 

1199 if componentName is not None: 

1200 parentDatasetType = self.getDatasetType(parentName) 

1201 composition.setdefault(parentDatasetType, []).append(componentName) 

1202 else: 

1203 composition.setdefault(trueDatasetType, []).append(None) 

1204 elif datasetType.isComponent(): 

1205 # We were given a true DatasetType instance, but it's a component. 

1206 # The composition dict will have exactly one item. 

1207 parentName, componentName = datasetType.nameAndComponent() 

1208 parentDatasetType = self.getDatasetType(parentName) 

1209 composition = {parentDatasetType: [componentName]} 

1210 if composition is not None: 

1211 # We need to recurse. Do that once for each parent dataset type. 

1212 chain = [] 

1213 for parentDatasetType, componentNames in composition.items(): 

1214 parentResults = self.queryDatasets( 

1215 parentDatasetType, 

1216 collections=collections, 

1217 dimensions=dimensions, 

1218 dataId=standardizedDataId, 

1219 where=where, 

1220 deduplicate=deduplicate 

1221 ) 

1222 if isinstance(parentResults, queries.ParentDatasetQueryResults): 

1223 chain.append( 

1224 parentResults.withComponents(componentNames) 

1225 ) 

1226 else: 

1227 # Should only happen if we know there would be no results. 

1228 assert isinstance(parentResults, queries.ChainedDatasetQueryResults) \ 

1229 and not parentResults._chain 

1230 return queries.ChainedDatasetQueryResults(chain) 

1231 # If we get here, there's no need to recurse (or we are already 

1232 # recursing; there can only ever be one level of recursion). 

1233 

1234 # The full set of dimensions in the query is the combination of those 

1235 # needed for the DatasetType and those explicitly requested, if any. 

1236 requestedDimensionNames = set(datasetType.dimensions.names) 

1237 if dimensions is not None: 

1238 requestedDimensionNames.update(self.dimensions.extract(dimensions).names) 

1239 # Construct the summary structure needed to construct a QueryBuilder. 

1240 summary = queries.QuerySummary( 

1241 requested=DimensionGraph(self.dimensions, names=requestedDimensionNames), 

1242 dataId=standardizedDataId, 

1243 expression=where, 

1244 ) 

1245 builder = self.makeQueryBuilder(summary) 

1246 # Add the dataset subquery to the query, telling the QueryBuilder to 

1247 # include the rank of the selected collection in the results only if we 

1248 # need to deduplicate. Note that if any of the collections are 

1249 # actually wildcard expressions, and we've asked for deduplication, 

1250 # this will raise TypeError for us. 

1251 if not builder.joinDataset(datasetType, collections, isResult=True, deduplicate=deduplicate): 

1252 return queries.ChainedDatasetQueryResults(()) 

1253 query = builder.finish() 

1254 return queries.ParentDatasetQueryResults(self._db, query, components=[None]) 

1255 

1256 def queryDataIds(self, dimensions: Union[Iterable[Union[Dimension, str]], Dimension, str], *, 

1257 dataId: Optional[DataId] = None, 

1258 datasets: Any = None, 

1259 collections: Any = None, 

1260 where: Optional[str] = None, 

1261 components: Optional[bool] = None, 

1262 **kwargs: Any) -> queries.DataCoordinateQueryResults: 

1263 """Query for data IDs matching user-provided criteria. 

1264 

1265 Parameters 

1266 ---------- 

1267 dimensions : `Dimension` or `str`, or iterable thereof 

1268 The dimensions of the data IDs to yield, as either `Dimension` 

1269 instances or `str`. Will be automatically expanded to a complete 

1270 `DimensionGraph`. 

1271 dataId : `dict` or `DataCoordinate`, optional 

1272 A data ID whose key-value pairs are used as equality constraints 

1273 in the query. 

1274 datasets : `Any`, optional 

1275 An expression that fully or partially identifies dataset types 

1276 that should constrain the yielded data IDs. For example, including 

1277 "raw" here would constrain the yielded ``instrument``, 

1278 ``exposure``, ``detector``, and ``physical_filter`` values to only 

1279 those for which at least one "raw" dataset exists in 

1280 ``collections``. Allowed types include `DatasetType`, `str`, 

1281 `re.Pattern`, and iterables thereof. Unlike other dataset type 

1282 expressions, ``...`` is not permitted - it doesn't make sense to 

1283 constrain data IDs on the existence of *all* datasets. 

1284 See :ref:`daf_butler_dataset_type_expressions` for more 

1285 information. 

1286 collections : `Any`, optional 

1287 An expression that fully or partially identifies the collections 

1288 to search for datasets, such as a `str`, `re.Pattern`, or iterable 

1289 thereof. `...` can be used to return all collections. Must be 

1290 provided if ``datasets`` is, and is ignored if it is not. See 

1291 :ref:`daf_butler_collection_expressions` for more information. 

1292 where : `str`, optional 

1293 A string expression similar to a SQL WHERE clause. May involve 

1294 any column of a dimension table or (as a shortcut for the primary 

1295 key column of a dimension table) dimension name. See 

1296 :ref:`daf_butler_dimension_expressions` for more information. 

1297 components : `bool`, optional 

1298 If `True`, apply all dataset expression patterns to component 

1299 dataset type names as well. If `False`, never apply patterns to 

1300 components. If `None` (default), apply patterns to components only 

1301 if their parent datasets were not matched by the expression. 

1302 Fully-specified component datasets (`str` or `DatasetType` 

1303 instances) are always included. 

1304 **kwargs 

1305 Additional keyword arguments are forwarded to 

1306 `DataCoordinate.standardize` when processing the ``dataId`` 

1307 argument (and may be used to provide a constraining data ID even 

1308 when the ``dataId`` argument is `None`). 

1309 

1310 Returns 

1311 ------- 

1312 dataIds : `DataCoordinateQueryResults` 

1313 Data IDs matching the given query parameters. These are guaranteed 

1314 to identify all dimensions (`DataCoordinate.hasFull` returns 

1315 `True`), but will not contain `DimensionRecord` objects 

1316 (`DataCoordinate.hasRecords` returns `False`). Call 

1317 `DataCoordinateQueryResults.expanded` on the returned object to 

1318 fetch those (and consider using 

1319 `DataCoordinateQueryResults.materialize` on the returned object 

1320 first if the expected number of rows is very large). See 

1321 documentation for those methods for additional information. 
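
Examples
--------
A minimal sketch; all names and values are hypothetical::

    dataIds = registry.queryDataIds(
        ["exposure", "detector"],
        datasets="raw",
        collections="hypothetical/run",
        where="instrument = 'DummyCam' AND detector IN (1, 2)",
    )
    for dataId in dataIds:
        print(dataId["exposure"], dataId["detector"])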

1322 """ 

1323 dimensions = iterable(dimensions) 

1324 standardizedDataId = self.expandDataId(dataId, **kwargs) 

1325 standardizedDatasetTypes = set() 

1326 requestedDimensions = self.dimensions.extract(dimensions) 

1327 queryDimensionNames = set(requestedDimensions.names) 

1328 if datasets is not None: 

1329 if collections is None: 

1330 raise TypeError("Cannot pass 'datasets' without 'collections'.") 

1331 for datasetType in self.queryDatasetTypes(datasets, components=components): 

1332 queryDimensionNames.update(datasetType.dimensions.names) 

1333 # If any matched dataset type is a component, just operate on 

1334 # its parent instead, because Registry doesn't know anything 

1335 # about what components exist, and here (unlike queryDatasets) 

1336 # we don't care about returning them. 

1337 parentDatasetTypeName, componentName = datasetType.nameAndComponent() 

1338 if componentName is not None: 

1339 datasetType = self.getDatasetType(parentDatasetTypeName) 

1340 standardizedDatasetTypes.add(datasetType) 

1341 # Preprocess collections expression in case the original included 

1342 # single-pass iterators (we'll want to use it multiple times 

1343 # below). 

1344 collections = CollectionQuery.fromExpression(collections) 

1345 

1346 summary = queries.QuerySummary( 

1347 requested=DimensionGraph(self.dimensions, names=queryDimensionNames), 

1348 dataId=standardizedDataId, 

1349 expression=where, 

1350 ) 

1351 builder = self.makeQueryBuilder(summary) 

1352 for datasetType in standardizedDatasetTypes: 

1353 builder.joinDataset(datasetType, collections, isResult=False) 

1354 query = builder.finish() 

1355 return queries.DataCoordinateQueryResults(self._db, query) 

1356 

1357 def queryDimensionRecords(self, element: Union[DimensionElement, str], *, 

1358 dataId: Optional[DataId] = None, 

1359 datasets: Any = None, 

1360 collections: Any = None, 

1361 where: Optional[str] = None, 

1362 components: Optional[bool] = None, 

1363 **kwargs: Any) -> Iterator[DimensionRecord]: 

1364 """Query for dimension information matching user-provided criteria. 

1365 

1366 Parameters 

1367 ---------- 

1368 element : `DimensionElement` or `str` 

1369 The dimension element to obtain records for. 

1370 dataId : `dict` or `DataCoordinate`, optional 

1371 A data ID whose key-value pairs are used as equality constraints 

1372 in the query. 

1373 datasets : `Any`, optional 

1374 An expression that fully or partially identifies dataset types 

1375 that should constrain the yielded records. See `queryDataIds` and 

1376 :ref:`daf_butler_dataset_type_expressions` for more information. 

1377 collections : `Any`, optional 

1378 An expression that fully or partially identifies the collections 

1379 to search for datasets. See `queryDataIds` and 

1380 :ref:`daf_butler_collection_expressions` for more information. 

1381 where : `str`, optional 

1382 A string expression similar to a SQL WHERE clause. See 

1383 `queryDataIds` and :ref:`daf_butler_dimension_expressions` for more 

1384 information. 

1385 components : `bool`, optional 

1386 Whether to apply dataset expressions to components as well. 

1387 See `queryDataIds` for more information. 

1388 **kwargs 

1389 Additional keyword arguments are forwarded to 

1390 `DataCoordinate.standardize` when processing the ``dataId`` 

1391 argument (and may be used to provide a constraining data ID even 

1392 when the ``dataId`` argument is `None`). 

1393 

1394 Returns 

1395 ------- 

1396 records : `Iterator` [ `DimensionRecord` ] 

1397 Dimension records matching the given query parameters. 
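
Examples
--------
A minimal sketch; the instrument name is hypothetical::

    detectorIds = [record.id for record in
                   registry.queryDimensionRecords("detector",
                                                  where="instrument = 'DummyCam'")]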

1398 """ 

1399 if not isinstance(element, DimensionElement): 

1400 element = self.dimensions[element] 

1401 dataIds = self.queryDataIds(element.graph, dataId=dataId, datasets=datasets, collections=collections, 

1402 where=where, components=components, **kwargs) 

1403 return iter(self._dimensions[element].fetch(dataIds)) 

1404 

1405 storageClasses: StorageClassFactory 

1406 """All storage classes known to the registry (`StorageClassFactory`). 

1407 """