
1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ( 

25 "ConsistentDataIds", 

26 "Registry", 

27) 

28 

29from collections import defaultdict 

30import contextlib 

31from dataclasses import dataclass 

32import sys 

33from typing import ( 

34 Any, 

35 Iterable, 

36 Iterator, 

37 List, 

38 Mapping, 

39 Optional, 

40 Set, 

41 Type, 

42 TYPE_CHECKING, 

43 Union, 

44) 

45 

46import sqlalchemy 

47 

48import lsst.sphgeom 

49from ..core import ( 

50 Config, 

51 DataCoordinate, 

52 DataId, 

53 DatasetRef, 

54 DatasetType, 

55 Dimension, 

56 DimensionElement, 

57 DimensionGraph, 

58 DimensionRecord, 

59 DimensionUniverse, 

60 ExpandedDataCoordinate, 

61 FakeDatasetRef, 

62 StorageClassFactory, 

63) 

64from ..core import ddl 

65from ..core.utils import doImport, iterable, transactional 

66from ._config import RegistryConfig 

67from .queries import ( 

68 QueryBuilder, 

69 QuerySummary, 

70) 

71from .tables import makeRegistryTableSpecs 

72from ._collectionType import CollectionType 

73from ._exceptions import ConflictingDefinitionError, InconsistentDataIdError, OrphanedRecordError 

74from .wildcards import CategorizedWildcard, CollectionQuery, CollectionSearch 

75 

76if TYPE_CHECKING:

77 from ..butlerConfig import ButlerConfig 

78 from ..core import ( 

79 Quantum 

80 ) 

81 from .interfaces import ( 

82 CollectionManager, 

83 Database, 

84 OpaqueTableStorageManager, 

85 DimensionRecordStorageManager, 

86 DatasetRecordStorageManager, 

87 ) 

88 

89 

90@dataclass 

91class ConsistentDataIds: 

92 """A struct used to report relationships between data IDs by 

93 `Registry.relateDataIds`. 

94 

95 If an instance of this class is returned (instead of `None`), the data IDs 

96 are "not inconsistent" - any keys they have in common have the same value, 

97 and any spatial or temporal relationships they have at least might involve 

98 an overlap. To capture this, any instance of `ConsistentDataIds` coerces 

99 to `True` in boolean contexts. 

100 """ 

101 

102 overlaps: bool 

103 """If `True`, the data IDs have at least one key in common, associated with 

104 the same value. 

105 

106 Note that data IDs are not inconsistent even if overlaps is `False` - they 

107 may simply have no keys in common, which means they cannot have 

108 inconsistent values for any keys. They may even be equal, in the case that 

109 both data IDs are empty. 

110 

111 This field does _not_ indicate whether a spatial or temporal overlap 

112 relationship exists. 

113 """ 

114 

115 contains: bool 

116 """If `True`, all keys in the first data ID are in the second, and are 

117 associated with the same values. 

118 

119 This includes the case where the first data ID is empty.

120 """ 

121 

122 within: bool 

123 """If `True`, all keys in the second data ID are in the first, and are 

124 associated with the same values. 

125 

126 This includes the case where the second data ID is empty.

127 """ 

128 

129 @property 

130 def equal(self) -> bool: 

131 """If `True`, the two data IDs are the same. 

132 

133 Data IDs are equal if they have both a `contains` and a `within` 

134 relationship. 

135 """ 

136 return self.contains and self.within 

137 

138 @property 

139 def disjoint(self) -> bool: 

140 """If `True`, the two data IDs have no keys in common. 

141 

142 This is simply the opposite of `overlaps`. Disjoint data IDs are by

143 definition not inconsistent. 

144 """ 

145 return not self.overlaps 

146 

147 def __bool__(self) -> bool: 

148 return True 

149 

150 

151class Registry: 

152 """Registry interface. 

153 

154 Parameters 

155 ---------- 

156 config : `ButlerConfig`, `RegistryConfig`, `Config` or `str` 

157 Registry configuration 

158 """ 

159 

160 defaultConfigFile = None 

161 """Path to configuration defaults. Relative to $DAF_BUTLER_DIR/config or 

162 absolute path. Can be None if no defaults specified. 

163 """ 

164 

165 @classmethod 

166 def fromConfig(cls, config: Union[ButlerConfig, RegistryConfig, Config, str], create: bool = False, 

167 butlerRoot: Optional[str] = None, writeable: bool = True) -> Registry: 

168 """Create `Registry` subclass instance from `config`. 

169 

170 Uses ``registry.cls`` from `config` to determine which subclass to 

171 instantiate. 

172 

173 Parameters 

174 ---------- 

175 config : `ButlerConfig`, `RegistryConfig`, `Config` or `str` 

176 Registry configuration 

177 create : `bool`, optional 

178 Assume empty Registry and create a new one. 

179 butlerRoot : `str`, optional 

180 Path to the repository root this `Registry` will manage. 

181 writeable : `bool`, optional 

182 If `True` (default) create a read-write connection to the database. 

183 

184 Returns 

185 ------- 

186 registry : `Registry` (subclass) 

187 A new `Registry` subclass instance. 

188 """ 

189 if not isinstance(config, RegistryConfig): 

190 if isinstance(config, str) or isinstance(config, Config): 

191 config = RegistryConfig(config) 

192 else: 

193 raise ValueError("Incompatible Registry configuration: {}".format(config)) 

194 config.replaceRoot(butlerRoot) 

195 DatabaseClass = config.getDatabaseClass() 

196 database = DatabaseClass.fromUri(str(config.connectionString), origin=config.get("origin", 0), 

197 namespace=config.get("namespace"), writeable=writeable) 

198 universe = DimensionUniverse(config) 

199 opaque = doImport(config["managers", "opaque"]) 

200 dimensions = doImport(config["managers", "dimensions"]) 

201 collections = doImport(config["managers", "collections"]) 

202 datasets = doImport(config["managers", "datasets"]) 

203 return cls(database, universe, dimensions=dimensions, opaque=opaque, collections=collections, 

204 datasets=datasets, create=create) 

205 
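# Usage sketch (illustrative, not part of the original source; the import
# path, config file location, and option values are assumptions):
#
#     from lsst.daf.butler.registry import Registry, RegistryConfig
#
#     config = RegistryConfig("/path/to/registry.yaml")
#     registry = Registry.fromConfig(config, create=True, writeable=True)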

206 def __init__(self, database: Database, universe: DimensionUniverse, *, 

207 opaque: Type[OpaqueTableStorageManager], 

208 dimensions: Type[DimensionRecordStorageManager], 

209 collections: Type[CollectionManager], 

210 datasets: Type[DatasetRecordStorageManager], 

211 create: bool = False): 

212 self._db = database 

213 self.storageClasses = StorageClassFactory() 

214 with self._db.declareStaticTables(create=create) as context: 

215 self._dimensions = dimensions.initialize(self._db, context, universe=universe) 

216 self._collections = collections.initialize(self._db, context) 

217 self._datasets = datasets.initialize(self._db, context, 

218 collections=self._collections, 

219 universe=self.dimensions) 

220 self._tables = context.addTableTuple(makeRegistryTableSpecs(self.dimensions, 

221 self._collections, 

222 self._datasets)) 

223 self._opaque = opaque.initialize(self._db, context) 

224 self._collections.refresh() 

225 self._datasets.refresh(universe=self._dimensions.universe) 

226 

227 def __str__(self) -> str: 

228 return str(self._db) 

229 

230 def __repr__(self) -> str: 

231 return f"Registry({self._db!r}, {self.dimensions!r})" 

232 

233 def isWriteable(self) -> bool: 

234 """Return `True` if this registry allows write operations, and `False` 

235 otherwise. 

236 """ 

237 return self._db.isWriteable() 

238 

239 @property 

240 def dimensions(self) -> DimensionUniverse: 

241 """All dimensions recognized by this `Registry` (`DimensionUniverse`). 

242 """ 

243 return self._dimensions.universe 

244 

245 @contextlib.contextmanager 

246 def transaction(self): 

247 """Return a context manager that represents a transaction. 

248 """ 

249 # TODO make savepoint=False the default. 

250 try: 

251 with self._db.transaction(): 

252 yield 

253 except BaseException: 

254 # TODO: this clears the caches sometimes when we wouldn't actually 

255 # need to. Can we avoid that? 

256 self._dimensions.clearCaches() 

257 raise 

258 
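# Usage sketch (illustrative): group several writes so they commit or roll
# back together. `registry` is assumed to be a writeable instance, and the
# dimension record fields shown depend on the configured dimension universe.
#
#     with registry.transaction():
#         registry.insertDimensionData("instrument", {"name": "DummyCam"})
#         registry.insertDimensionData("detector", {"instrument": "DummyCam",
#                                                   "id": 1, "full_name": "one"})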

259 def registerOpaqueTable(self, tableName: str, spec: ddl.TableSpec): 

260 """Add an opaque (to the `Registry`) table for use by a `Datastore` or 

261 other data repository client. 

262 

263 Opaque table records can be added via `insertOpaqueData`, retrieved via 

264 `fetchOpaqueData`, and removed via `deleteOpaqueData`. 

265 

266 Parameters 

267 ---------- 

268 tableName : `str` 

269 Logical name of the opaque table. This may differ from the 

270 actual name used in the database by a prefix and/or suffix. 

271 spec : `ddl.TableSpec` 

272 Specification for the table to be added. 

273 """ 

274 self._opaque.register(tableName, spec) 

275 

276 @transactional 

277 def insertOpaqueData(self, tableName: str, *data: dict): 

278 """Insert records into an opaque table. 

279 

280 Parameters 

281 ---------- 

282 tableName : `str` 

283 Logical name of the opaque table. Must match the name used in a 

284 previous call to `registerOpaqueTable`. 

285 data 

286 Each additional positional argument is a dictionary that represents 

287 a single row to be added. 

288 """ 

289 self._opaque[tableName].insert(*data) 

290 

291 def fetchOpaqueData(self, tableName: str, **where: Any) -> Iterator[dict]: 

292 """Retrieve records from an opaque table. 

293 

294 Parameters 

295 ---------- 

296 tableName : `str` 

297 Logical name of the opaque table. Must match the name used in a 

298 previous call to `registerOpaqueTable`. 

299 where 

300 Additional keyword arguments are interpreted as equality 

301 constraints that restrict the returned rows (combined with AND); 

302 keyword arguments are column names and values are the values they 

303 must have. 

304 

305 Yields 

306 ------ 

307 row : `dict` 

308 A dictionary representing a single result row. 

309 """ 

310 yield from self._opaque[tableName].fetch(**where) 

311 

312 @transactional 

313 def deleteOpaqueData(self, tableName: str, **where: Any): 

314 """Remove records from an opaque table. 

315 

316 Parameters 

317 ---------- 

318 tableName : `str` 

319 Logical name of the opaque table. Must match the name used in a 

320 previous call to `registerOpaqueTable`. 

321 where 

322 Additional keyword arguments are interpreted as equality 

323 constraints that restrict the deleted rows (combined with AND); 

324 keyword arguments are column names and values are the values they 

325 must have. 

326 """ 

327 self._opaque[tableName].delete(**where) 

328 
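# Usage sketch (illustrative; the table name and field specifications are
# assumptions): a Datastore-like client can keep its own bookkeeping rows in
# an opaque table and query or delete them by equality constraints.
#
#     import sqlalchemy
#     from lsst.daf.butler.core import ddl
#
#     spec = ddl.TableSpec(fields=[
#         ddl.FieldSpec("dataset_id", dtype=sqlalchemy.BigInteger, primaryKey=True),
#         ddl.FieldSpec("path", dtype=sqlalchemy.String, length=256),
#     ])
#     registry.registerOpaqueTable("my_datastore_records", spec)
#     registry.insertOpaqueData("my_datastore_records",
#                               {"dataset_id": 1, "path": "a/b.fits"})
#     rows = list(registry.fetchOpaqueData("my_datastore_records", dataset_id=1))
#     registry.deleteOpaqueData("my_datastore_records", dataset_id=1)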

329 def registerCollection(self, name: str, type: CollectionType = CollectionType.TAGGED): 

330 """Add a new collection if one with the given name does not exist. 

331 

332 Parameters 

333 ---------- 

334 name : `str` 

335 The name of the collection to create. 

336 type : `CollectionType` 

337 Enum value indicating the type of collection to create. 

338 

339 Notes 

340 ----- 

341 This method cannot be called within transactions, as it needs to be 

342 able to perform its own transaction to be concurrent. 

343 """ 

344 self._collections.register(name, type) 

345 

346 def getCollectionType(self, name: str) -> CollectionType: 

347 """Return an enumeration value indicating the type of the given 

348 collection. 

349 

350 Parameters 

351 ---------- 

352 name : `str` 

353 The name of the collection. 

354 

355 Returns 

356 ------- 

357 type : `CollectionType` 

358 Enum value indicating the type of this collection. 

359 

360 Raises 

361 ------ 

362 MissingCollectionError 

363 Raised if no collection with the given name exists. 

364 """ 

365 return self._collections.find(name).type 

366 

367 def registerRun(self, name: str): 

368 """Add a new run if one with the given name does not exist. 

369 

370 Parameters 

371 ---------- 

372 name : `str` 

373 The name of the run to create. 

374 

375 Notes 

376 ----- 

377 This method cannot be called within transactions, as it needs to be 

378 able to perform its own transaction to be concurrent. 

379 """ 

380 self._collections.register(name, CollectionType.RUN) 

381 

382 @transactional 

383 def removeCollection(self, name: str): 

384 """Completely remove the given collection. 

385 

386 Parameters 

387 ---------- 

388 name : `str` 

389 The name of the collection to remove. 

390 

391 Raises 

392 ------ 

393 MissingCollectionError 

394 Raised if no collection with the given name exists. 

395 

396 Notes 

397 ----- 

398 If this is a `~CollectionType.RUN` collection, all datasets and quanta 

399 in it are also fully removed. This requires that those datasets be 

400 removed (or at least trashed) from any datastores that hold them first. 

401 

402 A collection may not be deleted as long as it is referenced by a 

403 `~CollectionType.CHAINED` collection; the ``CHAINED`` collection must 

404 be deleted or redefined first. 

405 """ 

406 self._collections.remove(name) 

407 

408 def getCollectionChain(self, parent: str) -> CollectionSearch: 

409 """Return the child collections in a `~CollectionType.CHAINED` 

410 collection. 

411 

412 Parameters 

413 ---------- 

414 parent : `str` 

415 Name of the chained collection. Must have already been added via 

416 a call to `Registry.registerCollection`. 

417 

418 Returns 

419 ------- 

420 children : `CollectionSearch` 

421 An object that defines the search path of the collection. 

422 See :ref:`daf_butler_collection_expressions` for more information. 

423 

424 Raises 

425 ------ 

426 MissingCollectionError 

427 Raised if ``parent`` does not exist in the `Registry`. 

428 TypeError 

429 Raised if ``parent`` does not correspond to a 

430 `~CollectionType.CHAINED` collection. 

431 """ 

432 record = self._collections.find(parent) 

433 if record.type is not CollectionType.CHAINED: 

434 raise TypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.") 

435 return record.children 

436 

437 @transactional 

438 def setCollectionChain(self, parent: str, children: Any): 

439 """Define or redefine a `~CollectionType.CHAINED` collection. 

440 

441 Parameters 

442 ---------- 

443 parent : `str` 

444 Name of the chained collection. Must have already been added via 

445 a call to `Registry.registerCollection`. 

446 children : `Any` 

447 An expression defining an ordered search of child collections, 

448 generally an iterable of `str`. Restrictions on the dataset types 

449 to be searched can also be included, by passing a mapping or an

450 iterable containing tuples; see 

451 :ref:`daf_butler_collection_expressions` for more information. 

452 

453 Raises 

454 ------ 

455 MissingCollectionError 

456 Raised when any of the given collections do not exist in the 

457 `Registry`. 

458 TypeError 

459 Raised if ``parent`` does not correspond to a 

460 `~CollectionType.CHAINED` collection. 

461 ValueError 

462 Raised if the given collections contain a cycle.

463 """ 

464 record = self._collections.find(parent) 

465 if record.type is not CollectionType.CHAINED: 

466 raise TypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.") 

467 children = CollectionSearch.fromExpression(children) 

468 if children != record.children: 

469 record.update(self._collections, children) 

470 
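# Usage sketch (illustrative; collection names are hypothetical): a CHAINED
# collection is registered like any other collection and then given an
# ordered list of child collections to search.
#
#     registry.registerCollection("hypothetical/defaults", CollectionType.CHAINED)
#     registry.registerRun("hypothetical/run1")
#     registry.registerCollection("hypothetical/calibs", CollectionType.TAGGED)
#     registry.setCollectionChain("hypothetical/defaults",
#                                 ["hypothetical/run1", "hypothetical/calibs"])
#     children = registry.getCollectionChain("hypothetical/defaults")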

471 def registerDatasetType(self, datasetType: DatasetType) -> bool: 

472 """ 

473 Add a new `DatasetType` to the Registry. 

474 

475 It is not an error to register the same `DatasetType` twice. 

476 

477 Parameters 

478 ---------- 

479 datasetType : `DatasetType` 

480 The `DatasetType` to be added. 

481 

482 Returns 

483 ------- 

484 inserted : `bool` 

485 `True` if ``datasetType`` was inserted, `False` if an identical 

486 existing `DatasetType` was found. Note that in either case the

487 DatasetType is guaranteed to be defined in the Registry 

488 consistently with the given definition. 

489 

490 Raises 

491 ------ 

492 ValueError 

493 Raised if the dimensions or storage class are invalid. 

494 ConflictingDefinitionError 

495 Raised if this DatasetType is already registered with a different 

496 definition. 

497 

498 Notes 

499 ----- 

500 This method cannot be called within transactions, as it needs to be 

501 able to perform its own transaction to be concurrent. 

502 """ 

503 _, inserted = self._datasets.register(datasetType) 

504 return inserted 

505 

506 def getDatasetType(self, name: str) -> DatasetType: 

507 """Get the `DatasetType`. 

508 

509 Parameters 

510 ---------- 

511 name : `str` 

512 Name of the type. 

513 

514 Returns 

515 ------- 

516 type : `DatasetType` 

517 The `DatasetType` associated with the given name. 

518 

519 Raises 

520 ------ 

521 KeyError 

522 Raised if the requested DatasetType could not be found in the registry.

523 """ 

524 storage = self._datasets.find(name) 

525 if storage is None: 

526 raise KeyError(f"DatasetType '{name}' could not be found.") 

527 return storage.datasetType 

528 
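# Usage sketch (illustrative; the dataset type name, dimensions, and storage
# class are assumptions and must be valid in the configured universe and
# storage class factory):
#
#     datasetType = DatasetType("rawlike",
#                               dimensions=["instrument", "detector", "exposure"],
#                               storageClass="Exposure",
#                               universe=registry.dimensions)
#     registry.registerDatasetType(datasetType)  # no-op for an identical definition
#     same = registry.getDatasetType("rawlike")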

529 def findDataset(self, datasetType: Union[DatasetType, str], dataId: Optional[DataId] = None, *, 

530 collections: Any, **kwargs: Any) -> Optional[DatasetRef]: 

531 """Find a dataset given its `DatasetType` and data ID. 

532 

533 This can be used to obtain a `DatasetRef` that permits the dataset to 

534 be read from a `Datastore`. If the dataset is a component and cannot

535 be found using the provided dataset type, a dataset ref for the parent 

536 will be returned instead but with the correct dataset type. 

537 

538 Parameters 

539 ---------- 

540 datasetType : `DatasetType` or `str` 

541 A `DatasetType` or the name of one. 

542 dataId : `dict` or `DataCoordinate`, optional 

543 A `dict`-like object containing the `Dimension` links that identify 

544 the dataset within a collection. 

545 collections 

546 An expression that fully or partially identifies the collections 

547 to search for the dataset, such as a `str`, `re.Pattern`, or 

548 iterable thereof. `...` can be used to return all collections. 

549 See :ref:`daf_butler_collection_expressions` for more information. 

550 **kwargs 

551 Additional keyword arguments passed to 

552 `DataCoordinate.standardize` to convert ``dataId`` to a true 

553 `DataCoordinate` or augment an existing one. 

554 

555 Returns 

556 ------- 

557 ref : `DatasetRef` 

558 A reference to the dataset, or `None` if no matching Dataset 

559 was found. 

560 

561 Raises 

562 ------ 

563 LookupError 

564 Raised if one or more data ID keys are missing or the dataset type 

565 does not exist. 

566 MissingCollectionError 

567 Raised if any of ``collections`` does not exist in the registry. 

568 """ 

569 if isinstance(datasetType, DatasetType): 

570 storage = self._datasets.find(datasetType.name) 

571 if storage is None: 

572 raise LookupError(f"DatasetType '{datasetType}' has not been registered.") 

573 else: 

574 storage = self._datasets.find(datasetType) 

575 if storage is None: 

576 raise LookupError(f"DatasetType with name '{datasetType}' has not been registered.") 

577 dataId = DataCoordinate.standardize(dataId, graph=storage.datasetType.dimensions, 

578 universe=self.dimensions, **kwargs) 

579 collections = CollectionSearch.fromExpression(collections) 

580 for collectionRecord in collections.iter(self._collections, datasetType=storage.datasetType): 

581 result = storage.find(collectionRecord, dataId) 

582 if result is not None: 

583 if result.datasetType.isComposite(): 

584 result = self._datasets.fetchComponents(result) 

585 return result 

586 

587 # fallback to the parent if we got nothing and this was a component 

588 if storage.datasetType.isComponent(): 

589 parentType, _ = storage.datasetType.nameAndComponent() 

590 parentRef = self.findDataset(parentType, dataId, collections=collections, **kwargs) 

591 if parentRef is not None: 

592 # Should already conform and we know no components 

593 return DatasetRef(storage.datasetType, parentRef.dataId, id=parentRef.id, 

594 run=parentRef.run, conform=False, hasParentId=True) 

595 

596 return None 

597 
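# Usage sketch (illustrative; the dataset type, data ID values, and collection
# names are hypothetical):
#
#     ref = registry.findDataset("rawlike", instrument="DummyCam", detector=1,
#                                exposure=42, collections=["hypothetical/run1"])
#     if ref is not None:
#         print(ref.id, ref.run)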

598 @transactional 

599 def insertDatasets(self, datasetType: Union[DatasetType, str], dataIds: Iterable[DataId], 

600 run: str, *, producer: Optional[Quantum] = None, recursive: bool = False 

601 ) -> List[DatasetRef]: 

602 """Insert one or more datasets into the `Registry` 

603 

604 This always adds new datasets; to associate existing datasets with 

605 a new collection, use ``associate``. 

606 

607 Parameters 

608 ---------- 

609 datasetType : `DatasetType` or `str` 

610 A `DatasetType` or the name of one. 

611 dataIds : `~collections.abc.Iterable` of `dict` or `DataCoordinate` 

612 Dimension-based identifiers for the new datasets. 

613 run : `str` 

614 The name of the run that produced the datasets. 

615 producer : `Quantum` 

616 Unit of work that produced the datasets. May be `None` to store 

617 no provenance information, but if present the `Quantum` must 

618 already have been added to the Registry. 

619 recursive : `bool` 

620 If True, recursively add datasets and attach entries for component 

621 datasets as well. 

622 

623 Returns 

624 ------- 

625 refs : `list` of `DatasetRef` 

626 Resolved `DatasetRef` instances for all given data IDs (in the same 

627 order). 

628 

629 Raises 

630 ------ 

631 ConflictingDefinitionError 

632 Raised if a dataset with the same dataset type and data ID as one of those

633 given already exists in ``run``. 

634 MissingCollectionError 

635 Raised if ``run`` does not exist in the registry. 

636 """ 

637 if isinstance(datasetType, DatasetType): 

638 storage = self._datasets.find(datasetType.name) 

639 if storage is None: 

640 raise LookupError(f"DatasetType '{datasetType}' has not been registered.") 

641 else: 

642 storage = self._datasets.find(datasetType) 

643 if storage is None: 

644 raise LookupError(f"DatasetType with name '{datasetType}' has not been registered.") 

645 runRecord = self._collections.find(run) 

646 dataIds = [self.expandDataId(dataId, graph=storage.datasetType.dimensions) for dataId in dataIds] 

647 try: 

648 refs = list(storage.insert(runRecord, dataIds, quantum=producer)) 

649 except sqlalchemy.exc.IntegrityError as err: 

650 raise ConflictingDefinitionError(f"A database constraint failure was triggered by inserting " 

651 f"one or more datasets of type {storage.datasetType} into " 

652 f"collection '{run}'. " 

653 f"This probably means a dataset with the same data ID " 

654 f"and dataset type already exists, but it may also mean a " 

655 f"dimension row is missing.") from err 

656 if recursive and storage.datasetType.isComposite(): 

657 # Insert component rows by recursing. 

658 composites = defaultdict(dict) 

659 # TODO: we really shouldn't be inserting all components defined by 

660 # the storage class, because there's no guarantee all of them are 

661 # actually present in these datasets. 

662 for componentName in storage.datasetType.storageClass.components: 

663 componentDatasetType = storage.datasetType.makeComponentDatasetType(componentName) 

664 componentRefs = self.insertDatasets(componentDatasetType, 

665 dataIds=dataIds, 

666 run=run, 

667 producer=producer, 

668 recursive=True) 

669 for parentRef, componentRef in zip(refs, componentRefs): 

670 composites[parentRef][componentName] = componentRef 

671 if composites: 

672 refs = list(self._datasets.attachComponents(composites.items())) 

673 return refs 

674 
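# Usage sketch (illustrative; names and data ID values are hypothetical): new
# datasets always go into a RUN collection, and the returned refs are
# resolved (they carry integer IDs).
#
#     registry.registerRun("hypothetical/run1")
#     (ref,) = registry.insertDatasets("rawlike",
#                                      dataIds=[{"instrument": "DummyCam",
#                                                "detector": 1, "exposure": 42}],
#                                      run="hypothetical/run1")
#     assert ref.id is not None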

675 def getDataset(self, id: int) -> Optional[DatasetRef]: 

676 """Retrieve a Dataset entry. 

677 

678 Parameters 

679 ---------- 

680 id : `int` 

681 The unique identifier for the dataset. 

682 

683 Returns 

684 ------- 

685 ref : `DatasetRef` or `None` 

686 A ref to the Dataset, or `None` if no matching Dataset 

687 was found. 

688 """ 

689 ref = self._datasets.getDatasetRef(id) 

690 if ref is None: 

691 return None 

692 if ref.datasetType.isComposite(): 

693 return self._datasets.fetchComponents(ref) 

694 return ref 

695 

696 @transactional 

697 def removeDatasets(self, refs: Iterable[DatasetRef], *, recursive: bool = True): 

698 """Remove datasets from the Registry. 

699 

700 The datasets will be removed unconditionally from all collections, and 

701 any `Quantum` that consumed this dataset will instead be marked with 

702 having a NULL input. `Datastore` records will *not* be deleted; the 

703 caller is responsible for ensuring that the dataset has already been 

704 removed from all Datastores. 

705 

706 Parameters 

707 ---------- 

708 refs : `Iterable` of `DatasetRef` 

709 References to the datasets to be removed. Must include a valid 

710 ``id`` attribute, and should be considered invalidated upon return. 

711 recursive : `bool`, optional 

712 If `True`, remove all component datasets as well. Note that 

713 this only removes components that are actually included in the 

714 given `DatasetRef` instances, which may not be the same as those in 

715 the database (especially if they were obtained from 

716 `queryDatasets`, which does not populate `DatasetRef.components`). 

717 

718 Raises 

719 ------ 

720 AmbiguousDatasetError 

721 Raised if any ``ref.id`` is `None`. 

722 OrphanedRecordError 

723 Raised if any dataset is still present in any `Datastore`. 

724 """ 

725 for datasetType, refsForType in DatasetRef.groupByType(refs, recursive=recursive).items(): 

726 storage = self._datasets.find(datasetType.name) 

727 try: 

728 storage.delete(refsForType) 

729 except sqlalchemy.exc.IntegrityError as err: 

730 raise OrphanedRecordError("One or more datasets is still " 

731 "present in one or more Datastores.") from err 

732 

733 @transactional 

734 def attachComponents(self, parent: DatasetRef, components: Mapping[str, DatasetRef]): 

735 """Attach components to a dataset. 

736 

737 Parameters 

738 ---------- 

739 parent : `DatasetRef` 

740 A reference to the parent dataset. 

741 components : `Mapping` [ `str`, `DatasetRef` ] 

742 Mapping from component name to the `DatasetRef` for that component. 

743 

744 Returns 

745 ------- 

746 ref : `DatasetRef` 

747 An updated version of ``parent`` with components included. 

748 

754 Raises 

755 ------ 

756 AmbiguousDatasetError 

757 Raised if ``parent.id`` or any `DatasetRef.id` in ``components`` 

758 is `None`. 

759 """ 

760 for name, ref in components.items(): 

761 if ref.datasetType.storageClass != parent.datasetType.storageClass.components[name]: 

762 raise TypeError(f"Expected storage class " 

763 f"'{parent.datasetType.storageClass.components[name].name}' " 

764 f"for component '{name}' of dataset {parent}; got " 

765 f"dataset {ref} with storage class " 

766 f"'{ref.datasetType.storageClass.name}'.") 

767 ref, = self._datasets.attachComponents([(parent, components)]) 

768 return ref 

769 

770 @transactional 

771 def associate(self, collection: str, refs: Iterable[DatasetRef], *, recursive: bool = True): 

772 """Add existing datasets to a `~CollectionType.TAGGED` collection. 

773 

774 If a DatasetRef with the same exact integer ID is already in a 

775 collection, nothing is changed. If a `DatasetRef` with the same

776 `DatasetType` and data ID but with different integer ID 

777 exists in the collection, `ConflictingDefinitionError` is raised. 

778 

779 Parameters 

780 ---------- 

781 collection : `str` 

782 Indicates the collection the datasets should be associated with. 

783 refs : `Iterable` [ `DatasetRef` ] 

784 An iterable of resolved `DatasetRef` instances that already exist 

785 in this `Registry`. 

786 recursive : `bool`, optional 

787 If `True`, associate all component datasets as well. Note that 

788 this only associates components that are actually included in the 

789 given `DatasetRef` instances, which may not be the same as those in 

790 the database (especially if they were obtained from 

791 `queryDatasets`, which does not populate `DatasetRef.components`). 

792 

793 Raises 

794 ------ 

795 ConflictingDefinitionError 

796 Raised if a Dataset with the given `DatasetRef` already exists in the

797 given collection. 

798 AmbiguousDatasetError 

799 Raised if ``any(ref.id is None for ref in refs)``. 

800 MissingCollectionError 

801 Raised if ``collection`` does not exist in the registry. 

802 TypeError 

803 Raised if adding new datasets to the given ``collection`` is not

804 allowed. 

805 """ 

806 collectionRecord = self._collections.find(collection) 

807 if collectionRecord.type is not CollectionType.TAGGED: 

808 raise TypeError(f"Collection '{collection}' has type {collectionRecord.type.name}, not TAGGED.") 

809 for datasetType, refsForType in DatasetRef.groupByType(refs, recursive=recursive).items(): 

810 storage = self._datasets.find(datasetType.name) 

811 try: 

812 storage.associate(collectionRecord, refsForType) 

813 except sqlalchemy.exc.IntegrityError as err: 

814 raise ConflictingDefinitionError( 

815 f"Constraint violation while associating dataset of type {datasetType.name} with " 

816 f"collection {collection}. This probably means that one or more datasets with the same " 

817 f"dataset type and data ID already exist in the collection, but it may also indicate " 

818 f"that the datasets do not exist." 

819 ) from err 

820 

821 @transactional 

822 def disassociate(self, collection: str, refs: Iterable[DatasetRef], *, recursive: bool = True): 

823 """Remove existing datasets from a `~CollectionType.TAGGED` collection. 

824 

825 ``collection`` and ``ref`` combinations that are not currently 

826 associated are silently ignored. 

827 

828 Parameters 

829 ---------- 

830 collection : `str` 

831 The collection the datasets should no longer be associated with. 

832 refs : `Iterable` [ `DatasetRef` ] 

833 An iterable of resolved `DatasetRef` instances that already exist 

834 in this `Registry`. 

835 recursive : `bool`, optional 

836 If `True`, disassociate all component datasets as well. Note that 

837 this only disassociates components that are actually included in 

838 the given `DatasetRef` instances, which may not be the same as 

839 those in the database (especially if they were obtained from 

840 `queryDatasets`, which does not populate `DatasetRef.components`). 

841 

842 Raises 

843 ------ 

844 AmbiguousDatasetError 

845 Raised if any of the given dataset references is unresolved. 

846 MissingCollectionError 

847 Raised if ``collection`` does not exist in the registry. 

848 TypeError 

849 Raised if removing datasets from the given ``collection`` is not

850 allowed. 

851 """ 

852 collectionRecord = self._collections.find(collection) 

853 if collectionRecord.type is not CollectionType.TAGGED: 

854 raise TypeError(f"Collection '{collection}' has type {collectionRecord.type.name}; " 

855 "expected TAGGED.") 

856 for datasetType, refsForType in DatasetRef.groupByType(refs, recursive=recursive).items(): 

857 storage = self._datasets.find(datasetType.name) 

858 storage.disassociate(collectionRecord, refsForType) 

859 

860 @transactional 

861 def insertDatasetLocations(self, datastoreName: str, refs: Iterable[DatasetRef]): 

862 """Record that a datastore holds the given datasets. 

863 

864 Typically used by `Datastore`. 

865 

866 Parameters 

867 ---------- 

868 datastoreName : `str` 

869 Name of the datastore holding these datasets. 

870 refs : `~collections.abc.Iterable` of `DatasetRef` 

871 References to the datasets. 

872 

873 Raises 

874 ------ 

875 AmbiguousDatasetError 

876 Raised if ``any(ref.id is None for ref in refs)``. 

877 """ 

878 self._db.insert( 

879 self._tables.dataset_location, 

880 *[{"datastore_name": datastoreName, "dataset_id": ref.getCheckedId()} for ref in refs] 

881 ) 

882 

883 @transactional 

884 def moveDatasetLocationToTrash(self, datastoreName: str, refs: Iterable[DatasetRef]): 

885 """Move the dataset location information to trash. 

886 

887 Parameters 

888 ---------- 

889 datastoreName : `str` 

890 Name of the datastore holding these datasets. 

891 refs : `~collections.abc.Iterable` of `DatasetRef` 

892 References to the datasets. 

893 """ 

894 # We only want to move rows that already exist in the main table 

895 filtered = self.checkDatasetLocations(datastoreName, refs) 

896 self.canDeleteDatasetLocations(datastoreName, filtered) 

897 self.removeDatasetLocation(datastoreName, filtered) 

898 

899 @transactional 

900 def canDeleteDatasetLocations(self, datastoreName: str, refs: Iterable[DatasetRef]): 

901 """Record that a datastore can delete this dataset 

902 

903 Parameters 

904 ---------- 

905 datastoreName : `str` 

906 Name of the datastore holding these datasets. 

907 refs : `~collections.abc.Iterable` of `DatasetRef` 

908 References to the datasets. 

909 

910 Raises 

911 ------ 

912 AmbiguousDatasetError 

913 Raised if ``any(ref.id is None for ref in refs)``. 

914 """ 

915 self._db.insert( 

916 self._tables.dataset_location_trash, 

917 *[{"datastore_name": datastoreName, "dataset_id": ref.getCheckedId()} for ref in refs] 

918 ) 

919 

920 def checkDatasetLocations(self, datastoreName: str, refs: Iterable[DatasetRef]) -> List[DatasetRef]: 

921 """Check which refs are listed for this datastore. 

922 

923 Parameters 

924 ---------- 

925 datastoreName : `str` 

926 Name of the datastore holding these datasets. 

927 refs : `~collections.abc.Iterable` of `DatasetRef` 

928 References to the datasets. 

929 

930 Returns 

931 ------- 

932 present : `list` of `DatasetRef` 

933 All the `DatasetRef` that are listed. 

934 """ 

935 

936 table = self._tables.dataset_location 

937 result = self._db.query( 

938 sqlalchemy.sql.select( 

939 [table.columns.datastore_name, table.columns.dataset_id] 

940 ).where( 

941 sqlalchemy.sql.and_(table.columns.dataset_id.in_([ref.id for ref in refs]), 

942 table.columns.datastore_name == datastoreName) 

943 ) 

944 ).fetchall() 

945 

946 matched_ids = {r["dataset_id"] for r in result} 

947 return [ref for ref in refs if ref.id in matched_ids] 

948 

949 def getDatasetLocations(self, ref: DatasetRef) -> Set[str]: 

950 """Retrieve datastore locations for a given dataset. 

951 

952 Typically used by `Datastore`. 

953 

954 Parameters 

955 ---------- 

956 ref : `DatasetRef` 

957 A reference to the dataset for which to retrieve storage 

958 information. 

959 

960 Returns 

961 ------- 

962 datastores : `set` of `str` 

963 All the matching datastores holding this dataset. Empty set 

964 if the dataset does not exist anywhere. 

965 

966 Raises 

967 ------ 

968 AmbiguousDatasetError 

969 Raised if ``ref.id`` is `None`. 

970 """ 

971 table = self._tables.dataset_location 

972 result = self._db.query( 

973 sqlalchemy.sql.select( 

974 [table.columns.datastore_name] 

975 ).where( 

976 table.columns.dataset_id == ref.id 

977 ) 

978 ).fetchall() 

979 return {r["datastore_name"] for r in result} 

980 

981 @transactional 

982 def getTrashedDatasets(self, datastoreName: str) -> Set[FakeDatasetRef]: 

983 """Retrieve all the dataset ref IDs that are in the trash 

984 associated with the specified datastore. 

985 

986 Parameters 

987 ---------- 

988 datastoreName : `str` 

989 The relevant datastore name to use. 

990 

991 Returns 

992 ------- 

993 ids : `set` of `FakeDatasetRef` 

994 The IDs of datasets that can be safely removed from this datastore. 

995 Can be empty. 

996 """ 

997 table = self._tables.dataset_location_trash 

998 result = self._db.query( 

999 sqlalchemy.sql.select( 

1000 [table.columns.dataset_id] 

1001 ).where( 

1002 table.columns.datastore_name == datastoreName 

1003 ) 

1004 ).fetchall() 

1005 return {FakeDatasetRef(r["dataset_id"]) for r in result} 

1006 

1007 @transactional 

1008 def emptyDatasetLocationsTrash(self, datastoreName: str, refs: Iterable[FakeDatasetRef]) -> None: 

1009 """Remove datastore location associated with these datasets from trash. 

1010 

1011 Typically used by `Datastore` when a dataset is removed. 

1012 

1013 Parameters 

1014 ---------- 

1015 datastoreName : `str` 

1016 Name of this `Datastore`. 

1017 refs : iterable of `FakeDatasetRef` 

1018 The dataset IDs to be removed. 

1019 

1020 Raises 

1021 ------ 

1022 AmbiguousDatasetError 

1023 Raised if ``ref.id`` is `None`. 

1024 """ 

1025 if not refs: 

1026 return 

1027 self._db.delete( 

1028 self._tables.dataset_location_trash, 

1029 ["dataset_id", "datastore_name"], 

1030 *[{"dataset_id": ref.id, "datastore_name": datastoreName} for ref in refs] 

1031 ) 

1032 

1033 @transactional 

1034 def removeDatasetLocation(self, datastoreName: str, refs: Iterable[DatasetRef]) -> None: 

1035 """Remove datastore location associated with this dataset. 

1036 

1037 Typically used by `Datastore` when a dataset is removed. 

1038 

1039 Parameters 

1040 ---------- 

1041 datastoreName : `str` 

1042 Name of this `Datastore`. 

1043 refs : iterable of `DatasetRef` 

1044 A reference to the dataset for which information is to be removed. 

1045 

1046 Raises 

1047 ------ 

1048 AmbiguousDatasetError 

1049 Raised if ``ref.id`` is `None`. 

1050 """ 

1051 if not refs: 

1052 return 

1053 self._db.delete( 

1054 self._tables.dataset_location, 

1055 ["dataset_id", "datastore_name"], 

1056 *[{"dataset_id": ref.getCheckedId(), "datastore_name": datastoreName} for ref in refs] 

1057 ) 

1058 

1059 def expandDataId(self, dataId: Optional[DataId] = None, *, graph: Optional[DimensionGraph] = None, 

1060 records: Optional[Mapping[DimensionElement, DimensionRecord]] = None, **kwds): 

1061 """Expand a dimension-based data ID to include additional information. 

1062 

1063 Parameters 

1064 ---------- 

1065 dataId : `DataCoordinate` or `dict`, optional 

1066 Data ID to be expanded; augmented and overridden by ``kwds``. 

1067 graph : `DimensionGraph`, optional 

1068 Set of dimensions for the expanded ID. If `None`, the dimensions 

1069 will be inferred from the keys of ``dataId`` and ``kwds``. 

1070 Dimensions that are in ``dataId`` or ``kwds`` but not in ``graph`` 

1071 are silently ignored, providing a way to extract and expand a 

1072 subset of a data ID. 

1073 records : mapping [`DimensionElement`, `DimensionRecord`], optional 

1074 Dimension record data to use before querying the database for that 

1075 data. 

1076 **kwds 

1077 Additional keywords are treated like additional key-value pairs for 

1078 ``dataId``, extending and overriding it.

1079 

1080 Returns 

1081 ------- 

1082 expanded : `ExpandedDataCoordinate` 

1083 A data ID that includes full metadata for all of the dimensions it 

1084 identifies.

1085 """ 

1086 standardized = DataCoordinate.standardize(dataId, graph=graph, universe=self.dimensions, **kwds) 

1087 if isinstance(standardized, ExpandedDataCoordinate): 

1088 return standardized 

1089 elif isinstance(dataId, ExpandedDataCoordinate): 

1090 records = dict(records) if records is not None else {} 

1091 records.update(dataId.records) 

1092 else: 

1093 records = dict(records) if records is not None else {} 

1094 keys = dict(standardized) 

1095 regions = [] 

1096 timespans = [] 

1097 for element in standardized.graph.primaryKeyTraversalOrder: 

1098 record = records.get(element.name, ...) # Use ... to mean not found; None might mean NULL 

1099 if record is ...: 

1100 storage = self._dimensions[element] 

1101 record = storage.fetch(keys) 

1102 records[element] = record 

1103 if record is not None: 

1104 for d in element.implied: 

1105 value = getattr(record, d.name) 

1106 if keys.setdefault(d, value) != value: 

1107 raise InconsistentDataIdError(f"Data ID {standardized} has {d.name}={keys[d]!r}, " 

1108 f"but {element.name} implies {d.name}={value!r}.") 

1109 if element in standardized.graph.spatial and record.region is not None: 

1110 if any(record.region.relate(r) & lsst.sphgeom.DISJOINT for r in regions): 

1111 raise InconsistentDataIdError(f"Data ID {standardized}'s region for {element.name} " 

1112 f"is disjoint with those for other elements.") 

1113 regions.append(record.region) 

1114 if element in standardized.graph.temporal: 

1115 if any(not record.timespan.overlaps(t) for t in timespans): 

1116 raise InconsistentDataIdError(f"Data ID {standardized}'s timespan for {element.name}" 

1117 f" is disjoint with those for other elements.") 

1118 timespans.append(record.timespan) 

1119 else: 

1120 if element in standardized.graph.required: 

1121 raise LookupError( 

1122 f"Could not fetch record for required dimension {element.name} via keys {keys}." 

1123 ) 

1124 if element.alwaysJoin: 

1125 raise InconsistentDataIdError( 

1126 f"Could not fetch record for element {element.name} via keys {keys}, ", 

1127 f"but it is marked alwaysJoin=True; this means one or more dimensions are not " 

1128 f"related." 

1129 ) 

1130 records.update((d, None) for d in element.implied) 

1131 return ExpandedDataCoordinate(standardized.graph, standardized.values(), records=records) 

1132 
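# Usage sketch (illustrative; data ID values are hypothetical): expansion
# fetches the full dimension records from the database, so the corresponding
# dimension rows must already exist.
#
#     expanded = registry.expandDataId(instrument="DummyCam", detector=1)
#     for element, record in expanded.records.items():
#         print(element, record)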

1133 def relateDataIds(self, a: DataId, b: DataId) -> Optional[ConsistentDataIds]: 

1134 """Compare the keys and values of a pair of data IDs for consistency. 

1135 

1136 See `ConsistentDataIds` for more information. 

1137 

1138 Parameters 

1139 ---------- 

1140 a : `dict` or `DataCoordinate` 

1141 First data ID to be compared. 

1142 b : `dict` or `DataCoordinate` 

1143 Second data ID to be compared. 

1144 

1145 Returns 

1146 ------- 

1147 relationship : `ConsistentDataIds` or `None` 

1148 Relationship information. This is not `None` and coerces to 

1149 `True` in boolean contexts if and only if the data IDs are 

1150 consistent in terms of all common key-value pairs, all many-to-many 

1151 join tables, and all spatial and temporal relationships.

1152 """ 

1153 a = DataCoordinate.standardize(a, universe=self.dimensions) 

1154 b = DataCoordinate.standardize(b, universe=self.dimensions) 

1155 aFull = getattr(a, "full", None) 

1156 bFull = getattr(b, "full", None) 

1157 aBest = aFull if aFull is not None else a 

1158 bBest = bFull if bFull is not None else b 

1159 jointKeys = aBest.keys() & bBest.keys() 

1160 # If any common values are not equal, we know they are inconsistent. 

1161 if any(aBest[k] != bBest[k] for k in jointKeys): 

1162 return None 

1163 # If the graphs are equal, we know the data IDs are. 

1164 if a.graph == b.graph: 

1165 return ConsistentDataIds(contains=True, within=True, overlaps=bool(jointKeys)) 

1166 # Result is still inconclusive. Try to expand a data ID containing 

1167 # keys from both; that will fail if they are inconsistent. 

1168 # First, if either input was already an ExpandedDataCoordinate, extract 

1169 # its records so we don't have to query for them. 

1170 records = {} 

1171 if hasattr(a, "records"): 

1172 records.update(a.records) 

1173 if hasattr(b, "records"): 

1174 records.update(b.records) 

1175 try: 

1176 self.expandDataId({**a, **b}, graph=(a.graph | b.graph), records=records) 

1177 except InconsistentDataIdError: 

1178 return None 

1179 # We know the answer is not `None`; time to figure out what it is. 

1180 return ConsistentDataIds( 

1181 contains=(a.graph >= b.graph), 

1182 within=(a.graph <= b.graph), 

1183 overlaps=bool(a.graph & b.graph), 

1184 ) 

1185 
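# Usage sketch (illustrative; data ID values are hypothetical): a `None`
# result means the data IDs conflict, while any `ConsistentDataIds` instance
# is truthy and describes how their keys relate.
#
#     rel = registry.relateDataIds({"instrument": "DummyCam", "detector": 1},
#                                  {"instrument": "DummyCam"})
#     if rel is None:
#         ...  # conflicting values, or disjoint regions/timespans
#     elif rel.equal:
#         ...  # same dimensions and values
#     elif rel.disjoint:
#         ...  # no keys in common (still consistent)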

1186 def insertDimensionData(self, element: Union[DimensionElement, str], 

1187 *data: Union[dict, DimensionRecord], 

1188 conform: bool = True): 

1189 """Insert one or more dimension records into the database. 

1190 

1191 Parameters 

1192 ---------- 

1193 element : `DimensionElement` or `str` 

1194 The `DimensionElement` or name thereof that identifies the table 

1195 records will be inserted into. 

1196 data : `dict` or `DimensionRecord` (variadic) 

1197 One or more records to insert. 

1198 conform : `bool`, optional 

1199 If `False` (`True` is default) perform no checking or conversions, 

1200 and assume that ``element`` is a `DimensionElement` instance and 

1201 ``data`` is one or more `DimensionRecord` instances of the

1202 appropriate subclass. 

1203 """ 

1204 if conform: 

1205 element = self.dimensions[element] # if this is a name, convert it to a true DimensionElement. 

1206 records = [element.RecordClass.fromDict(row) if not type(row) is element.RecordClass else row 

1207 for row in data] 

1208 else: 

1209 records = data 

1210 storage = self._dimensions[element] 

1211 storage.insert(*records) 

1212 

1213 def syncDimensionData(self, element: Union[DimensionElement, str], 

1214 row: Union[dict, DimensionRecord], 

1215 conform: bool = True) -> bool: 

1216 """Synchronize the given dimension record with the database, inserting 

1217 if it does not already exist and comparing values if it does. 

1218 

1219 Parameters 

1220 ---------- 

1221 element : `DimensionElement` or `str` 

1222 The `DimensionElement` or name thereof that identifies the table 

1223 records will be inserted into. 

1224 row : `dict` or `DimensionRecord` 

1225 The record to insert. 

1226 conform : `bool`, optional 

1227 If `False` (`True` is default) perform no checking or conversions, 

1228 and assume that ``element`` is a `DimensionElement` instance and 

1229 ``row`` is a `DimensionRecord` instance of the

1230 appropriate subclass. 

1231 

1232 Returns 

1233 ------- 

1234 inserted : `bool` 

1235 `True` if a new row was inserted, `False` otherwise. 

1236 

1237 Raises 

1238 ------ 

1239 ConflictingDefinitionError 

1240 Raised if the record exists in the database (according to primary 

1241 key lookup) but is inconsistent with the given one. 

1242 

1243 Notes 

1244 ----- 

1245 This method cannot be called within transactions, as it needs to be 

1246 able to perform its own transaction to be concurrent. 

1247 """ 

1248 if conform: 

1249 element = self.dimensions[element] # if this is a name, convert it to a true DimensionElement. 

1250 record = element.RecordClass.fromDict(row) if not type(row) is element.RecordClass else row 

1251 else: 

1252 record = row 

1253 storage = self._dimensions[element] 

1254 return storage.sync(record) 

1255 
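# Usage sketch (illustrative; the record fields shown depend on the configured
# dimension universe and are assumptions here):
#
#     registry.insertDimensionData("instrument", {"name": "DummyCam"})
#     inserted = registry.syncDimensionData("detector",
#                                           {"instrument": "DummyCam", "id": 1,
#                                            "full_name": "one"})
#     # `inserted` is False if an identical row already existed.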

1256 def queryDatasetTypes(self, expression: Any = ...) -> Iterator[DatasetType]: 

1257 """Iterate over the dataset types whose names match an expression. 

1258 

1259 Parameters 

1260 ---------- 

1261 expression : `Any`, optional 

1262 An expression that fully or partially identifies the dataset types 

1263 to return, such as a `str`, `re.Pattern`, or iterable thereof. 

1264 `...` can be used to return all dataset types, and is the default. 

1265 See :ref:`daf_butler_dataset_type_expressions` for more 

1266 information. 

1267 

1268 Yields 

1269 ------ 

1270 datasetType : `DatasetType` 

1271 A `DatasetType` instance whose name matches ``expression``. 

1272 """ 

1273 wildcard = CategorizedWildcard.fromExpression(expression, coerceUnrecognized=lambda d: d.name) 

1274 if wildcard is ...: 

1275 yield from self._datasets 

1276 return 

1277 done = set() 

1278 for name in wildcard.strings: 

1279 storage = self._datasets.find(name) 

1280 if storage is not None: 

1281 done.add(storage.datasetType) 

1282 yield storage.datasetType 

1283 if wildcard.patterns: 

1284 for datasetType in self._datasets: 

1285 if datasetType.name in done: 

1286 continue 

1287 if any(p.fullmatch(datasetType.name) for p in wildcard.patterns): 

1288 yield datasetType 

1289 

1290 def queryCollections(self, expression: Any = ..., 

1291 datasetType: Optional[DatasetType] = None, 

1292 collectionType: Optional[CollectionType] = None, 

1293 flattenChains: bool = False, 

1294 includeChains: Optional[bool] = None) -> Iterator[str]: 

1295 """Iterate over the collections whose names match an expression. 

1296 

1297 Parameters 

1298 ---------- 

1299 expression : `Any`, optional 

1300 An expression that fully or partially identifies the collections 

1301 to return, such as a `str`, `re.Pattern`, or iterable thereof. 

1302 `...` can be used to return all collections, and is the default. 

1303 See :ref:`daf_butler_collection_expressions` for more 

1304 information. 

1305 datasetType : `DatasetType`, optional 

1306 If provided, only yield collections that should be searched for 

1307 this dataset type according to ``expression``. If this is 

1308 not provided, any dataset type restrictions in ``expression`` are 

1309 ignored. 

1310 collectionType : `CollectionType`, optional 

1311 If provided, only yield collections of this type. 

1312 flattenChains : `bool`, optional 

1313 If `True` (`False` is default), recursively yield the child 

1314 collections of matching `~CollectionType.CHAINED` collections. 

1315 includeChains : `bool`, optional 

1316 If `True`, yield records for matching `~CollectionType.CHAINED` 

1317 collections. Default is the opposite of ``flattenChains``: include 

1318 either CHAINED collections or their children, but not both. 

1319 

1320 Yields 

1321 ------ 

1322 collection : `str` 

1323 The name of a collection that matches ``expression``. 

1324 """ 

1325 query = CollectionQuery.fromExpression(expression) 

1326 for record in query.iter(self._collections, datasetType=datasetType, collectionType=collectionType, 

1327 flattenChains=flattenChains, includeChains=includeChains): 

1328 yield record.name 

1329 
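# Usage sketch (illustrative): both query methods accept names, `re.Pattern`
# objects, iterables of either, or `...` for everything.
#
#     import re
#
#     rawTypes = list(registry.queryDatasetTypes(re.compile(r"raw.*")))
#     runs = list(registry.queryCollections(..., collectionType=CollectionType.RUN))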

1330 def makeQueryBuilder(self, summary: QuerySummary) -> QueryBuilder: 

1331 """Return a `QueryBuilder` instance capable of constructing and 

1332 managing more complex queries than those obtainable via `Registry` 

1333 interfaces. 

1334 

1335 This is an advanced interface; downstream code should prefer 

1336 `Registry.queryDimensions` and `Registry.queryDatasets` whenever those 

1337 are sufficient. 

1338 

1339 Parameters 

1340 ---------- 

1341 summary : `QuerySummary` 

1342 Object describing and categorizing the full set of dimensions that 

1343 will be included in the query. 

1344 

1345 Returns 

1346 ------- 

1347 builder : `QueryBuilder` 

1348 Object that can be used to construct and perform advanced queries. 

1349 """ 

1350 return QueryBuilder(summary=summary, 

1351 collections=self._collections, 

1352 dimensions=self._dimensions, 

1353 datasets=self._datasets) 

1354 

1355 def queryDimensions(self, dimensions: Union[Iterable[Union[Dimension, str]], Dimension, str], *, 

1356 dataId: Optional[DataId] = None, 

1357 datasets: Any = None, 

1358 collections: Any = None, 

1359 where: Optional[str] = None, 

1360 expand: bool = True, 

1361 **kwds) -> Iterator[DataCoordinate]: 

1362 """Query for and iterate over data IDs matching user-provided criteria. 

1363 

1364 Parameters 

1365 ---------- 

1366 dimensions : `Dimension` or `str`, or iterable thereof 

1367 The dimensions of the data IDs to yield, as either `Dimension` 

1368 instances or `str`. Will be automatically expanded to a complete 

1369 `DimensionGraph`. 

1370 dataId : `dict` or `DataCoordinate`, optional 

1371 A data ID whose key-value pairs are used as equality constraints 

1372 in the query. 

1373 datasets : `Any`, optional 

1374 An expression that fully or partially identifies dataset types 

1375 that should constrain the yielded data IDs. For example, including 

1376 "raw" here would constrain the yielded ``instrument``, 

1377 ``exposure``, ``detector``, and ``physical_filter`` values to only 

1378 those for which at least one "raw" dataset exists in 

1379 ``collections``. Allowed types include `DatasetType`, `str`, 

1380 `re.Pattern`, and iterables thereof. Unlike other dataset type 

1381 expressions, `...` is not permitted - it doesn't make sense to 

1382 constrain data IDs on the existence of *all* datasets. 

1383 See :ref:`daf_butler_dataset_type_expressions` for more 

1384 information. 

1385 collections : `Any`, optional

1386 An expression that fully or partially identifies the collections 

1387 to search for datasets, such as a `str`, `re.Pattern`, or iterable 

1388 thereof. `...` can be used to return all collections. Must be 

1389 provided if ``datasets`` is, and is ignored if it is not. See 

1390 :ref:`daf_butler_collection_expressions` for more information. 

1391 where : `str`, optional 

1392 A string expression similar to a SQL WHERE clause. May involve 

1393 any column of a dimension table or (as a shortcut for the primary 

1394 key column of a dimension table) dimension name. See 

1395 :ref:`daf_butler_dimension_expressions` for more information. 

1396 expand : `bool`, optional 

1397 If `True` (default) yield `ExpandedDataCoordinate` instead of 

1398 minimal `DataCoordinate` base-class instances. 

1399 kwds 

1400 Additional keyword arguments are forwarded to 

1401 `DataCoordinate.standardize` when processing the ``dataId`` 

1402 argument (and may be used to provide a constraining data ID even 

1403 when the ``dataId`` argument is `None`). 

1404 

1405 Yields 

1406 ------ 

1407 dataId : `DataCoordinate` 

1408 Data IDs matching the given query parameters. Order is 

1409 unspecified. 

1410 """ 

1411 dimensions = iterable(dimensions) 

1412 standardizedDataId = self.expandDataId(dataId, **kwds) 

1413 standardizedDatasetTypes = [] 

1414 requestedDimensionNames = set(self.dimensions.extract(dimensions).names) 

1415 if datasets is not None: 

1416 if collections is None: 

1417 raise TypeError("Cannot pass 'datasets' without 'collections'.") 

1418 for datasetType in self.queryDatasetTypes(datasets): 

1419 requestedDimensionNames.update(datasetType.dimensions.names) 

1420 standardizedDatasetTypes.append(datasetType) 

1421 # Preprocess collections expression in case the original included 

1422 # single-pass iterators (we'll want to use it multiple times 

1423 # below). 

1424 collections = CollectionQuery.fromExpression(collections) 

1425 

1426 summary = QuerySummary( 

1427 requested=DimensionGraph(self.dimensions, names=requestedDimensionNames), 

1428 dataId=standardizedDataId, 

1429 expression=where, 

1430 ) 

1431 builder = self.makeQueryBuilder(summary) 

1432 for datasetType in standardizedDatasetTypes: 

1433 builder.joinDataset(datasetType, collections, isResult=False) 

1434 query = builder.finish() 

1435 predicate = query.predicate() 

1436 for row in self._db.query(query.sql): 

1437 if predicate(row): 

1438 result = query.extractDataId(row) 

1439 if expand: 

1440 yield self.expandDataId(result, records=standardizedDataId.records) 

1441 else: 

1442 yield result 

1443 
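# Usage sketch (illustrative; the dataset type, collection, and `where`
# expression are hypothetical):
#
#     dataIds = registry.queryDimensions(
#         ["exposure", "detector"],
#         datasets="rawlike",
#         collections=["hypothetical/run1"],
#         where="instrument = 'DummyCam' AND detector = 1",
#     )
#     for dataId in dataIds:
#         print(dataId["exposure"], dataId["detector"])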

1444 def queryDatasets(self, datasetType: Any, *, 

1445 collections: Any, 

1446 dimensions: Optional[Iterable[Union[Dimension, str]]] = None, 

1447 dataId: Optional[DataId] = None, 

1448 where: Optional[str] = None, 

1449 deduplicate: bool = False, 

1450 expand: bool = True, 

1451 **kwds) -> Iterator[DatasetRef]: 

1452 """Query for and iterate over dataset references matching user-provided 

1453 criteria. 

1454 

1455 Parameters 

1456 ---------- 

1457 datasetType 

1458 An expression that fully or partially identifies the dataset types 

1459 to be queried. Allowed types include `DatasetType`, `str`, 

1460 `re.Pattern`, and iterables thereof. The special value `...` can 

1461 be used to query all dataset types. See 

1462 :ref:`daf_butler_dataset_type_expressions` for more information. 

1463 collections 

1464 An expression that fully or partially identifies the collections 

1465 to search for datasets, such as a `str`, `re.Pattern`, or iterable 

1466 thereof. `...` can be used to return all collections. See 

1467 :ref:`daf_butler_collection_expressions` for more information. 

1468 dimensions : `~collections.abc.Iterable` of `Dimension` or `str` 

1469 Dimensions to include in the query (in addition to those used 

1470 to identify the queried dataset type(s)), either to constrain 

1471 the resulting datasets to those for which a matching dimension 

1472 exists, or to relate the dataset type's dimensions to dimensions 

1473 referenced by the ``dataId`` or ``where`` arguments. 

1474 dataId : `dict` or `DataCoordinate`, optional 

1475 A data ID whose key-value pairs are used as equality constraints 

1476 in the query. 

1477 where : `str`, optional 

1478 A string expression similar to a SQL WHERE clause. May involve 

1479 any column of a dimension table or (as a shortcut for the primary 

1480 key column of a dimension table) dimension name. See 

1481 :ref:`daf_butler_dimension_expressions` for more information. 

1482 deduplicate : `bool`, optional 

1483 If `True` (`False` is default), for each result data ID, only 

1484 yield one `DatasetRef` of each `DatasetType`, from the first 

1485 collection in which a dataset of that dataset type appears 

1486 (according to the order of ``collections`` passed in). If `True`, 

1487 ``collections`` must not contain regular expressions and may not 

1488 be `...`. 

1489 expand : `bool`, optional 

1490 If `True` (default) attach `ExpandedDataCoordinate` instead of 

1491 minimal `DataCoordinate` base-class instances. 

1492 kwds 

1493 Additional keyword arguments are forwarded to 

1494 `DataCoordinate.standardize` when processing the ``dataId`` 

1495 argument (and may be used to provide a constraining data ID even 

1496 when the ``dataId`` argument is `None`). 

1497 

1498 Yields 

1499 ------ 

1500 ref : `DatasetRef` 

1501 Dataset references matching the given query criteria. These 

1502 are grouped by `DatasetType` if the query evaluates to multiple 

1503 dataset types, but order is otherwise unspecified. 

1504 

1505 Raises 

1506 ------ 

1507 TypeError 

1508 Raised when the arguments are incompatible, such as when a 

1509 collection wildcard is passed when ``deduplicate`` is `True`. 

1510 

1511 Notes 

1512 ----- 

1513 When multiple dataset types are queried in a single call, the 

1514 results of this operation are equivalent to querying for each dataset 

1515 type separately in turn, and no information about the relationships 

1516 between datasets of different types is included. In contexts where 

1517 that kind of information is important, the recommended pattern is to 

1518 use `queryDimensions` to first obtain data IDs (possibly with the 

1519 desired dataset types and collections passed as constraints to the 

1520 query), and then use multiple (generally much simpler) calls to 

1521 `queryDatasets` with the returned data IDs passed as constraints. 

1522 """ 

1523 # Standardize the collections expression. 

1524 if deduplicate: 

1525 collections = CollectionSearch.fromExpression(collections) 

1526 else: 

1527 collections = CollectionQuery.fromExpression(collections) 

1528 # Standardize and expand the data ID provided as a constraint. 

1529 standardizedDataId = self.expandDataId(dataId, **kwds) 

1530 # If the datasetType passed isn't actually a DatasetType, expand it 

1531 # (it could be an expression that yields multiple DatasetTypes) and 

1532 # recurse. 

1533 if not isinstance(datasetType, DatasetType): 

1534 for trueDatasetType in self.queryDatasetTypes(datasetType): 

1535 yield from self.queryDatasets(trueDatasetType, collections=collections, 

1536 dimensions=dimensions, dataId=standardizedDataId, 

1537 where=where, deduplicate=deduplicate, expand=expand)

1538 return 

1539 # The full set of dimensions in the query is the combination of those 

1540 # needed for the DatasetType and those explicitly requested, if any. 

1541 requestedDimensionNames = set(datasetType.dimensions.names) 

1542 if dimensions is not None: 

1543 requestedDimensionNames.update(self.dimensions.extract(dimensions).names) 

1544 # Construct the summary structure needed to construct a QueryBuilder. 

1545 summary = QuerySummary( 

1546 requested=DimensionGraph(self.dimensions, names=requestedDimensionNames), 

1547 dataId=standardizedDataId, 

1548 expression=where, 

1549 ) 

1550 builder = self.makeQueryBuilder(summary) 

1551 # Add the dataset subquery to the query, telling the QueryBuilder to 

1552 # include the rank of the selected collection in the results only if we 

1553 # need to deduplicate. Note that if any of the collections are 

1554 # actually wildcard expressions, and we've asked for deduplication, 

1555 # this will raise TypeError for us. 

1556 if not builder.joinDataset(datasetType, collections, isResult=True, addRank=deduplicate): 

1557 return 

1558 query = builder.finish() 

1559 predicate = query.predicate() 

1560 if not deduplicate: 

1561 # No need to de-duplicate across collections. 

1562 for row in self._db.query(query.sql): 

1563 if predicate(row): 

1564 dataId = query.extractDataId(row, graph=datasetType.dimensions) 

1565 if expand: 

1566 dataId = self.expandDataId(dataId, records=standardizedDataId.records) 

1567 yield query.extractDatasetRef(row, datasetType, dataId)[0] 

1568 else: 

1569 # For each data ID, yield only the DatasetRef with the lowest 

1570 # collection rank. 

1571 bestRefs = {} 

1572 bestRanks = {} 

1573 for row in self._db.query(query.sql): 

1574 if predicate(row): 

1575 ref, rank = query.extractDatasetRef(row, datasetType) 

1576 bestRank = bestRanks.get(ref.dataId, sys.maxsize) 

1577 if rank < bestRank: 

1578 bestRefs[ref.dataId] = ref 

1579 bestRanks[ref.dataId] = rank 

1580 # If caller requested expanded data IDs, we defer that until here 

1581 # so we do as little expansion as possible. 

1582 if expand: 

1583 for ref in bestRefs.values(): 

1584 dataId = self.expandDataId(ref.dataId, records=standardizedDataId.records) 

1585 yield ref.expanded(dataId) 

1586 else: 

1587 yield from bestRefs.values() 

1588 
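# Usage sketch (illustrative; names are hypothetical): with deduplicate=True
# the collections argument must be an ordered, pattern-free search path, and
# only the first match per data ID is yielded.
#
#     refs = registry.queryDatasets("rawlike",
#                                   collections=["hypothetical/run1",
#                                                "hypothetical/run2"],
#                                   where="detector = 1",
#                                   deduplicate=True)
#     for ref in refs:
#         print(ref.datasetType.name, ref.dataId, ref.run)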

1589 dimensions: DimensionUniverse 

1590 """The universe of all dimensions known to the registry 

1591 (`DimensionUniverse`). 

1592 """ 

1593 

1594 storageClasses: StorageClassFactory 

1595 """All storage classes known to the registry (`StorageClassFactory`). 

1596 """