Coverage for python/lsst/daf/butler/registry/sql_registry.py: 18%

570 statements  

coverage.py v7.3.2, created at 2023-10-27 09:44 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30from .. import ddl 

31 

32__all__ = ("SqlRegistry",) 

33 

34import contextlib 

35import logging 

36import warnings 

37from collections.abc import Iterable, Iterator, Mapping, Sequence 

38from typing import TYPE_CHECKING, Any, Literal, cast 

39 

40import sqlalchemy 

41from lsst.daf.relation import LeafRelation, Relation 

42from lsst.resources import ResourcePathExpression 

43from lsst.utils.introspection import find_outside_stacklevel 

44from lsst.utils.iteration import ensure_iterable 

45 

46from .._column_tags import DatasetColumnTag 

47from .._config import Config 

48from .._dataset_association import DatasetAssociation 

49from .._dataset_ref import DatasetId, DatasetIdGenEnum, DatasetRef 

50from .._dataset_type import DatasetType 

51from .._named import NamedKeyMapping, NameLookupMapping 

52from .._storage_class import StorageClassFactory 

53from .._timespan import Timespan 

54from ..dimensions import ( 

55 DataCoordinate, 

56 DataId, 

57 Dimension, 

58 DimensionConfig, 

59 DimensionElement, 

60 DimensionGraph, 

61 DimensionRecord, 

62 DimensionUniverse, 

63) 

64from ..progress import Progress 

65from ..registry import ( 

66 ArgumentError, 

67 CollectionExpressionError, 

68 CollectionSummary, 

69 CollectionType, 

70 CollectionTypeError, 

71 ConflictingDefinitionError, 

72 DataIdValueError, 

73 DatasetTypeError, 

74 DimensionNameError, 

75 InconsistentDataIdError, 

76 NoDefaultCollectionError, 

77 OrphanedRecordError, 

78 RegistryConfig, 

79 RegistryConsistencyError, 

80 RegistryDefaults, 

81 queries, 

82) 

83from ..registry.interfaces import ChainedCollectionRecord, ReadOnlyDatabaseError, RunRecord 

84from ..registry.managers import RegistryManagerInstances, RegistryManagerTypes 

85from ..registry.wildcards import CollectionWildcard, DatasetTypeWildcard 

86from ..utils import transactional 

87 

88if TYPE_CHECKING: 

89 from .._butler_config import ButlerConfig 

90 from ..datastore._datastore import DatastoreOpaqueTable 

91 from ..datastore.stored_file_info import StoredDatastoreItemInfo 

92 from ..registry._registry import CollectionArgType 

93 from ..registry.interfaces import ( 

94 CollectionRecord, 

95 Database, 

96 DatastoreRegistryBridgeManager, 

97 ObsCoreTableManager, 

98 ) 

99 

100 

101_LOG = logging.getLogger(__name__) 

102 

103 

104class SqlRegistry: 

105 """Butler Registry implementation that uses SQL database as backend. 

106 

107 Parameters 

108 ---------- 

109 database : `Database` 

110 Database instance to store Registry. 

111 defaults : `RegistryDefaults` 

112 Default collection search path and/or output `~CollectionType.RUN` 

113 collection. 

114 managers : `RegistryManagerInstances` 

115 All the managers required for this registry. 

116 """ 

117 

118 defaultConfigFile: str | None = None 

119 """Path to configuration defaults. Accessed within the ``configs`` resource 

120 or relative to a search path. Can be `None` if no defaults are specified. 

121 """ 

122 

123 @classmethod 

124 def forceRegistryConfig( 

125 cls, config: ButlerConfig | RegistryConfig | Config | str | None 

126 ) -> RegistryConfig: 

127 """Force the supplied config to a `RegistryConfig`. 

128 

129 Parameters 

130 ---------- 

131 config : `RegistryConfig`, `Config` or `str` or `None` 

132 Registry configuration, if missing then default configuration will 

133 be loaded from registry.yaml. 

134 

135 Returns 

136 ------- 

137 registry_config : `RegistryConfig` 

138 A registry config. 

139 """ 

140 if not isinstance(config, RegistryConfig): 

141 if isinstance(config, str | Config) or config is None: 

142 config = RegistryConfig(config) 

143 else: 

144 raise ValueError(f"Incompatible Registry configuration: {config}") 

145 return config 

146 

147 @classmethod 

148 def createFromConfig( 

149 cls, 

150 config: RegistryConfig | str | None = None, 

151 dimensionConfig: DimensionConfig | str | None = None, 

152 butlerRoot: ResourcePathExpression | None = None, 

153 ) -> SqlRegistry: 

154 """Create registry database and return `SqlRegistry` instance. 

155 

156 This method initializes the database contents; the database must be 

157 empty prior to calling this method. 

158 

159 Parameters 

160 ---------- 

161 config : `RegistryConfig` or `str`, optional 

162 Registry configuration; if missing, the default configuration will 

163 be loaded from ``registry.yaml``. 

164 dimensionConfig : `DimensionConfig` or `str`, optional 

165 Dimensions configuration; if missing, the default configuration 

166 will be loaded from ``dimensions.yaml``. 

167 butlerRoot : convertible to `lsst.resources.ResourcePath`, optional 

168 Path to the repository root this `SqlRegistry` will manage. 

169 

170 Returns 

171 ------- 

172 registry : `SqlRegistry` 

173 A new `SqlRegistry` instance. 

174 """ 

175 config = cls.forceRegistryConfig(config) 

176 config.replaceRoot(butlerRoot) 

177 

178 if isinstance(dimensionConfig, str): 

179 dimensionConfig = DimensionConfig(dimensionConfig) 

180 elif dimensionConfig is None: 

181 dimensionConfig = DimensionConfig() 

182 elif not isinstance(dimensionConfig, DimensionConfig): 

183 raise TypeError(f"Incompatible Dimension configuration type: {type(dimensionConfig)}") 

184 

185 DatabaseClass = config.getDatabaseClass() 

186 database = DatabaseClass.fromUri( 

187 config.connectionString, origin=config.get("origin", 0), namespace=config.get("namespace") 

188 ) 

189 managerTypes = RegistryManagerTypes.fromConfig(config) 

190 managers = managerTypes.makeRepo(database, dimensionConfig) 

191 return cls(database, RegistryDefaults(), managers) 

192 

193 @classmethod 

194 def fromConfig( 

195 cls, 

196 config: ButlerConfig | RegistryConfig | Config | str, 

197 butlerRoot: ResourcePathExpression | None = None, 

198 writeable: bool = True, 

199 defaults: RegistryDefaults | None = None, 

200 ) -> SqlRegistry: 

201 """Create `Registry` subclass instance from `config`. 

202 

203 Registry database must be initialized prior to calling this method. 

204 

205 Parameters 

206 ---------- 

207 config : `ButlerConfig`, `RegistryConfig`, `Config` or `str` 

208 Registry configuration. 

209 butlerRoot : `lsst.resources.ResourcePathExpression`, optional 

210 Path to the repository root this `Registry` will manage. 

211 writeable : `bool`, optional 

212 If `True` (default) create a read-write connection to the database. 

213 defaults : `RegistryDefaults`, optional 

214 Default collection search path and/or output `~CollectionType.RUN` 

215 collection. 

216 

217 Returns 

218 ------- 

219 registry : `SqlRegistry` 

220 A new `SqlRegistry` subclass instance. 

221 """ 

222 config = cls.forceRegistryConfig(config) 

223 config.replaceRoot(butlerRoot) 

224 DatabaseClass = config.getDatabaseClass() 

225 database = DatabaseClass.fromUri( 

226 config.connectionString, 

227 origin=config.get("origin", 0), 

228 namespace=config.get("namespace"), 

229 writeable=writeable, 

230 ) 

231 managerTypes = RegistryManagerTypes.fromConfig(config) 

232 with database.session(): 

233 managers = managerTypes.loadRepo(database) 

234 if defaults is None: 

235 defaults = RegistryDefaults() 

236 return cls(database, defaults, managers) 

237 
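A brief usage sketch for the two construction paths above: ``createFromConfig`` initializes a brand-new, empty registry database, while ``fromConfig`` attaches to an already-initialized one. The repository path is hypothetical, and the ``registry`` object created here is reused by the later sketches in this listing.

from lsst.daf.butler.registry.sql_registry import SqlRegistry

# Initialize a brand-new registry database from the default configuration;
# the database behind the configuration must be empty.
new_registry = SqlRegistry.createFromConfig(butlerRoot="./repo")

# Connect to an already-initialized registry, read-only.
registry = SqlRegistry.fromConfig("./repo/butler.yaml", writeable=False)
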

238 def __init__(self, database: Database, defaults: RegistryDefaults, managers: RegistryManagerInstances): 

239 self._db = database 

240 self._managers = managers 

241 self.storageClasses = StorageClassFactory() 

242 # Intentionally invoke property setter to initialize defaults. This 

243 # can only be done after most of the rest of Registry has already been 

244 # initialized, and must be done before the property getter is used. 

245 self.defaults = defaults 

246 

247 # TODO: This is currently initialized by `make_datastore_tables`, 

248 # eventually we'll need to do it during construction. 

249 # The mapping is indexed by the opaque table name. 

250 self._datastore_record_classes: Mapping[str, type[StoredDatastoreItemInfo]] = {} 

251 

252 def __str__(self) -> str: 

253 return str(self._db) 

254 

255 def __repr__(self) -> str: 

256 return f"SqlRegistry({self._db!r}, {self.dimensions!r})" 

257 

258 def isWriteable(self) -> bool: 

259 """Return `True` if this registry allows write operations, and `False` 

260 otherwise. 

261 """ 

262 return self._db.isWriteable() 

263 

264 def copy(self, defaults: RegistryDefaults | None = None) -> SqlRegistry: 

265 """Create a new `SqlRegistry` backed by the same data repository 

266 and connection as this one, but independent defaults. 

267 

268 Parameters 

269 ---------- 

270 defaults : `~lsst.daf.butler.registry.RegistryDefaults`, optional 

271 Default collections and data ID values for the new registry. If 

272 not provided, ``self.defaults`` will be used (but future changes 

273 to either registry's defaults will not affect the other). 

274 

275 Returns 

276 ------- 

277 copy : `SqlRegistry` 

278 A new `SqlRegistry` instance with its own defaults. 

279 

280 Notes 

281 ----- 

282 Because the new registry shares a connection with the original, they 

283 also share transaction state (despite the fact that their `transaction` 

284 context manager methods do not reflect this), and must be used with 

285 care. 

286 """ 

287 if defaults is None: 

288 # No need to copy, because `RegistryDefaults` is immutable; we 

289 # effectively copy on write. 

290 defaults = self.defaults 

291 return type(self)(self._db, defaults, self._managers) 

292 

293 @property 

294 def dimensions(self) -> DimensionUniverse: 

295 """Definitions of all dimensions recognized by this `Registry` 

296 (`DimensionUniverse`). 

297 """ 

298 return self._managers.dimensions.universe 

299 

300 @property 

301 def defaults(self) -> RegistryDefaults: 

302 """Default collection search path and/or output `~CollectionType.RUN` 

303 collection (`~lsst.daf.butler.registry.RegistryDefaults`). 

304 

305 This is an immutable struct whose components may not be set 

306 individually, but the entire struct can be set by assigning to this 

307 property. 

308 """ 

309 return self._defaults 

310 

311 @defaults.setter 

312 def defaults(self, value: RegistryDefaults) -> None: 

313 if value.run is not None: 

314 self.registerRun(value.run) 

315 value.finish(self) 

316 self._defaults = value 

317 

318 def refresh(self) -> None: 

319 """Refresh all in-memory state by querying the database. 

320 

321 This may be necessary to enable querying for entities added by other 

322 registry instances after this one was constructed. 

323 """ 

324 with self._db.transaction(): 

325 self._managers.refresh() 

326 

327 @contextlib.contextmanager 

328 def transaction(self, *, savepoint: bool = False) -> Iterator[None]: 

329 """Return a context manager that represents a transaction.""" 

330 try: 

331 with self._db.transaction(savepoint=savepoint): 

332 yield 

333 except BaseException: 

334 # TODO: this clears the caches sometimes when we wouldn't actually 

335 # need to. Can we avoid that? 

336 self._managers.dimensions.clearCaches() 

337 raise 

338 
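A minimal sketch of the ``transaction`` context manager above: registry writes inside the block are committed together, and an exception rolls them back (clearing the dimension caches as a side effect). The collection name is hypothetical and assumed to exist already.

with registry.transaction(savepoint=True):
    registry.setCollectionDocumentation("u/someone/run", "example docs")
    # Raising an exception before the block exits would roll back the change.
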

339 def resetConnectionPool(self) -> None: 

340 """Reset SQLAlchemy connection pool for `SqlRegistry` database. 

341 

342 This operation is useful when using the registry with fork-based 

343 multiprocessing. To use the registry across a fork boundary, one has 

344 to make sure that there are no currently active connections (no 

345 session or transaction in progress) and that the connection pool is 

346 reset using this method. This method should be called by the child 

347 process immediately after the fork. 

348 """ 

349 self._db._engine.dispose() 

350 

351 def registerOpaqueTable(self, tableName: str, spec: ddl.TableSpec) -> None: 

352 """Add an opaque (to the `Registry`) table for use by a `Datastore` or 

353 other data repository client. 

354 

355 Opaque table records can be added via `insertOpaqueData`, retrieved via 

356 `fetchOpaqueData`, and removed via `deleteOpaqueData`. 

357 

358 Parameters 

359 ---------- 

360 tableName : `str` 

361 Logical name of the opaque table. This may differ from the 

362 actual name used in the database by a prefix and/or suffix. 

363 spec : `ddl.TableSpec` 

364 Specification for the table to be added. 

365 """ 

366 self._managers.opaque.register(tableName, spec) 

367 

368 @transactional 

369 def insertOpaqueData(self, tableName: str, *data: dict) -> None: 

370 """Insert records into an opaque table. 

371 

372 Parameters 

373 ---------- 

374 tableName : `str` 

375 Logical name of the opaque table. Must match the name used in a 

376 previous call to `registerOpaqueTable`. 

377 data 

378 Each additional positional argument is a dictionary that represents 

379 a single row to be added. 

380 """ 

381 self._managers.opaque[tableName].insert(*data) 

382 

383 def fetchOpaqueData(self, tableName: str, **where: Any) -> Iterator[Mapping[str, Any]]: 

384 """Retrieve records from an opaque table. 

385 

386 Parameters 

387 ---------- 

388 tableName : `str` 

389 Logical name of the opaque table. Must match the name used in a 

390 previous call to `registerOpaqueTable`. 

391 where 

392 Additional keyword arguments are interpreted as equality 

393 constraints that restrict the returned rows (combined with AND); 

394 keyword arguments are column names and values are the values they 

395 must have. 

396 

397 Yields 

398 ------ 

399 row : `dict` 

400 A dictionary representing a single result row. 

401 """ 

402 yield from self._managers.opaque[tableName].fetch(**where) 

403 

404 @transactional 

405 def deleteOpaqueData(self, tableName: str, **where: Any) -> None: 

406 """Remove records from an opaque table. 

407 

408 Parameters 

409 ---------- 

410 tableName : `str` 

411 Logical name of the opaque table. Must match the name used in a 

412 previous call to `registerOpaqueTable`. 

413 where 

414 Additional keyword arguments are interpreted as equality 

415 constraints that restrict the deleted rows (combined with AND); 

416 keyword arguments are column names and values are the values they 

417 must have. 

418 """ 

419 self._managers.opaque[tableName].delete(where.keys(), where) 

420 
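A sketch of the opaque-table round trip provided by the four methods above. The table name, field layout, and key values are hypothetical; the `ddl.TableSpec`/`ddl.FieldSpec` construction follows the ``ddl`` module imported at the top of this file, but the exact spec shown is only an assumption.

import uuid

import sqlalchemy
from lsst.daf.butler import ddl

spec = ddl.TableSpec(
    fields=[
        ddl.FieldSpec("dataset_id", dtype=ddl.GUID, primaryKey=True),
        ddl.FieldSpec("path", dtype=sqlalchemy.String, length=256),
    ]
)
registry.registerOpaqueTable("example_records", spec)
row_id = uuid.uuid4()
registry.insertOpaqueData("example_records", {"dataset_id": row_id, "path": "a/b.fits"})
rows = list(registry.fetchOpaqueData("example_records", dataset_id=row_id))
registry.deleteOpaqueData("example_records", dataset_id=row_id)
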

421 def registerCollection( 

422 self, name: str, type: CollectionType = CollectionType.TAGGED, doc: str | None = None 

423 ) -> bool: 

424 """Add a new collection if one with the given name does not exist. 

425 

426 Parameters 

427 ---------- 

428 name : `str` 

429 The name of the collection to create. 

430 type : `CollectionType` 

431 Enum value indicating the type of collection to create. 

432 doc : `str`, optional 

433 Documentation string for the collection. 

434 

435 Returns 

436 ------- 

437 registered : `bool` 

438 Boolean indicating whether the collection was created by this call 

439 (`True`) or already existed (`False`). 

440 

441 Notes 

442 ----- 

443 This method cannot be called within transactions, as it needs to be 

444 able to perform its own transaction to be concurrent. 

445 """ 

446 _, registered = self._managers.collections.register(name, type, doc=doc) 

447 return registered 

448 

449 def getCollectionType(self, name: str) -> CollectionType: 

450 """Return an enumeration value indicating the type of the given 

451 collection. 

452 

453 Parameters 

454 ---------- 

455 name : `str` 

456 The name of the collection. 

457 

458 Returns 

459 ------- 

460 type : `CollectionType` 

461 Enum value indicating the type of this collection. 

462 

463 Raises 

464 ------ 

465 lsst.daf.butler.registry.MissingCollectionError 

466 Raised if no collection with the given name exists. 

467 """ 

468 return self._managers.collections.find(name).type 

469 

470 def _get_collection_record(self, name: str) -> CollectionRecord: 

471 """Return the record for this collection. 

472 

473 Parameters 

474 ---------- 

475 name : `str` 

476 Name of the collection for which the record is to be retrieved. 

477 

478 Returns 

479 ------- 

480 record : `CollectionRecord` 

481 The record for this collection. 

482 """ 

483 return self._managers.collections.find(name) 

484 

485 def registerRun(self, name: str, doc: str | None = None) -> bool: 

486 """Add a new run if one with the given name does not exist. 

487 

488 Parameters 

489 ---------- 

490 name : `str` 

491 The name of the run to create. 

492 doc : `str`, optional 

493 Documentation string for the collection. 

494 

495 Returns 

496 ------- 

497 registered : `bool` 

498 Boolean indicating whether a new run was registered. `False` 

499 if it already existed. 

500 

501 Notes 

502 ----- 

503 This method cannot be called within transactions, as it needs to be 

504 able to perform its own transaction to be concurrent. 

505 """ 

506 _, registered = self._managers.collections.register(name, CollectionType.RUN, doc=doc) 

507 return registered 

508 

509 @transactional 

510 def removeCollection(self, name: str) -> None: 

511 """Remove the given collection from the registry. 

512 

513 Parameters 

514 ---------- 

515 name : `str` 

516 The name of the collection to remove. 

517 

518 Raises 

519 ------ 

520 lsst.daf.butler.registry.MissingCollectionError 

521 Raised if no collection with the given name exists. 

522 sqlalchemy.exc.IntegrityError 

523 Raised if the database rows associated with the collection are 

524 still referenced by some other table, such as a dataset in a 

525 datastore (for `~CollectionType.RUN` collections only) or a 

526 `~CollectionType.CHAINED` collection of which this collection is 

527 a child. 

528 

529 Notes 

530 ----- 

531 If this is a `~CollectionType.RUN` collection, all datasets and quanta 

532 in it will be removed from the `Registry` database. This requires that 

533 those datasets be removed (or at least trashed) from any datastores 

534 that hold them first. 

535 

536 A collection may not be deleted as long as it is referenced by a 

537 `~CollectionType.CHAINED` collection; the ``CHAINED`` collection must 

538 be deleted or redefined first. 

539 """ 

540 self._managers.collections.remove(name) 

541 

542 def getCollectionChain(self, parent: str) -> tuple[str, ...]: 

543 """Return the child collections in a `~CollectionType.CHAINED` 

544 collection. 

545 

546 Parameters 

547 ---------- 

548 parent : `str` 

549 Name of the chained collection. Must have already been added via 

550 a call to `Registry.registerCollection`. 

551 

552 Returns 

553 ------- 

554 children : `~collections.abc.Sequence` [ `str` ] 

555 An ordered sequence of collection names that are searched when the 

556 given chained collection is searched. 

557 

558 Raises 

559 ------ 

560 lsst.daf.butler.registry.MissingCollectionError 

561 Raised if ``parent`` does not exist in the `Registry`. 

562 lsst.daf.butler.registry.CollectionTypeError 

563 Raised if ``parent`` does not correspond to a 

564 `~CollectionType.CHAINED` collection. 

565 """ 

566 record = self._managers.collections.find(parent) 

567 if record.type is not CollectionType.CHAINED: 

568 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.") 

569 assert isinstance(record, ChainedCollectionRecord) 

570 return record.children 

571 

572 @transactional 

573 def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None: 

574 """Define or redefine a `~CollectionType.CHAINED` collection. 

575 

576 Parameters 

577 ---------- 

578 parent : `str` 

579 Name of the chained collection. Must have already been added via 

580 a call to `Registry.registerCollection`. 

581 children : collection expression 

582 An expression defining an ordered search of child collections, 

583 generally an iterable of `str`; see 

584 :ref:`daf_butler_collection_expressions` for more information. 

585 flatten : `bool`, optional 

586 If `True` (`False` is default), recursively flatten out any nested 

587 `~CollectionType.CHAINED` collections in ``children`` first. 

588 

589 Raises 

590 ------ 

591 lsst.daf.butler.registry.MissingCollectionError 

592 Raised when any of the given collections do not exist in the 

593 `Registry`. 

594 lsst.daf.butler.registry.CollectionTypeError 

595 Raised if ``parent`` does not correspond to a 

596 `~CollectionType.CHAINED` collection. 

597 ValueError 

598 Raised if the given collections contain a cycle. 

599 """ 

600 record = self._managers.collections.find(parent) 

601 if record.type is not CollectionType.CHAINED: 

602 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.") 

603 assert isinstance(record, ChainedCollectionRecord) 

604 children = CollectionWildcard.from_expression(children).require_ordered() 

605 if children != record.children or flatten: 

606 record.update(self._managers.collections, children, flatten=flatten) 

607 
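A sketch of chained-collection maintenance with ``getCollectionChain`` and ``setCollectionChain`` above, using hypothetical collection names ("run/a" and "run/b" are assumed to be existing RUN collections). Note that ``registerCollection`` must be called outside a transaction.

from lsst.daf.butler.registry import CollectionType

registry.registerCollection("chain/example", CollectionType.CHAINED)
registry.setCollectionChain("chain/example", ["run/a", "run/b"])
assert registry.getCollectionChain("chain/example") == ("run/a", "run/b")
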

608 def getCollectionParentChains(self, collection: str) -> set[str]: 

609 """Return the CHAINED collections that directly contain the given one. 

610 

611 Parameters 

612 ---------- 

613 collection : `str` 

614 Name of the collection. 

615 

616 Returns 

617 ------- 

618 chains : `set` of `str` 

619 Set of `~CollectionType.CHAINED` collection names. 

620 """ 

621 return { 

622 record.name 

623 for record in self._managers.collections.getParentChains( 

624 self._managers.collections.find(collection).key 

625 ) 

626 } 

627 

628 def getCollectionDocumentation(self, collection: str) -> str | None: 

629 """Retrieve the documentation string for a collection. 

630 

631 Parameters 

632 ---------- 

633 collection : `str` 

634 Name of the collection. 

635 

636 Returns 

637 ------- 

638 docs : `str` or `None` 

639 Docstring for the collection with the given name. 

640 """ 

641 return self._managers.collections.getDocumentation(self._managers.collections.find(collection).key) 

642 

643 def setCollectionDocumentation(self, collection: str, doc: str | None) -> None: 

644 """Set the documentation string for a collection. 

645 

646 Parameters 

647 ---------- 

648 collection : `str` 

649 Name of the collection. 

650 doc : `str` or `None` 

651 Docstring for the collection with the given name; will replace any 

652 existing docstring. Passing `None` will remove any existing 

653 docstring. 

654 """ 

655 self._managers.collections.setDocumentation(self._managers.collections.find(collection).key, doc) 

656 

657 def getCollectionSummary(self, collection: str) -> CollectionSummary: 

658 """Return a summary for the given collection. 

659 

660 Parameters 

661 ---------- 

662 collection : `str` 

663 Name of the collection for which a summary is to be retrieved. 

664 

665 Returns 

666 ------- 

667 summary : `~lsst.daf.butler.registry.CollectionSummary` 

668 Summary of the dataset types and governor dimension values in 

669 this collection. 

670 """ 

671 record = self._managers.collections.find(collection) 

672 return self._managers.datasets.getCollectionSummary(record) 

673 

674 def registerDatasetType(self, datasetType: DatasetType) -> bool: 

675 """Add a new `DatasetType` to the Registry. 

676 

677 It is not an error to register the same `DatasetType` twice. 

678 

679 Parameters 

680 ---------- 

681 datasetType : `DatasetType` 

682 The `DatasetType` to be added. 

683 

684 Returns 

685 ------- 

686 inserted : `bool` 

687 `True` if ``datasetType`` was inserted, `False` if an identical 

688 existing `DatasetType` was found. Note that in either case the 

689 DatasetType is guaranteed to be defined in the Registry 

690 consistently with the given definition. 

691 

692 Raises 

693 ------ 

694 ValueError 

695 Raised if the dimensions or storage class are invalid. 

696 lsst.daf.butler.registry.ConflictingDefinitionError 

697 Raised if this `DatasetType` is already registered with a different 

698 definition. 

699 

700 Notes 

701 ----- 

702 This method cannot be called within transactions, as it needs to be 

703 able to perform its own transaction to be concurrent. 

704 """ 

705 _, inserted = self._managers.datasets.register(datasetType) 

706 return inserted 

707 

708 def removeDatasetType(self, name: str | tuple[str, ...]) -> None: 

709 """Remove the named `DatasetType` from the registry. 

710 

711 .. warning:: 

712 

713 Registry implementations can cache the dataset type definitions. 

714 This means that deleting the dataset type definition may result in 

715 unexpected behavior from other active butler processes that have 

716 not seen the deletion. 

717 

718 Parameters 

719 ---------- 

720 name : `str` or `tuple` [`str`] 

721 Name of the type to be removed, or a tuple of such names. 

722 Wildcards are allowed. 

723 

724 Raises 

725 ------ 

726 lsst.daf.butler.registry.OrphanedRecordError 

727 Raised if an attempt is made to remove the dataset type definition 

728 when there are already datasets associated with it. 

729 

730 Notes 

731 ----- 

732 If the dataset type is not registered the method will return without 

733 action. 

734 """ 

735 for datasetTypeExpression in ensure_iterable(name): 

736 # Catch any warnings from the caller specifying a component 

737 # dataset type. This will result in an error later but the 

738 # warning could be confusing when the caller is not querying 

739 # anything. 

740 with warnings.catch_warnings(): 

741 warnings.simplefilter("ignore", category=FutureWarning) 

742 datasetTypes = list(self.queryDatasetTypes(datasetTypeExpression)) 

743 if not datasetTypes: 

744 _LOG.info("Dataset type %r not defined", datasetTypeExpression) 

745 else: 

746 for datasetType in datasetTypes: 

747 self._managers.datasets.remove(datasetType.name) 

748 _LOG.info("Removed dataset type %r", datasetType.name) 

749 

750 def getDatasetType(self, name: str) -> DatasetType: 

751 """Get the `DatasetType`. 

752 

753 Parameters 

754 ---------- 

755 name : `str` 

756 Name of the type. 

757 

758 Returns 

759 ------- 

760 type : `DatasetType` 

761 The `DatasetType` associated with the given name. 

762 

763 Raises 

764 ------ 

765 lsst.daf.butler.registry.MissingDatasetTypeError 

766 Raised if the requested dataset type has not been registered. 

767 

768 Notes 

769 ----- 

770 This method handles component dataset types automatically, though most 

771 other registry operations do not. 

772 """ 

773 parent_name, component = DatasetType.splitDatasetTypeName(name) 

774 storage = self._managers.datasets[parent_name] 

775 if component is None: 

776 return storage.datasetType 

777 else: 

778 return storage.datasetType.makeComponentDatasetType(component) 

779 

780 def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool: 

781 """Test whether the given dataset ID generation mode is supported by 

782 `insertDatasets`. 

783 

784 Parameters 

785 ---------- 

786 mode : `DatasetIdGenEnum` 

787 Enum value for the mode to test. 

788 

789 Returns 

790 ------- 

791 supported : `bool` 

792 Whether the given mode is supported. 

793 """ 

794 return self._managers.datasets.supportsIdGenerationMode(mode) 

795 

796 def findDataset( 

797 self, 

798 datasetType: DatasetType | str, 

799 dataId: DataId | None = None, 

800 *, 

801 collections: CollectionArgType | None = None, 

802 timespan: Timespan | None = None, 

803 datastore_records: bool = False, 

804 **kwargs: Any, 

805 ) -> DatasetRef | None: 

806 """Find a dataset given its `DatasetType` and data ID. 

807 

808 This can be used to obtain a `DatasetRef` that permits the dataset to 

809 be read from a `Datastore`. If the dataset is a component and can not 

810 be found using the provided dataset type, a dataset ref for the parent 

811 will be returned instead but with the correct dataset type. 

812 

813 Parameters 

814 ---------- 

815 datasetType : `DatasetType` or `str` 

816 A `DatasetType` or the name of one. If this is a `DatasetType` 

817 instance, its storage class will be respected and propagated to 

818 the output, even if it differs from the dataset type definition 

819 in the registry, as long as the storage classes are convertible. 

820 dataId : `dict` or `DataCoordinate`, optional 

821 A `dict`-like object containing the `Dimension` links that identify 

822 the dataset within a collection. 

823 collections : collection expression, optional 

824 An expression that fully or partially identifies the collections to 

825 search for the dataset; see 

826 :ref:`daf_butler_collection_expressions` for more information. 

827 Defaults to ``self.defaults.collections``. 

828 timespan : `Timespan`, optional 

829 A timespan that the validity range of the dataset must overlap. 

830 If not provided, any `~CollectionType.CALIBRATION` collections 

831 matched by the ``collections`` argument will not be searched. 

832 **kwargs 

833 Additional keyword arguments passed to 

834 `DataCoordinate.standardize` to convert ``dataId`` to a true 

835 `DataCoordinate` or augment an existing one. 

836 

837 Returns 

838 ------- 

839 ref : `DatasetRef` 

840 A reference to the dataset, or `None` if no matching Dataset 

841 was found. 

842 

843 Raises 

844 ------ 

845 lsst.daf.butler.registry.NoDefaultCollectionError 

846 Raised if ``collections`` is `None` and 

847 ``self.defaults.collections`` is `None`. 

848 LookupError 

849 Raised if one or more data ID keys are missing. 

850 lsst.daf.butler.registry.MissingDatasetTypeError 

851 Raised if the dataset type does not exist. 

852 lsst.daf.butler.registry.MissingCollectionError 

853 Raised if any of ``collections`` does not exist in the registry. 

854 

855 Notes 

856 ----- 

857 This method simply returns `None` and does not raise an exception even 

858 when the set of collections searched is intrinsically incompatible with 

859 the dataset type, e.g. if ``datasetType.isCalibration() is False``, but 

860 only `~CollectionType.CALIBRATION` collections are being searched. 

861 This may make it harder to debug some lookup failures, but the behavior 

862 is intentional; we consider it more important that failed searches are 

863 reported consistently, regardless of the reason, and that adding 

864 additional collections that do not contain a match to the search path 

865 never changes the behavior. 

866 

867 This method handles component dataset types automatically, though most 

868 other registry operations do not. 

869 """ 

870 if collections is None: 

871 if not self.defaults.collections: 

872 raise NoDefaultCollectionError( 

873 "No collections provided to findDataset, and no defaults from registry construction." 

874 ) 

875 collections = self.defaults.collections 

876 backend = queries.SqlQueryBackend(self._db, self._managers) 

877 collection_wildcard = CollectionWildcard.from_expression(collections, require_ordered=True) 

878 if collection_wildcard.empty(): 

879 return None 

880 matched_collections = backend.resolve_collection_wildcard(collection_wildcard) 

881 parent_dataset_type, components = backend.resolve_single_dataset_type_wildcard( 

882 datasetType, components_deprecated=False 

883 ) 

884 if len(components) > 1: 

885 raise DatasetTypeError( 

886 f"findDataset requires exactly one dataset type; got multiple components {components} " 

887 f"for parent dataset type {parent_dataset_type.name}." 

888 ) 

889 component = components[0] 

890 dataId = DataCoordinate.standardize( 

891 dataId, 

892 graph=parent_dataset_type.dimensions, 

893 universe=self.dimensions, 

894 defaults=self.defaults.dataId, 

895 **kwargs, 

896 ) 

897 governor_constraints = {name: {cast(str, dataId[name])} for name in dataId.graph.governors.names} 

898 (filtered_collections,) = backend.filter_dataset_collections( 

899 [parent_dataset_type], 

900 matched_collections, 

901 governor_constraints=governor_constraints, 

902 ).values() 

903 if not filtered_collections: 

904 return None 

905 if timespan is None: 

906 filtered_collections = [ 

907 collection_record 

908 for collection_record in filtered_collections 

909 if collection_record.type is not CollectionType.CALIBRATION 

910 ] 

911 if filtered_collections: 

912 requested_columns = {"dataset_id", "run", "collection"} 

913 with backend.context() as context: 

914 predicate = context.make_data_coordinate_predicate( 

915 dataId.subset(parent_dataset_type.dimensions), full=False 

916 ) 

917 if timespan is not None: 

918 requested_columns.add("timespan") 

919 predicate = predicate.logical_and( 

920 context.make_timespan_overlap_predicate( 

921 DatasetColumnTag(parent_dataset_type.name, "timespan"), timespan 

922 ) 

923 ) 

924 relation = backend.make_dataset_query_relation( 

925 parent_dataset_type, filtered_collections, requested_columns, context 

926 ).with_rows_satisfying(predicate) 

927 rows = list(context.fetch_iterable(relation)) 

928 else: 

929 rows = [] 

930 if not rows: 

931 return None 

932 elif len(rows) == 1: 

933 best_row = rows[0] 

934 else: 

935 rank_by_collection_key = {record.key: n for n, record in enumerate(filtered_collections)} 

936 collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection") 

937 row_iter = iter(rows) 

938 best_row = next(row_iter) 

939 best_rank = rank_by_collection_key[best_row[collection_tag]] 

940 have_tie = False 

941 for row in row_iter: 

942 if (rank := rank_by_collection_key[row[collection_tag]]) < best_rank: 

943 best_row = row 

944 best_rank = rank 

945 have_tie = False 

946 elif rank == best_rank: 

947 have_tie = True 

948 assert timespan is not None, "Rank ties should be impossible given DB constraints." 

949 if have_tie: 

950 raise LookupError( 

951 f"Ambiguous calibration lookup for {parent_dataset_type.name} in collections " 

952 f"{collection_wildcard.strings} with timespan {timespan}." 

953 ) 

954 reader = queries.DatasetRefReader( 

955 parent_dataset_type, 

956 translate_collection=lambda k: self._managers.collections[k].name, 

957 ) 

958 ref = reader.read(best_row, data_id=dataId) 

959 if component is not None: 

960 ref = ref.makeComponentRef(component) 

961 if datastore_records: 

962 ref = self.get_datastore_records(ref) 

963 

964 return ref 

965 
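A lookup sketch for ``findDataset`` above. The dataset type, data ID values, and collection name are hypothetical and assumed to exist in the repository; a ``timespan`` argument would additionally be required to search CALIBRATION collections.

ref = registry.findDataset(
    "flat",
    instrument="ExampleCam",
    detector=12,
    collections=["ExampleCam/runs/example"],
)
if ref is None:
    print("No matching dataset found in the searched collections.")
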

966 @transactional 

967 def insertDatasets( 

968 self, 

969 datasetType: DatasetType | str, 

970 dataIds: Iterable[DataId], 

971 run: str | None = None, 

972 expand: bool = True, 

973 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE, 

974 ) -> list[DatasetRef]: 

975 """Insert one or more datasets into the `Registry`. 

976 

977 This always adds new datasets; to associate existing datasets with 

978 a new collection, use ``associate``. 

979 

980 Parameters 

981 ---------- 

982 datasetType : `DatasetType` or `str` 

983 A `DatasetType` or the name of one. 

984 dataIds : `~collections.abc.Iterable` of `dict` or `DataCoordinate` 

985 Dimension-based identifiers for the new datasets. 

986 run : `str`, optional 

987 The name of the run that produced the datasets. Defaults to 

988 ``self.defaults.run``. 

989 expand : `bool`, optional 

990 If `True` (default), expand data IDs as they are inserted. This is 

991 necessary in general to allow datastore to generate file templates, 

992 but it may be disabled if the caller can guarantee this is 

993 unnecessary. 

994 idGenerationMode : `DatasetIdGenEnum`, optional 

995 Specifies option for generating dataset IDs. By default unique IDs 

996 are generated for each inserted dataset. 

997 

998 Returns 

999 ------- 

1000 refs : `list` of `DatasetRef` 

1001 Resolved `DatasetRef` instances for all given data IDs (in the same 

1002 order). 

1003 

1004 Raises 

1005 ------ 

1006 lsst.daf.butler.registry.DatasetTypeError 

1007 Raised if ``datasetType`` is not known to registry. 

1008 lsst.daf.butler.registry.CollectionTypeError 

1009 Raised if ``run`` collection type is not `~CollectionType.RUN`. 

1010 lsst.daf.butler.registry.NoDefaultCollectionError 

1011 Raised if ``run`` is `None` and ``self.defaults.run`` is `None`. 

1012 lsst.daf.butler.registry.ConflictingDefinitionError 

1013 If a dataset with the same dataset type and data ID as one of those 

1014 given already exists in ``run``. 

1015 lsst.daf.butler.registry.MissingCollectionError 

1016 Raised if ``run`` does not exist in the registry. 

1017 """ 

1018 if isinstance(datasetType, DatasetType): 

1019 storage = self._managers.datasets.find(datasetType.name) 

1020 if storage is None: 

1021 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.") 

1022 else: 

1023 storage = self._managers.datasets.find(datasetType) 

1024 if storage is None: 

1025 raise DatasetTypeError(f"DatasetType with name '{datasetType}' has not been registered.") 

1026 if run is None: 

1027 if self.defaults.run is None: 

1028 raise NoDefaultCollectionError( 

1029 "No run provided to insertDatasets, and no default from registry construction." 

1030 ) 

1031 run = self.defaults.run 

1032 runRecord = self._managers.collections.find(run) 

1033 if runRecord.type is not CollectionType.RUN: 

1034 raise CollectionTypeError( 

1035 f"Given collection is of type {runRecord.type.name}; RUN collection required." 

1036 ) 

1037 assert isinstance(runRecord, RunRecord) 

1038 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG) 

1039 if expand: 

1040 expandedDataIds = [ 

1041 self.expandDataId(dataId, graph=storage.datasetType.dimensions) 

1042 for dataId in progress.wrap(dataIds, f"Expanding {storage.datasetType.name} data IDs") 

1043 ] 

1044 else: 

1045 expandedDataIds = [ 

1046 DataCoordinate.standardize(dataId, graph=storage.datasetType.dimensions) for dataId in dataIds 

1047 ] 

1048 try: 

1049 refs = list(storage.insert(runRecord, expandedDataIds, idGenerationMode)) 

1050 if self._managers.obscore: 

1051 context = queries.SqlQueryContext(self._db, self._managers.column_types) 

1052 self._managers.obscore.add_datasets(refs, context) 

1053 except sqlalchemy.exc.IntegrityError as err: 

1054 raise ConflictingDefinitionError( 

1055 "A database constraint failure was triggered by inserting " 

1056 f"one or more datasets of type {storage.datasetType} into " 

1057 f"collection '{run}'. " 

1058 "This probably means a dataset with the same data ID " 

1059 "and dataset type already exists, but it may also mean a " 

1060 "dimension row is missing." 

1061 ) from err 

1062 return refs 

1063 
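A sketch of registering a RUN collection and inserting new datasets into it with ``insertDatasets`` above. The dataset type name and data ID keys are hypothetical and must match a `DatasetType` already registered in the repository.

registry.registerRun("u/someone/ingest")
refs = registry.insertDatasets(
    "raw",
    dataIds=[{"instrument": "ExampleCam", "exposure": 1000, "detector": 12}],
    run="u/someone/ingest",
)
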

1064 @transactional 

1065 def _importDatasets( 

1066 self, 

1067 datasets: Iterable[DatasetRef], 

1068 expand: bool = True, 

1069 ) -> list[DatasetRef]: 

1070 """Import one or more datasets into the `Registry`. 

1071 

1072 The difference from the `insertDatasets` method is that this method 

1073 accepts `DatasetRef` instances which should already be resolved and 

1074 have a dataset ID. If the registry supports globally-unique dataset 

1075 IDs (e.g. `uuid.UUID`), then datasets which already exist in the 

1076 registry will be ignored if imported again. 

1077 

1078 Parameters 

1079 ---------- 

1080 datasets : `~collections.abc.Iterable` of `DatasetRef` 

1081 Datasets to be inserted. All `DatasetRef` instances must have 

1082 identical ``datasetType`` and ``run`` attributes. ``run`` 

1083 attribute can be `None` and defaults to ``self.defaults.run``. 

1084 Datasets can specify ``id`` attribute which will be used for 

1085 inserted datasets. All dataset IDs must have the same type 

1086 (`int` or `uuid.UUID`), if type of dataset IDs does not match 

1087 configured backend then IDs will be ignored and new IDs will be 

1088 generated by backend. 

1089 expand : `bool`, optional 

1090 If `True` (default), expand data IDs as they are inserted. This is 

1091 necessary in general, but it may be disabled if the caller can 

1092 guarantee this is unnecessary. 

1093 

1094 Returns 

1095 ------- 

1096 refs : `list` of `DatasetRef` 

1097 Resolved `DatasetRef` instances for all given data IDs (in the same 

1098 order). If any of ``datasets`` has an ID which already exists in 

1099 the database then it will not be inserted or updated, but a 

1100 resolved `DatasetRef` will be returned for it in any case. 

1101 

1102 Raises 

1103 ------ 

1104 lsst.daf.butler.registry.NoDefaultCollectionError 

1105 Raised if ``run`` is `None` and ``self.defaults.run`` is `None`. 

1106 lsst.daf.butler.registry.DatasetTypeError 

1107 Raised if datasets correspond to more than one dataset type or 

1108 dataset type is not known to registry. 

1109 lsst.daf.butler.registry.ConflictingDefinitionError 

1110 If a dataset with the same dataset type and data ID as one of those 

1111 given already exists in ``run``. 

1112 lsst.daf.butler.registry.MissingCollectionError 

1113 Raised if ``run`` does not exist in the registry. 

1114 

1115 Notes 

1116 ----- 

1117 This method is considered package-private and internal to Butler 

1118 implementation. Clients outside daf_butler package should not use this 

1119 method. 

1120 """ 

1121 datasets = list(datasets) 

1122 if not datasets: 

1123 # nothing to do 

1124 return [] 

1125 

1126 # find dataset type 

1127 datasetTypes = {dataset.datasetType for dataset in datasets} 

1128 if len(datasetTypes) != 1: 

1129 raise DatasetTypeError(f"Multiple dataset types in input datasets: {datasetTypes}") 

1130 datasetType = datasetTypes.pop() 

1131 

1132 # get storage handler for this dataset type 

1133 storage = self._managers.datasets.find(datasetType.name) 

1134 if storage is None: 

1135 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.") 

1136 

1137 # find run name 

1138 runs = {dataset.run for dataset in datasets} 

1139 if len(runs) != 1: 

1140 raise ValueError(f"Multiple run names in input datasets: {runs}") 

1141 run = runs.pop() 

1142 

1143 runRecord = self._managers.collections.find(run) 

1144 if runRecord.type is not CollectionType.RUN: 

1145 raise CollectionTypeError( 

1146 f"Given collection '{runRecord.name}' is of type {runRecord.type.name};" 

1147 " RUN collection required." 

1148 ) 

1149 assert isinstance(runRecord, RunRecord) 

1150 

1151 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG) 

1152 if expand: 

1153 expandedDatasets = [ 

1154 dataset.expanded(self.expandDataId(dataset.dataId, graph=storage.datasetType.dimensions)) 

1155 for dataset in progress.wrap(datasets, f"Expanding {storage.datasetType.name} data IDs") 

1156 ] 

1157 else: 

1158 expandedDatasets = [ 

1159 DatasetRef(datasetType, dataset.dataId, id=dataset.id, run=dataset.run, conform=True) 

1160 for dataset in datasets 

1161 ] 

1162 

1163 try: 

1164 refs = list(storage.import_(runRecord, expandedDatasets)) 

1165 if self._managers.obscore: 

1166 context = queries.SqlQueryContext(self._db, self._managers.column_types) 

1167 self._managers.obscore.add_datasets(refs, context) 

1168 except sqlalchemy.exc.IntegrityError as err: 

1169 raise ConflictingDefinitionError( 

1170 "A database constraint failure was triggered by inserting " 

1171 f"one or more datasets of type {storage.datasetType} into " 

1172 f"collection '{run}'. " 

1173 "This probably means a dataset with the same data ID " 

1174 "and dataset type already exists, but it may also mean a " 

1175 "dimension row is missing." 

1176 ) from err 

1177 # Check that imported dataset IDs match the input 

1178 for imported_ref, input_ref in zip(refs, datasets, strict=True): 

1179 if imported_ref.id != input_ref.id: 

1180 raise RegistryConsistencyError( 

1181 "Imported dataset ID differs from input dataset ID, " 

1182 f"input ref: {input_ref}, imported ref: {imported_ref}" 

1183 ) 

1184 return refs 

1185 

1186 def getDataset(self, id: DatasetId) -> DatasetRef | None: 

1187 """Retrieve a Dataset entry. 

1188 

1189 Parameters 

1190 ---------- 

1191 id : `DatasetId` 

1192 The unique identifier for the dataset. 

1193 

1194 Returns 

1195 ------- 

1196 ref : `DatasetRef` or `None` 

1197 A ref to the Dataset, or `None` if no matching Dataset 

1198 was found. 

1199 """ 

1200 return self._managers.datasets.getDatasetRef(id) 

1201 

1202 @transactional 

1203 def removeDatasets(self, refs: Iterable[DatasetRef]) -> None: 

1204 """Remove datasets from the Registry. 

1205 

1206 The datasets will be removed unconditionally from all collections, and 

1207 any `Quantum` that consumed this dataset will instead be marked as 

1208 having a NULL input. `Datastore` records will *not* be deleted; the 

1209 caller is responsible for ensuring that the dataset has already been 

1210 removed from all Datastores. 

1211 

1212 Parameters 

1213 ---------- 

1214 refs : `~collections.abc.Iterable` [`DatasetRef`] 

1215 References to the datasets to be removed. Must include a valid 

1216 ``id`` attribute, and should be considered invalidated upon return. 

1217 

1218 Raises 

1219 ------ 

1220 lsst.daf.butler.AmbiguousDatasetError 

1221 Raised if any ``ref.id`` is `None`. 

1222 lsst.daf.butler.registry.OrphanedRecordError 

1223 Raised if any dataset is still present in any `Datastore`. 

1224 """ 

1225 progress = Progress("lsst.daf.butler.Registry.removeDatasets", level=logging.DEBUG) 

1226 for datasetType, refsForType in progress.iter_item_chunks( 

1227 DatasetRef.iter_by_type(refs), desc="Removing datasets by type" 

1228 ): 

1229 storage = self._managers.datasets[datasetType.name] 

1230 try: 

1231 storage.delete(refsForType) 

1232 except sqlalchemy.exc.IntegrityError as err: 

1233 raise OrphanedRecordError( 

1234 "One or more datasets is still present in one or more Datastores." 

1235 ) from err 

1236 

1237 @transactional 

1238 def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

1239 """Add existing datasets to a `~CollectionType.TAGGED` collection. 

1240 

1241 If a DatasetRef with the same exact ID is already in a collection 

1242 nothing is changed. If a `DatasetRef` with the same `DatasetType` and 

1243 data ID but with different ID exists in the collection, 

1244 `~lsst.daf.butler.registry.ConflictingDefinitionError` is raised. 

1245 

1246 Parameters 

1247 ---------- 

1248 collection : `str` 

1249 Indicates the collection the datasets should be associated with. 

1250 refs : `~collections.abc.Iterable` [ `DatasetRef` ] 

1251 An iterable of resolved `DatasetRef` instances that already exist 

1252 in this `Registry`. 

1253 

1254 Raises 

1255 ------ 

1256 lsst.daf.butler.registry.ConflictingDefinitionError 

1257 If a Dataset with the given `DatasetRef` already exists in the 

1258 given collection. 

1259 lsst.daf.butler.registry.MissingCollectionError 

1260 Raised if ``collection`` does not exist in the registry. 

1261 lsst.daf.butler.registry.CollectionTypeError 

1262 Raised if adding new datasets to the given ``collection`` is not 

1263 allowed. 

1264 """ 

1265 progress = Progress("lsst.daf.butler.Registry.associate", level=logging.DEBUG) 

1266 collectionRecord = self._managers.collections.find(collection) 

1267 if collectionRecord.type is not CollectionType.TAGGED: 

1268 raise CollectionTypeError( 

1269 f"Collection '{collection}' has type {collectionRecord.type.name}, not TAGGED." 

1270 ) 

1271 for datasetType, refsForType in progress.iter_item_chunks( 

1272 DatasetRef.iter_by_type(refs), desc="Associating datasets by type" 

1273 ): 

1274 storage = self._managers.datasets[datasetType.name] 

1275 try: 

1276 storage.associate(collectionRecord, refsForType) 

1277 if self._managers.obscore: 

1278 # If a TAGGED collection is being monitored by ObsCore 

1279 # manager then we may need to save the dataset. 

1280 context = queries.SqlQueryContext(self._db, self._managers.column_types) 

1281 self._managers.obscore.associate(refsForType, collectionRecord, context) 

1282 except sqlalchemy.exc.IntegrityError as err: 

1283 raise ConflictingDefinitionError( 

1284 f"Constraint violation while associating dataset of type {datasetType.name} with " 

1285 f"collection {collection}. This probably means that one or more datasets with the same " 

1286 "dataset type and data ID already exist in the collection, but it may also indicate " 

1287 "that the datasets do not exist." 

1288 ) from err 

1289 

1290 @transactional 

1291 def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

1292 """Remove existing datasets from a `~CollectionType.TAGGED` collection. 

1293 

1294 ``collection`` and ``ref`` combinations that are not currently 

1295 associated are silently ignored. 

1296 

1297 Parameters 

1298 ---------- 

1299 collection : `str` 

1300 The collection the datasets should no longer be associated with. 

1301 refs : `~collections.abc.Iterable` [ `DatasetRef` ] 

1302 An iterable of resolved `DatasetRef` instances that already exist 

1303 in this `Registry`. 

1304 

1305 Raises 

1306 ------ 

1307 lsst.daf.butler.AmbiguousDatasetError 

1308 Raised if any of the given dataset references is unresolved. 

1309 lsst.daf.butler.registry.MissingCollectionError 

1310 Raised if ``collection`` does not exist in the registry. 

1311 lsst.daf.butler.registry.CollectionTypeError 

1312 Raised if removing datasets from the given ``collection`` is not 

1313 allowed. 

1314 """ 

1315 progress = Progress("lsst.daf.butler.Registry.disassociate", level=logging.DEBUG) 

1316 collectionRecord = self._managers.collections.find(collection) 

1317 if collectionRecord.type is not CollectionType.TAGGED: 

1318 raise CollectionTypeError( 

1319 f"Collection '{collection}' has type {collectionRecord.type.name}; expected TAGGED." 

1320 ) 

1321 for datasetType, refsForType in progress.iter_item_chunks( 

1322 DatasetRef.iter_by_type(refs), desc="Disassociating datasets by type" 

1323 ): 

1324 storage = self._managers.datasets[datasetType.name] 

1325 storage.disassociate(collectionRecord, refsForType) 

1326 if self._managers.obscore: 

1327 self._managers.obscore.disassociate(refsForType, collectionRecord) 

1328 

1329 @transactional 

1330 def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None: 

1331 """Associate one or more datasets with a calibration collection and a 

1332 validity range within it. 

1333 

1334 Parameters 

1335 ---------- 

1336 collection : `str` 

1337 The name of an already-registered `~CollectionType.CALIBRATION` 

1338 collection. 

1339 refs : `~collections.abc.Iterable` [ `DatasetRef` ] 

1340 Datasets to be associated. 

1341 timespan : `Timespan` 

1342 The validity range for these datasets within the collection. 

1343 

1344 Raises 

1345 ------ 

1346 lsst.daf.butler.AmbiguousDatasetError 

1347 Raised if any of the given `DatasetRef` instances is unresolved. 

1348 lsst.daf.butler.registry.ConflictingDefinitionError 

1349 Raised if the collection already contains a different dataset with 

1350 the same `DatasetType` and data ID and an overlapping validity 

1351 range. 

1352 lsst.daf.butler.registry.CollectionTypeError 

1353 Raised if ``collection`` is not a `~CollectionType.CALIBRATION` 

1354 collection or if one or more datasets are of a dataset type for 

1355 which `DatasetType.isCalibration` returns `False`. 

1356 """ 

1357 progress = Progress("lsst.daf.butler.Registry.certify", level=logging.DEBUG) 

1358 collectionRecord = self._managers.collections.find(collection) 

1359 for datasetType, refsForType in progress.iter_item_chunks( 

1360 DatasetRef.iter_by_type(refs), desc="Certifying datasets by type" 

1361 ): 

1362 storage = self._managers.datasets[datasetType.name] 

1363 storage.certify( 

1364 collectionRecord, 

1365 refsForType, 

1366 timespan, 

1367 context=queries.SqlQueryContext(self._db, self._managers.column_types), 

1368 ) 

1369 
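A sketch of certifying calibration datasets into a CALIBRATION collection over a validity range with ``certify`` above. The collection name and dates are hypothetical, and ``refs`` is assumed to hold resolved `DatasetRef` instances for a calibration dataset type (e.g. from an earlier ``insertDatasets`` call).

from astropy.time import Time
from lsst.daf.butler import Timespan
from lsst.daf.butler.registry import CollectionType

validity = Timespan(Time("2023-01-01", scale="tai"), Time("2023-07-01", scale="tai"))
registry.registerCollection("ExampleCam/calib/example", CollectionType.CALIBRATION)
registry.certify("ExampleCam/calib/example", refs, validity)
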

1370 @transactional 

1371 def decertify( 

1372 self, 

1373 collection: str, 

1374 datasetType: str | DatasetType, 

1375 timespan: Timespan, 

1376 *, 

1377 dataIds: Iterable[DataId] | None = None, 

1378 ) -> None: 

1379 """Remove or adjust datasets to clear a validity range within a 

1380 calibration collection. 

1381 

1382 Parameters 

1383 ---------- 

1384 collection : `str` 

1385 The name of an already-registered `~CollectionType.CALIBRATION` 

1386 collection. 

1387 datasetType : `str` or `DatasetType` 

1388 Name or `DatasetType` instance for the datasets to be decertified. 

1389 timespan : `Timespan` 

1390 The validity range to remove datasets from within the collection. 

1391 Datasets that overlap this range but are not contained by it will 

1392 have their validity ranges adjusted to not overlap it, which may 

1393 split a single dataset validity range into two. 

1394 dataIds : iterable [`dict` or `DataCoordinate`], optional 

1395 Data IDs that should be decertified within the given validity range. 

1396 If `None`, all data IDs for ``datasetType`` will be 

1397 decertified. 

1398 

1399 Raises 

1400 ------ 

1401 lsst.daf.butler.registry.CollectionTypeError 

1402 Raised if ``collection`` is not a `~CollectionType.CALIBRATION` 

1403 collection or if ``datasetType.isCalibration() is False``. 

1404 """ 

1405 collectionRecord = self._managers.collections.find(collection) 

1406 if isinstance(datasetType, str): 

1407 storage = self._managers.datasets[datasetType] 

1408 else: 

1409 storage = self._managers.datasets[datasetType.name] 

1410 standardizedDataIds = None 

1411 if dataIds is not None: 

1412 standardizedDataIds = [ 

1413 DataCoordinate.standardize(d, graph=storage.datasetType.dimensions) for d in dataIds 

1414 ] 

1415 storage.decertify( 

1416 collectionRecord, 

1417 timespan, 

1418 dataIds=standardizedDataIds, 

1419 context=queries.SqlQueryContext(self._db, self._managers.column_types), 

1420 ) 

1421 

1422 def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager: 

1423 """Return an object that allows a new `Datastore` instance to 

1424 communicate with this `Registry`. 

1425 

1426 Returns 

1427 ------- 

1428 manager : `~.interfaces.DatastoreRegistryBridgeManager` 

1429 Object that mediates communication between this `Registry` and its 

1430 associated datastores. 

1431 """ 

1432 return self._managers.datastores 

1433 

1434 def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]: 

1435 """Retrieve datastore locations for a given dataset. 

1436 

1437 Parameters 

1438 ---------- 

1439 ref : `DatasetRef` 

1440 A reference to the dataset for which to retrieve storage 

1441 information. 

1442 

1443 Returns 

1444 ------- 

1445 datastores : `~collections.abc.Iterable` [ `str` ] 

1446 All the matching datastores holding this dataset. 

1447 

1448 Raises 

1449 ------ 

1450 lsst.daf.butler.AmbiguousDatasetError 

1451 Raised if ``ref.id`` is `None`. 

1452 """ 

1453 return self._managers.datastores.findDatastores(ref) 

1454 

1455 def expandDataId( 

1456 self, 

1457 dataId: DataId | None = None, 

1458 *, 

1459 graph: DimensionGraph | None = None, 

1460 records: NameLookupMapping[DimensionElement, DimensionRecord | None] | None = None, 

1461 withDefaults: bool = True, 

1462 **kwargs: Any, 

1463 ) -> DataCoordinate: 

1464 """Expand a dimension-based data ID to include additional information. 

1465 

1466 Parameters 

1467 ---------- 

1468 dataId : `DataCoordinate` or `dict`, optional 

1469 Data ID to be expanded; augmented and overridden by ``kwargs``. 

1470 graph : `DimensionGraph`, optional 

1471 Set of dimensions for the expanded ID. If `None`, the dimensions 

1472 will be inferred from the keys of ``dataId`` and ``kwargs``. 

1473 Dimensions that are in ``dataId`` or ``kwargs`` but not in 

1474 ``graph`` are silently ignored, providing a way to extract and 

1475 expand a subset of a data ID. 

1476 records : `~collections.abc.Mapping` [`str`, `DimensionRecord`], \ 

1477 optional 

1478 Dimension record data to use before querying the database for that 

1479 data, keyed by element name. 

1480 withDefaults : `bool`, optional 

1481 Utilize ``self.defaults.dataId`` to fill in missing governor 

1482 dimension key-value pairs. Defaults to `True` (i.e. defaults are 

1483 used). 

1484 **kwargs 

1485 Additional keywords are treated like additional key-value pairs for 

1486 ``dataId``, extending and overriding it. 

1487 

1488 Returns 

1489 ------- 

1490 expanded : `DataCoordinate` 

1491 A data ID that includes full metadata for all of the dimensions it 

1492 identifies, i.e. guarantees that ``expanded.hasRecords()`` and 

1493 ``expanded.hasFull()`` both return `True`. 

1494 

1495 Raises 

1496 ------ 

1497 lsst.daf.butler.registry.DataIdError 

1498 Raised when ``dataId`` or keyword arguments specify unknown 

1499 dimensions or values, or when a resulting data ID contains 

1500 contradictory key-value pairs, according to dimension 

1501 relationships. 

1502 

1503 Notes 

1504 ----- 

1505 This method cannot be relied upon to reject invalid data ID values 

1506 for dimensions that do not actually have any record columns. For 

1507 efficiency reasons the records for these dimensions (which have only 

1508 dimension key values that are given by the caller) may be constructed 

1509 directly rather than obtained from the registry database. 

1510 """ 

1511 if not withDefaults: 

1512 defaults = None 

1513 else: 

1514 defaults = self.defaults.dataId 

1515 try: 

1516 standardized = DataCoordinate.standardize( 

1517 dataId, graph=graph, universe=self.dimensions, defaults=defaults, **kwargs 

1518 ) 

1519 except KeyError as exc: 

1520 # This means either kwargs have some odd name or required 

1521 # dimension is missing. 

1522 raise DimensionNameError(str(exc)) from exc 

1523 if standardized.hasRecords(): 

1524 return standardized 

1525 if records is None: 

1526 records = {} 

1527 elif isinstance(records, NamedKeyMapping): 

1528 records = records.byName() 

1529 else: 

1530 records = dict(records) 

1531 if isinstance(dataId, DataCoordinate) and dataId.hasRecords(): 

1532 records.update(dataId.records.byName()) 

1533 keys = standardized.byName() 

1534 context = queries.SqlQueryContext(self._db, self._managers.column_types) 

1535 for element in standardized.graph.primaryKeyTraversalOrder: 

1536 record = records.get(element.name, ...) # Use ... to mean not found; None might mean NULL 

1537 if record is ...: 

1538 if isinstance(element, Dimension) and keys.get(element.name) is None: 

1539 if element in standardized.graph.required: 

1540 raise DimensionNameError( 

1541 f"No value or null value for required dimension {element.name}." 

1542 ) 

1543 keys[element.name] = None 

1544 record = None 

1545 else: 

1546 storage = self._managers.dimensions[element] 

1547 record = storage.fetch_one(DataCoordinate.standardize(keys, graph=element.graph), context) 

1548 records[element.name] = record 

1549 if record is not None: 

1550 for d in element.implied: 

1551 value = getattr(record, d.name) 

1552 if keys.setdefault(d.name, value) != value: 

1553 raise InconsistentDataIdError( 

1554 f"Data ID {standardized} has {d.name}={keys[d.name]!r}, " 

1555 f"but {element.name} implies {d.name}={value!r}." 

1556 ) 

1557 else: 

1558 if element in standardized.graph.required: 

1559 raise DataIdValueError( 

1560 f"Could not fetch record for required dimension {element.name} via keys {keys}." 

1561 ) 

1562 if element.alwaysJoin: 

1563 raise InconsistentDataIdError( 

1564 f"Could not fetch record for element {element.name} via keys {keys}, ", 

1565 "but it is marked alwaysJoin=True; this means one or more dimensions are not " 

1566 "related.", 

1567 ) 

1568 for d in element.implied: 

1569 keys.setdefault(d.name, None) 

1570 records.setdefault(d.name, None) 

1571 return DataCoordinate.standardize(keys, graph=standardized.graph).expanded(records=records) 

1572 
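# Usage sketch (not part of this module): the data ID values below are
# illustrative assumptions. Expanding a minimal data ID fills in implied
# dimension values and attaches dimension records:
#
#     expanded = registry.expandDataId({"instrument": "HSC", "exposure": 903334})
#     assert expanded.hasFull() and expanded.hasRecords()
#     exposure_record = expanded.records["exposure"]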

1573 def insertDimensionData( 

1574 self, 

1575 element: DimensionElement | str, 

1576 *data: Mapping[str, Any] | DimensionRecord, 

1577 conform: bool = True, 

1578 replace: bool = False, 

1579 skip_existing: bool = False, 

1580 ) -> None: 

1581 """Insert one or more dimension records into the database. 

1582 

1583 Parameters 

1584 ---------- 

1585 element : `DimensionElement` or `str` 

1586 The `DimensionElement` or name thereof that identifies the table 

1587 records will be inserted into. 

1588 *data : `dict` or `DimensionRecord` 

1589 One or more records to insert. 

1590 conform : `bool`, optional 

1591 If `False` (`True` is default) perform no checking or conversions, 

1592 and assume that ``element`` is a `DimensionElement` instance and 

1593 ``data`` is one or more `DimensionRecord` instances of the 

1594 appropriate subclass. 

1595 replace : `bool`, optional 

1596 If `True` (`False` is default), replace existing records in the 

1597 database if there is a conflict. 

1598 skip_existing : `bool`, optional 

1599 If `True` (`False` is default), skip insertion if a record with 

1600 the same primary key values already exists. Unlike 

1601 `syncDimensionData`, this will not detect when the given record 

1602 differs from what is in the database, and should not be used when 

1603 this is a concern. 

1604 """ 

1605 if conform: 

1606 if isinstance(element, str): 

1607 element = self.dimensions[element] 

1608 records = [ 

1609 row if isinstance(row, DimensionRecord) else element.RecordClass(**row) for row in data 

1610 ] 

1611 else: 

1612 # Ignore typing since caller said to trust them with conform=False. 

1613 records = data # type: ignore 

1614 storage = self._managers.dimensions[element] 

1615 storage.insert(*records, replace=replace, skip_existing=skip_existing) 

1616 
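# Usage sketch (not part of this module): the record field names follow the
# default dimension universe but the values are illustrative assumptions.
#
#     registry.insertDimensionData(
#         "instrument",
#         {"name": "DummyCam", "visit_max": 1_000_000, "detector_max": 4},
#         skip_existing=True,
#     )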

1617 def syncDimensionData( 

1618 self, 

1619 element: DimensionElement | str, 

1620 row: Mapping[str, Any] | DimensionRecord, 

1621 conform: bool = True, 

1622 update: bool = False, 

1623 ) -> bool | dict[str, Any]: 

1624 """Synchronize the given dimension record with the database, inserting 

1625 if it does not already exist and comparing values if it does. 

1626 

1627 Parameters 

1628 ---------- 

1629 element : `DimensionElement` or `str` 

1630 The `DimensionElement` or name thereof that identifies the table 

1631 records will be inserted into. 

1632 row : `dict` or `DimensionRecord` 

1633 The record to insert. 

1634 conform : `bool`, optional 

1635 If `False` (`True` is default) perform no checking or conversions, 

1636 and assume that ``element`` is a `DimensionElement` instance and 

1637 ``row`` is a `DimensionRecord` instance of the 

1638 appropriate subclass. 

1639 update : `bool`, optional 

1640 If `True` (`False` is default), update the existing record in the 

1641 database if there is a conflict. 

1642 

1643 Returns 

1644 ------- 

1645 inserted_or_updated : `bool` or `dict` 

1646 `True` if a new row was inserted, `False` if no changes were 

1647 needed, or a `dict` mapping updated column names to their old 

1648 values if an update was performed (only possible if 

1649 ``update=True``). 

1650 

1651 Raises 

1652 ------ 

1653 lsst.daf.butler.registry.ConflictingDefinitionError 

1654 Raised if the record exists in the database (according to primary 

1655 key lookup) but is inconsistent with the given one. 

1656 """ 

1657 if conform: 

1658 if isinstance(element, str): 

1659 element = self.dimensions[element] 

1660 record = row if isinstance(row, DimensionRecord) else element.RecordClass(**row) 

1661 else: 

1662 # Ignore typing since caller said to trust them with conform=False. 

1663 record = row # type: ignore 

1664 storage = self._managers.dimensions[element] 

1665 return storage.sync(record, update=update) 

1666 
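# Usage sketch (not part of this module): the record values are illustrative
# assumptions. The return value is `True` on insert, `False` when the
# existing row already matches, or a dict of old column values when
# ``update=True`` changes an existing row.
#
#     inserted = registry.syncDimensionData(
#         "detector",
#         {"instrument": "DummyCam", "id": 0, "full_name": "0"},
#     )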

1667 def queryDatasetTypes( 

1668 self, 

1669 expression: Any = ..., 

1670 *, 

1671 components: bool | None = False, 

1672 missing: list[str] | None = None, 

1673 ) -> Iterable[DatasetType]: 

1674 """Iterate over the dataset types whose names match an expression. 

1675 

1676 Parameters 

1677 ---------- 

1678 expression : dataset type expression, optional 

1679 An expression that fully or partially identifies the dataset types 

1680 to return, such as a `str`, `re.Pattern`, or iterable thereof. 

1681 ``...`` can be used to return all dataset types, and is the 

1682 default. See :ref:`daf_butler_dataset_type_expressions` for more 

1683 information. 

1684 components : `bool`, optional 

1685 If `True`, apply all expression patterns to component dataset type 

1686 names as well. If `False`, never apply patterns to components. 

1687 If `None`, apply patterns to components only if their 

1688 parent datasets were not matched by the expression. 

1689 Fully-specified component datasets (`str` or `DatasetType` 

1690 instances) are always included. 

1691 

1692 Values other than `False` are deprecated, and only `False` will be 

1693 supported after v26. After v27 this argument will be removed 

1694 entirely. 

1695 missing : `list` of `str`, optional 

1696 String dataset type names that were explicitly given (i.e. not 

1697 regular expression patterns) but not found will be appended to this 

1698 list, if it is provided. 

1699 

1700 Returns 

1701 ------- 

1702 dataset_types : `~collections.abc.Iterable` [ `DatasetType` ] 

1703 An `~collections.abc.Iterable` of `DatasetType` instances whose 

1704 names match ``expression``. 

1705 

1706 Raises 

1707 ------ 

1708 lsst.daf.butler.registry.DatasetTypeExpressionError 

1709 Raised when ``expression`` is invalid. 

1710 """ 

1711 wildcard = DatasetTypeWildcard.from_expression(expression) 

1712 composition_dict = self._managers.datasets.resolve_wildcard( 

1713 wildcard, 

1714 components=components, 

1715 missing=missing, 

1716 ) 

1717 result: list[DatasetType] = [] 

1718 for parent_dataset_type, components_for_parent in composition_dict.items(): 

1719 result.extend( 

1720 parent_dataset_type.makeComponentDatasetType(c) if c is not None else parent_dataset_type 

1721 for c in components_for_parent 

1722 ) 

1723 return result 

1724 
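# Usage sketch (not part of this module): the dataset type names and the
# pattern are illustrative assumptions.
#
#     import re
#
#     flat_like = registry.queryDatasetTypes(re.compile("flat.*"))
#     missing: list[str] = []
#     known = registry.queryDatasetTypes(["bias", "dark"], missing=missing)
#     # names left in ``missing`` were requested explicitly but not registered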

1725 def queryCollections( 

1726 self, 

1727 expression: Any = ..., 

1728 datasetType: DatasetType | None = None, 

1729 collectionTypes: Iterable[CollectionType] | CollectionType = CollectionType.all(), 

1730 flattenChains: bool = False, 

1731 includeChains: bool | None = None, 

1732 ) -> Sequence[str]: 

1733 """Iterate over the collections whose names match an expression. 

1734 

1735 Parameters 

1736 ---------- 

1737 expression : collection expression, optional 

1738 An expression that identifies the collections to return, such as 

1739 a `str` (for full matches or partial matches via globs), 

1740 `re.Pattern` (for partial matches), or iterable thereof. ``...`` 

1741 can be used to return all collections, and is the default. 

1742 See :ref:`daf_butler_collection_expressions` for more information. 

1743 datasetType : `DatasetType`, optional 

1744 If provided, only yield collections that may contain datasets of 

1745 this type. This is a conservative approximation in general; it may 

1746 yield collections that do not have any such datasets. 

1747 collectionTypes : `~collections.abc.Set` [`CollectionType`] or \ 

1748 `CollectionType`, optional 

1749 If provided, only yield collections of these types. 

1750 flattenChains : `bool`, optional 

1751 If `True` (`False` is default), recursively yield the child 

1752 collections of matching `~CollectionType.CHAINED` collections. 

1753 includeChains : `bool`, optional 

1754 If `True`, yield records for matching `~CollectionType.CHAINED` 

1755 collections. Default is the opposite of ``flattenChains``: include 

1756 either CHAINED collections or their children, but not both. 

1757 

1758 Returns 

1759 ------- 

1760 collections : `~collections.abc.Sequence` [ `str` ] 

1761 The names of collections that match ``expression``. 

1762 

1763 Raises 

1764 ------ 

1765 lsst.daf.butler.registry.CollectionExpressionError 

1766 Raised when ``expression`` is invalid. 

1767 

1768 Notes 

1769 ----- 

1770 The order in which collections are returned is unspecified, except that 

1771 the children of a `~CollectionType.CHAINED` collection are guaranteed 

1772 to be in the order in which they are searched. When multiple parent 

1773 `~CollectionType.CHAINED` collections match the same criteria, the 

1774 order in which their child lists appear is unspecified, and the lists of 

1775 children may be incomplete if a child has multiple parents. 

1776 """ 

1777 # Right now the datasetType argument is completely ignored, but that 

1778 # is consistent with its [lack of] guarantees. DM-24939 or a follow-up 

1779 # ticket will take care of that. 

1780 try: 

1781 wildcard = CollectionWildcard.from_expression(expression) 

1782 except TypeError as exc: 

1783 raise CollectionExpressionError(f"Invalid collection expression '{expression}'") from exc 

1784 collectionTypes = ensure_iterable(collectionTypes) 

1785 return [ 

1786 record.name 

1787 for record in self._managers.collections.resolve_wildcard( 

1788 wildcard, 

1789 collection_types=frozenset(collectionTypes), 

1790 flatten_chains=flattenChains, 

1791 include_chains=includeChains, 

1792 ) 

1793 ] 

1794 
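# Usage sketch (not part of this module): the collection names and glob are
# illustrative assumptions.
#
#     run_names = registry.queryCollections(
#         "HSC/runs/*", collectionTypes=CollectionType.RUN
#     )
#     children = registry.queryCollections("HSC/defaults", flattenChains=True)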

1795 def _makeQueryBuilder( 

1796 self, 

1797 summary: queries.QuerySummary, 

1798 doomed_by: Iterable[str] = (), 

1799 ) -> queries.QueryBuilder: 

1800 """Return a `QueryBuilder` instance capable of constructing and 

1801 managing more complex queries than those obtainable via `Registry` 

1802 interfaces. 

1803 

1804 This is an advanced interface; downstream code should prefer 

1805 `Registry.queryDataIds` and `Registry.queryDatasets` whenever those 

1806 are sufficient. 

1807 

1808 Parameters 

1809 ---------- 

1810 summary : `queries.QuerySummary` 

1811 Object describing and categorizing the full set of dimensions that 

1812 will be included in the query. 

1813 doomed_by : `~collections.abc.Iterable` of `str`, optional 

1814 A list of diagnostic messages that indicate why the query is going 

1815 to yield no results and should not even be executed. If an empty 

1816 container (default) the query will be executed unless other code 

1817 determines that it is doomed. 

1818 

1819 Returns 

1820 ------- 

1821 builder : `queries.QueryBuilder` 

1822 Object that can be used to construct and perform advanced queries. 

1823 """ 

1824 doomed_by = list(doomed_by) 

1825 backend = queries.SqlQueryBackend(self._db, self._managers) 

1826 context = backend.context() 

1827 relation: Relation | None = None 

1828 if doomed_by: 

1829 relation = LeafRelation.make_doomed(context.sql_engine, set(), doomed_by) 

1830 return queries.QueryBuilder( 

1831 summary, 

1832 backend=backend, 

1833 context=context, 

1834 relation=relation, 

1835 ) 

1836 

1837 def _standardize_query_data_id_args( 

1838 self, data_id: DataId | None, *, doomed_by: list[str], **kwargs: Any 

1839 ) -> DataCoordinate: 

1840 """Preprocess the data ID arguments passed to query* methods. 

1841 

1842 Parameters 

1843 ---------- 

1844 data_id : `DataId` or `None` 

1845 Data ID that constrains the query results. 

1846 doomed_by : `list` [ `str` ] 

1847 List to append messages indicating why the query is doomed to 

1848 yield no results. 

1849 **kwargs 

1850 Additional data ID key-value pairs, extending and overriding 

1851 ``data_id``. 

1852 

1853 Returns 

1854 ------- 

1855 data_id : `DataCoordinate` 

1856 Standardized data ID. Will be fully expanded unless expansion 

1857 fails, in which case a message will be appended to ``doomed_by`` 

1858 on return. 

1859 """ 

1860 try: 

1861 return self.expandDataId(data_id, **kwargs) 

1862 except DataIdValueError as err: 

1863 doomed_by.append(str(err)) 

1864 return DataCoordinate.standardize( 

1865 data_id, **kwargs, universe=self.dimensions, defaults=self.defaults.dataId 

1866 ) 

1867 

1868 def _standardize_query_dataset_args( 

1869 self, 

1870 datasets: Any, 

1871 collections: CollectionArgType | None, 

1872 components: bool | None, 

1873 mode: Literal["find_first"] | Literal["find_all"] | Literal["constrain"] = "constrain", 

1874 *, 

1875 doomed_by: list[str], 

1876 ) -> tuple[dict[DatasetType, list[str | None]], CollectionWildcard | None]: 

1877 """Preprocess dataset arguments passed to query* methods. 

1878 

1879 Parameters 

1880 ---------- 

1881 datasets : `DatasetType`, `str`, `re.Pattern`, or iterable of these 

1882 Expression identifying dataset types. See `queryDatasetTypes` for 

1883 details. 

1884 collections : `str`, `re.Pattern`, or iterable of these 

1885 Expression identifying collections to be searched. See 

1886 `queryCollections` for details. 

1887 components : `bool`, optional 

1888 If `True`, apply all expression patterns to component dataset type 

1889 names as well. If `False`, never apply patterns to components. 

1890 If `None` (default), apply patterns to components only if their 

1891 parent datasets were not matched by the expression. 

1892 Fully-specified component datasets (`str` or `DatasetType` 

1893 instances) are always included. 

1894 

1895 Values other than `False` are deprecated, and only `False` will be 

1896 supported after v26. After v27 this argument will be removed 

1897 entirely. 

1898 mode : `str`, optional 

1899 The way in which datasets are being used in this query; one of: 

1900 

1901 - "find_first": this is a query for the first dataset in an 

1902 ordered list of collections. Prohibits collection wildcards, 

1903 but permits dataset type wildcards. 

1904 

1905 - "find_all": this is a query for all datasets in all matched 

1906 collections. Permits collection and dataset type wildcards. 

1907 

1908 - "constrain": this is a query for something other than datasets, 

1909 with results constrained by dataset existence. Permits 

1910 collection wildcards and prohibits ``...`` as a dataset type 

1911 wildcard. 

1912 doomed_by : `list` [ `str` ] 

1913 List to append messages indicating why the query is doomed to 

1914 yield no results. 

1915 

1916 Returns 

1917 ------- 

1918 composition : `dict` [ `DatasetType`, `list` [ `str` or `None` ] ] 

1919 Dictionary mapping parent dataset type to `list` of components 

1920 matched for that dataset type (or `None` for the parent itself). 

1921 collections : `CollectionWildcard` 

1922 Processed collection expression. 

1923 """ 

1924 composition: dict[DatasetType, list[str | None]] = {} 

1925 collection_wildcard: CollectionWildcard | None = None 

1926 if datasets is not None: 

1927 if collections is None: 

1928 if not self.defaults.collections: 

1929 raise NoDefaultCollectionError("No collections, and no registry default collections.") 

1930 collection_wildcard = CollectionWildcard.from_expression(self.defaults.collections) 

1931 else: 

1932 collection_wildcard = CollectionWildcard.from_expression(collections) 

1933 if mode == "find_first" and collection_wildcard.patterns: 

1934 raise TypeError( 

1935 f"Collection pattern(s) {collection_wildcard.patterns} not allowed in this context." 

1936 ) 

1937 missing: list[str] = [] 

1938 composition = self._managers.datasets.resolve_wildcard( 

1939 datasets, components=components, missing=missing, explicit_only=(mode == "constrain") 

1940 ) 

1941 if missing and mode == "constrain": 

1942 # After v26 this should raise MissingDatasetTypeError, to be 

1943 # implemented on DM-36303. 

1944 warnings.warn( 

1945 f"Dataset type(s) {missing} are not registered; this will be an error after v26.", 

1946 FutureWarning, 

1947 stacklevel=find_outside_stacklevel("lsst.daf.butler"), 

1948 ) 

1949 doomed_by.extend(f"Dataset type {name} is not registered." for name in missing) 

1950 elif collections: 

1951 # I think this check should actually be `collections is not None`, 

1952 # but it looks like some CLI scripts use empty tuple as default. 

1953 raise ArgumentError(f"Cannot pass 'collections' (='{collections}') without 'datasets'.") 

1954 return composition, collection_wildcard 

1955 

1956 def queryDatasets( 

1957 self, 

1958 datasetType: Any, 

1959 *, 

1960 collections: CollectionArgType | None = None, 

1961 dimensions: Iterable[Dimension | str] | None = None, 

1962 dataId: DataId | None = None, 

1963 where: str = "", 

1964 findFirst: bool = False, 

1965 components: bool | None = False, 

1966 bind: Mapping[str, Any] | None = None, 

1967 check: bool = True, 

1968 **kwargs: Any, 

1969 ) -> queries.DatasetQueryResults: 

1970 """Query for and iterate over dataset references matching user-provided 

1971 criteria. 

1972 

1973 Parameters 

1974 ---------- 

1975 datasetType : dataset type expression 

1976 An expression that fully or partially identifies the dataset types 

1977 to be queried. Allowed types include `DatasetType`, `str`, 

1978 `re.Pattern`, and iterables thereof. The special value ``...`` can 

1979 be used to query all dataset types. See 

1980 :ref:`daf_butler_dataset_type_expressions` for more information. 

1981 collections : collection expression, optional 

1982 An expression that identifies the collections to search, such as a 

1983 `str` (for full matches or partial matches via globs), `re.Pattern` 

1984 (for partial matches), or iterable thereof. ``...`` can be used to 

1985 search all collections (actually just all `~CollectionType.RUN` 

1986 collections, because this will still find all datasets). 

1987 If not provided, ``self.default.collections`` is used. See 

1988 :ref:`daf_butler_collection_expressions` for more information. 

1989 dimensions : `~collections.abc.Iterable` of `Dimension` or `str` 

1990 Dimensions to include in the query (in addition to those used 

1991 to identify the queried dataset type(s)), either to constrain 

1992 the resulting datasets to those for which a matching dimension 

1993 exists, or to relate the dataset type's dimensions to dimensions 

1994 referenced by the ``dataId`` or ``where`` arguments. 

1995 dataId : `dict` or `DataCoordinate`, optional 

1996 A data ID whose key-value pairs are used as equality constraints 

1997 in the query. 

1998 where : `str`, optional 

1999 A string expression similar to a SQL WHERE clause. May involve 

2000 any column of a dimension table or (as a shortcut for the primary 

2001 key column of a dimension table) dimension name. See 

2002 :ref:`daf_butler_dimension_expressions` for more information. 

2003 findFirst : `bool`, optional 

2004 If `True` (`False` is default), for each result data ID, only 

2005 yield one `DatasetRef` of each `DatasetType`, from the first 

2006 collection in which a dataset of that dataset type appears 

2007 (according to the order of ``collections`` passed in). If `True`, 

2008 ``collections`` must not contain regular expressions and may not 

2009 be ``...``. 

2010 components : `bool`, optional 

2011 If `True`, apply all dataset expression patterns to component 

2012 dataset type names as well. If `False`, never apply patterns to 

2013 components. If `None`, apply patterns to components only 

2014 if their parent datasets were not matched by the expression. 

2015 Fully-specified component datasets (`str` or `DatasetType` 

2016 instances) are always included. 

2017 

2018 Values other than `False` are deprecated, and only `False` will be 

2019 supported after v26. After v27 this argument will be removed 

2020 entirely. 

2021 bind : `~collections.abc.Mapping`, optional 

2022 Mapping containing literal values that should be injected into the 

2023 ``where`` expression, keyed by the identifiers they replace. 

2024 Values of collection type can be expanded in some cases; see 

2025 :ref:`daf_butler_dimension_expressions_identifiers` for more 

2026 information. 

2027 check : `bool`, optional 

2028 If `True` (default) check the query for consistency before 

2029 executing it. This may reject some valid queries that resemble 

2030 common mistakes (e.g. queries for visits without specifying an 

2031 instrument). 

2032 **kwargs 

2033 Additional keyword arguments are forwarded to 

2034 `DataCoordinate.standardize` when processing the ``dataId`` 

2035 argument (and may be used to provide a constraining data ID even 

2036 when the ``dataId`` argument is `None`). 

2037 

2038 Returns 

2039 ------- 

2040 refs : `.queries.DatasetQueryResults` 

2041 Dataset references matching the given query criteria. Nested data 

2042 IDs are guaranteed to include values for all implied dimensions 

2043 (i.e. `DataCoordinate.hasFull` will return `True`), but will not 

2044 include dimension records (`DataCoordinate.hasRecords` will be 

2045 `False`) unless `~.queries.DatasetQueryResults.expanded` is 

2046 called on the result object (which returns a new one). 

2047 

2048 Raises 

2049 ------ 

2050 lsst.daf.butler.registry.DatasetTypeExpressionError 

2051 Raised when ``datasetType`` expression is invalid. 

2052 TypeError 

2053 Raised when the arguments are incompatible, such as when a 

2054 collection wildcard is passed when ``findFirst`` is `True`, or 

2055 when ``collections`` is `None` and ``self.defaults.collections`` is 

2056 also `None`. 

2057 lsst.daf.butler.registry.DataIdError 

2058 Raised when ``dataId`` or keyword arguments specify unknown 

2059 dimensions or values, or when they contain inconsistent values. 

2060 lsst.daf.butler.registry.UserExpressionError 

2061 Raised when ``where`` expression is invalid. 

2062 

2063 Notes 

2064 ----- 

2065 When multiple dataset types are queried in a single call, the 

2066 results of this operation are equivalent to querying for each dataset 

2067 type separately in turn, and no information about the relationships 

2068 between datasets of different types is included. In contexts where 

2069 that kind of information is important, the recommended pattern is to 

2070 use `queryDataIds` to first obtain data IDs (possibly with the 

2071 desired dataset types and collections passed as constraints to the 

2072 query), and then use multiple (generally much simpler) calls to 

2073 `queryDatasets` with the returned data IDs passed as constraints. 

2074 """ 

2075 doomed_by: list[str] = [] 

2076 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs) 

2077 dataset_composition, collection_wildcard = self._standardize_query_dataset_args( 

2078 datasetType, 

2079 collections, 

2080 components, 

2081 mode="find_first" if findFirst else "find_all", 

2082 doomed_by=doomed_by, 

2083 ) 

2084 if collection_wildcard is not None and collection_wildcard.empty(): 

2085 doomed_by.append("No datasets can be found because collection list is empty.") 

2086 return queries.ChainedDatasetQueryResults([], doomed_by=doomed_by) 

2087 parent_results: list[queries.ParentDatasetQueryResults] = [] 

2088 for parent_dataset_type, components_for_parent in dataset_composition.items(): 

2089 # The full set of dimensions in the query is the combination of 

2090 # those needed for the DatasetType and those explicitly requested, 

2091 # if any. 

2092 dimension_names = set(parent_dataset_type.dimensions.names) 

2093 if dimensions is not None: 

2094 dimension_names.update(self.dimensions.extract(dimensions).names) 

2095 # Construct the summary structure needed to construct a 

2096 # QueryBuilder. 

2097 summary = queries.QuerySummary( 

2098 requested=DimensionGraph(self.dimensions, names=dimension_names), 

2099 column_types=self._managers.column_types, 

2100 data_id=data_id, 

2101 expression=where, 

2102 bind=bind, 

2103 defaults=self.defaults.dataId, 

2104 check=check, 

2105 datasets=[parent_dataset_type], 

2106 ) 

2107 builder = self._makeQueryBuilder(summary) 

2108 # Add the dataset subquery to the query, telling the QueryBuilder 

2109 # to include the rank of the selected collection in the results 

2110 # only if we need to findFirst. Note that if any of the 

2111 # collections are actually wildcard expressions, and 

2112 # findFirst=True, this will raise TypeError for us. 

2113 builder.joinDataset(parent_dataset_type, collection_wildcard, isResult=True, findFirst=findFirst) 

2114 query = builder.finish() 

2115 parent_results.append( 

2116 queries.ParentDatasetQueryResults( 

2117 query, parent_dataset_type, components=components_for_parent 

2118 ) 

2119 ) 

2120 if not parent_results: 

2121 doomed_by.extend( 

2122 f"No registered dataset type matching {t!r} found, so no matching datasets can " 

2123 "exist in any collection." 

2124 for t in ensure_iterable(datasetType) 

2125 ) 

2126 return queries.ChainedDatasetQueryResults([], doomed_by=doomed_by) 

2127 elif len(parent_results) == 1: 

2128 return parent_results[0] 

2129 else: 

2130 return queries.ChainedDatasetQueryResults(parent_results) 

2131 
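# Usage sketch (not part of this module): the dataset type, collection,
# ``where`` expression, and data ID values are illustrative assumptions.
#
#     refs = registry.queryDatasets(
#         "calexp",
#         collections=["HSC/runs/RC2"],
#         where="detector = 50",
#         instrument="HSC",
#         visit=903334,
#         findFirst=True,
#     )
#     for ref in refs.expanded():
#         print(ref.dataId, ref.run)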

2132 def queryDataIds( 

2133 self, 

2134 dimensions: Iterable[Dimension | str] | Dimension | str, 

2135 *, 

2136 dataId: DataId | None = None, 

2137 datasets: Any = None, 

2138 collections: CollectionArgType | None = None, 

2139 where: str = "", 

2140 components: bool | None = None, 

2141 bind: Mapping[str, Any] | None = None, 

2142 check: bool = True, 

2143 **kwargs: Any, 

2144 ) -> queries.DataCoordinateQueryResults: 

2145 """Query for data IDs matching user-provided criteria. 

2146 

2147 Parameters 

2148 ---------- 

2149 dimensions : `Dimension` or `str`, or iterable thereof 

2150 The dimensions of the data IDs to yield, as either `Dimension` 

2151 instances or `str`. Will be automatically expanded to a complete 

2152 `DimensionGraph`. 

2153 dataId : `dict` or `DataCoordinate`, optional 

2154 A data ID whose key-value pairs are used as equality constraints 

2155 in the query. 

2156 datasets : dataset type expression, optional 

2157 An expression that fully or partially identifies dataset types 

2158 that should constrain the yielded data IDs. For example, including 

2159 "raw" here would constrain the yielded ``instrument``, 

2160 ``exposure``, ``detector``, and ``physical_filter`` values to only 

2161 those for which at least one "raw" dataset exists in 

2162 ``collections``. Allowed types include `DatasetType`, `str`, 

2163 and iterables thereof. Regular expression objects (i.e. 

2164 `re.Pattern`) are deprecated and will be removed after the v26 

2165 release. See :ref:`daf_butler_dataset_type_expressions` for more 

2166 information. 

2167 collections : collection expression, optional 

2168 An expression that identifies the collections to search for 

2169 datasets, such as a `str` (for full matches or partial matches 

2170 via globs), `re.Pattern` (for partial matches), or iterable 

2171 thereof. ``...`` can be used to search all collections (actually 

2172 just all `~CollectionType.RUN` collections, because this will 

2173 still find all datasets). If not provided, 

2174 ``self.default.collections`` is used. Ignored unless ``datasets`` 

2175 is also passed. See :ref:`daf_butler_collection_expressions` for 

2176 more information. 

2177 where : `str`, optional 

2178 A string expression similar to a SQL WHERE clause. May involve 

2179 any column of a dimension table or (as a shortcut for the primary 

2180 key column of a dimension table) dimension name. See 

2181 :ref:`daf_butler_dimension_expressions` for more information. 

2182 components : `bool`, optional 

2183 If `True`, apply all dataset expression patterns to component 

2184 dataset type names as well. If `False`, never apply patterns to 

2185 components. If `None`, apply patterns to components only 

2186 if their parent datasets were not matched by the expression. 

2187 Fully-specified component datasets (`str` or `DatasetType` 

2188 instances) are always included. 

2189 

2190 Values other than `False` are deprecated, and only `False` will be 

2191 supported after v26. After v27 this argument will be removed 

2192 entirely. 

2193 bind : `~collections.abc.Mapping`, optional 

2194 Mapping containing literal values that should be injected into the 

2195 ``where`` expression, keyed by the identifiers they replace. 

2196 Values of collection type can be expanded in some cases; see 

2197 :ref:`daf_butler_dimension_expressions_identifiers` for more 

2198 information. 

2199 check : `bool`, optional 

2200 If `True` (default) check the query for consistency before 

2201 executing it. This may reject some valid queries that resemble 

2202 common mistakes (e.g. queries for visits without specifying an 

2203 instrument). 

2204 **kwargs 

2205 Additional keyword arguments are forwarded to 

2206 `DataCoordinate.standardize` when processing the ``dataId`` 

2207 argument (and may be used to provide a constraining data ID even 

2208 when the ``dataId`` argument is `None`). 

2209 

2210 Returns 

2211 ------- 

2212 dataIds : `.queries.DataCoordinateQueryResults` 

2213 Data IDs matching the given query parameters. These are guaranteed 

2214 to identify all dimensions (`DataCoordinate.hasFull` returns 

2215 `True`), but will not contain `DimensionRecord` objects 

2216 (`DataCoordinate.hasRecords` returns `False`). Call 

2217 `~.queries.DataCoordinateQueryResults.expanded` on the 

2218 returned object to fetch those (and consider using 

2219 `~.queries.DataCoordinateQueryResults.materialize` on the 

2220 returned object first if the expected number of rows is very 

2221 large). See documentation for those methods for additional 

2222 information. 

2223 

2224 Raises 

2225 ------ 

2226 lsst.daf.butler.registry.NoDefaultCollectionError 

2227 Raised if ``collections`` is `None` and 

2228 ``self.defaults.collections`` is `None`. 

2229 lsst.daf.butler.registry.CollectionExpressionError 

2230 Raised when ``collections`` expression is invalid. 

2231 lsst.daf.butler.registry.DataIdError 

2232 Raised when ``dataId`` or keyword arguments specify unknown 

2233 dimensions or values, or when they contain inconsistent values. 

2234 lsst.daf.butler.registry.DatasetTypeExpressionError 

2235 Raised when ``datasetType`` expression is invalid. 

2236 lsst.daf.butler.registry.UserExpressionError 

2237 Raised when ``where`` expression is invalid. 

2238 """ 

2239 dimensions = ensure_iterable(dimensions) 

2240 requestedDimensions = self.dimensions.extract(dimensions) 

2241 doomed_by: list[str] = [] 

2242 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs) 

2243 dataset_composition, collection_wildcard = self._standardize_query_dataset_args( 

2244 datasets, collections, components, doomed_by=doomed_by 

2245 ) 

2246 if collection_wildcard is not None and collection_wildcard.empty(): 

2247 doomed_by.append("No data coordinates can be found because collection list is empty.") 

2248 summary = queries.QuerySummary( 

2249 requested=requestedDimensions, 

2250 column_types=self._managers.column_types, 

2251 data_id=data_id, 

2252 expression=where, 

2253 bind=bind, 

2254 defaults=self.defaults.dataId, 

2255 check=check, 

2256 datasets=dataset_composition.keys(), 

2257 ) 

2258 builder = self._makeQueryBuilder(summary, doomed_by=doomed_by) 

2259 for datasetType in dataset_composition: 

2260 builder.joinDataset(datasetType, collection_wildcard, isResult=False) 

2261 query = builder.finish() 

2262 

2263 return queries.DataCoordinateQueryResults(query) 

2264 
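# Usage sketch (not part of this module): the dimension names, dataset type,
# collection, and data ID values are illustrative assumptions.
#
#     data_ids = registry.queryDataIds(
#         ["exposure", "detector"],
#         datasets="raw",
#         collections="HSC/raw/all",
#         instrument="HSC",
#     )
#     for data_id in data_ids.expanded():
#         ...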

2265 def queryDimensionRecords( 

2266 self, 

2267 element: DimensionElement | str, 

2268 *, 

2269 dataId: DataId | None = None, 

2270 datasets: Any = None, 

2271 collections: CollectionArgType | None = None, 

2272 where: str = "", 

2273 components: bool | None = None, 

2274 bind: Mapping[str, Any] | None = None, 

2275 check: bool = True, 

2276 **kwargs: Any, 

2277 ) -> queries.DimensionRecordQueryResults: 

2278 """Query for dimension information matching user-provided criteria. 

2279 

2280 Parameters 

2281 ---------- 

2282 element : `DimensionElement` or `str` 

2283 The dimension element to obtain records for. 

2284 dataId : `dict` or `DataCoordinate`, optional 

2285 A data ID whose key-value pairs are used as equality constraints 

2286 in the query. 

2287 datasets : dataset type expression, optional 

2288 An expression that fully or partially identifies dataset types 

2289 that should constrain the yielded records. See `queryDataIds` and 

2290 :ref:`daf_butler_dataset_type_expressions` for more information. 

2291 collections : collection expression, optional 

2292 An expression that identifies the collections to search for 

2293 datasets, such as a `str` (for full matches or partial matches 

2294 via globs), `re.Pattern` (for partial matches), or iterable 

2295 thereof. ``...`` can be used to search all collections (actually 

2296 just all `~CollectionType.RUN` collections, because this will 

2297 still find all datasets). If not provided, 

2298 ``self.default.collections`` is used. Ignored unless ``datasets`` 

2299 is also passed. See :ref:`daf_butler_collection_expressions` for 

2300 more information. 

2301 where : `str`, optional 

2302 A string expression similar to a SQL WHERE clause. See 

2303 `queryDataIds` and :ref:`daf_butler_dimension_expressions` for more 

2304 information. 

2305 components : `bool`, optional 

2306 Whether to apply dataset expressions to components as well. 

2307 See `queryDataIds` for more information. 

2308 

2309 Values other than `False` are deprecated, and only `False` will be 

2310 supported after v26. After v27 this argument will be removed 

2311 entirely. 

2312 bind : `~collections.abc.Mapping`, optional 

2313 Mapping containing literal values that should be injected into the 

2314 ``where`` expression, keyed by the identifiers they replace. 

2315 Values of collection type can be expanded in some cases; see 

2316 :ref:`daf_butler_dimension_expressions_identifiers` for more 

2317 information. 

2318 check : `bool`, optional 

2319 If `True` (default) check the query for consistency before 

2320 executing it. This may reject some valid queries that resemble 

2321 common mistakes (e.g. queries for visits without specifying an 

2322 instrument). 

2323 **kwargs 

2324 Additional keyword arguments are forwarded to 

2325 `DataCoordinate.standardize` when processing the ``dataId`` 

2326 argument (and may be used to provide a constraining data ID even 

2327 when the ``dataId`` argument is `None`). 

2328 

2329 Returns 

2330 ------- 

2331 dataIds : `.queries.DimensionRecordQueryResults` 

2332 Data IDs matching the given query parameters. 

2333 

2334 Raises 

2335 ------ 

2336 lsst.daf.butler.registry.NoDefaultCollectionError 

2337 Raised if ``collections`` is `None` and 

2338 ``self.defaults.collections`` is `None`. 

2339 lsst.daf.butler.registry.CollectionExpressionError 

2340 Raised when ``collections`` expression is invalid. 

2341 lsst.daf.butler.registry.DataIdError 

2342 Raised when ``dataId`` or keyword arguments specify unknown 

2343 dimensions or values, or when they contain inconsistent values. 

2344 lsst.daf.butler.registry.DatasetTypeExpressionError 

2345 Raised when ``datasetType`` expression is invalid. 

2346 lsst.daf.butler.registry.UserExpressionError 

2347 Raised when ``where`` expression is invalid. 

2348 """ 

2349 if not isinstance(element, DimensionElement): 

2350 try: 

2351 element = self.dimensions[element] 

2352 except KeyError as e: 

2353 raise DimensionNameError( 

2354 f"No such dimension '{element}', available dimensions: " 

2355 + str(self.dimensions.getStaticElements()) 

2356 ) from e 

2357 doomed_by: list[str] = [] 

2358 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs) 

2359 dataset_composition, collection_wildcard = self._standardize_query_dataset_args( 

2360 datasets, collections, components, doomed_by=doomed_by 

2361 ) 

2362 if collection_wildcard is not None and collection_wildcard.empty(): 

2363 doomed_by.append("No dimension records can be found because collection list is empty.") 

2364 summary = queries.QuerySummary( 

2365 requested=element.graph, 

2366 column_types=self._managers.column_types, 

2367 data_id=data_id, 

2368 expression=where, 

2369 bind=bind, 

2370 defaults=self.defaults.dataId, 

2371 check=check, 

2372 datasets=dataset_composition.keys(), 

2373 ) 

2374 builder = self._makeQueryBuilder(summary, doomed_by=doomed_by) 

2375 for datasetType in dataset_composition: 

2376 builder.joinDataset(datasetType, collection_wildcard, isResult=False) 

2377 query = builder.finish().with_record_columns(element) 

2378 return queries.DatabaseDimensionRecordQueryResults(query, element) 

2379 
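# Usage sketch (not part of this module): the element name, ``where``
# expression, record fields, and data ID values are illustrative assumptions.
#
#     records = registry.queryDimensionRecords(
#         "detector",
#         where="detector.purpose = 'SCIENCE'",
#         instrument="HSC",
#     )
#     for record in records:
#         print(record.id, record.full_name)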

2380 def queryDatasetAssociations( 

2381 self, 

2382 datasetType: str | DatasetType, 

2383 collections: CollectionArgType | None = ..., 

2384 *, 

2385 collectionTypes: Iterable[CollectionType] = CollectionType.all(), 

2386 flattenChains: bool = False, 

2387 ) -> Iterator[DatasetAssociation]: 

2388 """Iterate over dataset-collection combinations where the dataset is in 

2389 the collection. 

2390 

2391 This method is a temporary placeholder for better support for 

2392 association results in `queryDatasets`. It will probably be 

2393 removed in the future, and should be avoided in production code 

2394 whenever possible. 

2395 

2396 Parameters 

2397 ---------- 

2398 datasetType : `DatasetType` or `str` 

2399 A dataset type object or the name of one. 

2400 collections : collection expression, optional 

2401 An expression that identifies the collections to search for 

2402 datasets, such as a `str` (for full matches or partial matches 

2403 via globs), `re.Pattern` (for partial matches), or iterable 

2404 thereof. ``...`` can be used to search all collections (actually 

2405 just all `~CollectionType.RUN` collections, because this will still 

2406 find all datasets). If not provided, ``self.default.collections`` 

2407 is used. See :ref:`daf_butler_collection_expressions` for more 

2408 information. 

2409 collectionTypes : `~collections.abc.Set` [ `CollectionType` ], optional 

2410 If provided, only yield associations from collections of these 

2411 types. 

2412 flattenChains : `bool`, optional 

2413 If `True`, search in the children of `~CollectionType.CHAINED` 

2414 collections. If `False`, ``CHAINED`` collections are ignored. 

2415 

2416 Yields 

2417 ------ 

2418 association : `.DatasetAssociation` 

2419 Object representing the relationship between a single dataset and 

2420 a single collection. 

2421 

2422 Raises 

2423 ------ 

2424 lsst.daf.butler.registry.NoDefaultCollectionError 

2425 Raised if ``collections`` is `None` and 

2426 ``self.defaults.collections`` is `None`. 

2427 lsst.daf.butler.registry.CollectionExpressionError 

2428 Raised when ``collections`` expression is invalid. 

2429 """ 

2430 if collections is None: 

2431 if not self.defaults.collections: 

2432 raise NoDefaultCollectionError( 

2433 "No collections provided to queryDatasetAssociations, " 

2434 "and no defaults from registry construction." 

2435 ) 

2436 collections = self.defaults.collections 

2437 collection_wildcard = CollectionWildcard.from_expression(collections) 

2438 backend = queries.SqlQueryBackend(self._db, self._managers) 

2439 parent_dataset_type, _ = backend.resolve_single_dataset_type_wildcard(datasetType, components=False) 

2440 timespan_tag = DatasetColumnTag(parent_dataset_type.name, "timespan") 

2441 collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection") 

2442 for parent_collection_record in backend.resolve_collection_wildcard( 

2443 collection_wildcard, 

2444 collection_types=frozenset(collectionTypes), 

2445 flatten_chains=flattenChains, 

2446 ): 

2447 # Resolve this possibly-chained collection into a list of 

2448 # non-CHAINED collections that actually hold datasets of this 

2449 # type. 

2450 candidate_collection_records = backend.resolve_dataset_collections( 

2451 parent_dataset_type, 

2452 CollectionWildcard.from_names([parent_collection_record.name]), 

2453 allow_calibration_collections=True, 

2454 governor_constraints={}, 

2455 ) 

2456 if not candidate_collection_records: 

2457 continue 

2458 with backend.context() as context: 

2459 relation = backend.make_dataset_query_relation( 

2460 parent_dataset_type, 

2461 candidate_collection_records, 

2462 columns={"dataset_id", "run", "timespan", "collection"}, 

2463 context=context, 

2464 ) 

2465 reader = queries.DatasetRefReader( 

2466 parent_dataset_type, 

2467 translate_collection=lambda k: self._managers.collections[k].name, 

2468 full=False, 

2469 ) 

2470 for row in context.fetch_iterable(relation): 

2471 ref = reader.read(row) 

2472 collection_record = self._managers.collections[row[collection_tag]] 

2473 if collection_record.type is CollectionType.CALIBRATION: 

2474 timespan = row[timespan_tag] 

2475 else: 

2476 # For backwards compatibility and (possibly?) user 

2477 # convenience we continue to define the timespan of a 

2478 # DatasetAssociation row for a non-CALIBRATION 

2479 # collection to be None rather than a fully unbounded 

2480 # timespan. 

2481 timespan = None 

2482 yield DatasetAssociation(ref=ref, collection=collection_record.name, timespan=timespan) 

2483 
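# Usage sketch (not part of this module): the dataset type name and the
# collection-type constraint are illustrative assumptions.
#
#     for assoc in registry.queryDatasetAssociations(
#         "bias", collectionTypes={CollectionType.CALIBRATION}
#     ):
#         print(assoc.collection, assoc.ref.dataId, assoc.timespan)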

2484 def get_datastore_records(self, ref: DatasetRef) -> DatasetRef: 

2485 """Retrieve datastore records for given ref. 

2486 

2487 Parameters 

2488 ---------- 

2489 ref : `DatasetRef` 

2490 Dataset reference for which to retrieve its corresponding datastore 

2491 records. 

2492 

2493 Returns 

2494 ------- 

2495 updated_ref : `DatasetRef` 

2496 Dataset reference with filled datastore records. 

2497 

2498 Notes 

2499 ----- 

2500 If this method is called with a dataset ref that is not known to the 

2501 registry, a reference with an empty set of records is returned. 

2502 """ 

2503 datastore_records: dict[str, list[StoredDatastoreItemInfo]] = {} 

2504 for opaque, record_class in self._datastore_record_classes.items(): 

2505 records = self.fetchOpaqueData(opaque, dataset_id=ref.id) 

2506 datastore_records[opaque] = [record_class.from_record(record) for record in records] 

2507 return ref.replace(datastore_records=datastore_records) 

2508 

2509 def store_datastore_records(self, refs: Mapping[str, DatasetRef]) -> None: 

2510 """Store datastore records for given refs. 

2511 

2512 Parameters 

2513 ---------- 

2514 refs : `~collections.abc.Mapping` [`str`, `DatasetRef`] 

2515 Mapping of datastore name to a dataset reference stored in that 

2516 datastore; the reference must include datastore records. 

2517 """ 

2518 for datastore_name, ref in refs.items(): 

2519 # Store ref IDs in the bridge table. 

2520 bridge = self._managers.datastores.register(datastore_name) 

2521 bridge.insert([ref]) 

2522 

2523 # store records in opaque tables 

2524 assert ref._datastore_records is not None, "Dataset ref must have datastore records" 

2525 for table_name, records in ref._datastore_records.items(): 

2526 opaque_table = self._managers.opaque.get(table_name) 

2527 assert opaque_table is not None, f"Unexpected opaque table name {table_name}" 

2528 opaque_table.insert(*(record.to_record(dataset_id=ref.id) for record in records)) 

2529 

2530 def make_datastore_tables(self, tables: Mapping[str, DatastoreOpaqueTable]) -> None: 

2531 """Create opaque tables used by datastores. 

2532 

2533 Parameters 

2534 ---------- 

2535 tables : `~collections.abc.Mapping` 

2536 Maps opaque table name to its definition. 

2537 

2538 Notes 

2539 ----- 

2540 This method should disappear in the future when opaque table 

2541 definitions will be provided during `Registry` construction. 

2542 """ 

2543 datastore_record_classes = {} 

2544 for table_name, table_def in tables.items(): 

2545 datastore_record_classes[table_name] = table_def.record_class 

2546 try: 

2547 self._managers.opaque.register(table_name, table_def.table_spec) 

2548 except ReadOnlyDatabaseError: 

2549 # If the database is read only and we just tried and failed to 

2550 # create a table, it means someone is trying to create a 

2551 # read-only butler client for an empty repo. That should be 

2552 # okay, as long as they then try to get any datasets before 

2553 # some other client creates the table. Chances are they're 

2554 # just validating configuration. 

2555 pass 

2556 self._datastore_record_classes = datastore_record_classes 

2557 

2558 @property 

2559 def obsCoreTableManager(self) -> ObsCoreTableManager | None: 

2560 """The ObsCore manager instance for this registry 

2561 (`~.interfaces.ObsCoreTableManager` 

2562 or `None`). 

2563 

2564 The ObsCore manager may not be implemented for all registry backends, 

2565 or may not be enabled in many repositories. 

2566 """ 

2567 return self._managers.obscore 

2568 

2569 storageClasses: StorageClassFactory 

2570 """All storage classes known to the registry (`StorageClassFactory`). 

2571 """ 

2572 

2573 _defaults: RegistryDefaults 

2574 """Default collections used for registry queries (`RegistryDefaults`)."""