Coverage for python/lsst/daf/butler/registry/sql_registry.py: 18%

580 statements  

coverage.py v7.5.1, created at 2024-05-16 02:58 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30from .. import ddl 

31 

32__all__ = ("SqlRegistry",) 

33 

34import contextlib 

35import logging 

36import warnings 

37from collections.abc import Iterable, Iterator, Mapping, Sequence 

38from typing import TYPE_CHECKING, Any, Literal, cast 

39 

40import sqlalchemy 

41from lsst.daf.relation import LeafRelation, Relation 

42from lsst.resources import ResourcePathExpression 

43from lsst.utils.introspection import find_outside_stacklevel 

44from lsst.utils.iteration import ensure_iterable 

45 

46from .._column_tags import DatasetColumnTag 

47from .._config import Config 

48from .._dataset_association import DatasetAssociation 

49from .._dataset_ref import DatasetId, DatasetIdGenEnum, DatasetRef 

50from .._dataset_type import DatasetType 

51from .._exceptions import ( 

52 CalibrationLookupError, 

53 DataIdValueError, 

54 DimensionNameError, 

55 InconsistentDataIdError, 

56) 

57from .._named import NamedKeyMapping, NameLookupMapping 

58from .._storage_class import StorageClassFactory 

59from .._timespan import Timespan 

60from ..dimensions import ( 

61 DataCoordinate, 

62 DataId, 

63 Dimension, 

64 DimensionConfig, 

65 DimensionElement, 

66 DimensionGraph, 

67 DimensionGroup, 

68 DimensionRecord, 

69 DimensionUniverse, 

70) 

71from ..dimensions.record_cache import DimensionRecordCache 

72from ..progress import Progress 

73from ..registry import ( 

74 ArgumentError, 

75 CollectionExpressionError, 

76 CollectionSummary, 

77 CollectionType, 

78 CollectionTypeError, 

79 ConflictingDefinitionError, 

80 DatasetTypeError, 

81 MissingDatasetTypeError, 

82 NoDefaultCollectionError, 

83 OrphanedRecordError, 

84 RegistryConfig, 

85 RegistryConsistencyError, 

86 RegistryDefaults, 

87 queries, 

88) 

89from ..registry.interfaces import ChainedCollectionRecord, ReadOnlyDatabaseError, RunRecord 

90from ..registry.managers import RegistryManagerInstances, RegistryManagerTypes 

91from ..registry.wildcards import CollectionWildcard, DatasetTypeWildcard 

92from ..utils import _DefaultMarker, _Marker, transactional 

93 

94if TYPE_CHECKING: 

95 from .._butler_config import ButlerConfig 

96 from ..datastore._datastore import DatastoreOpaqueTable 

97 from ..datastore.stored_file_info import StoredDatastoreItemInfo 

98 from ..registry._registry import CollectionArgType 

99 from ..registry.interfaces import ( 

100 CollectionRecord, 

101 Database, 

102 DatastoreRegistryBridgeManager, 

103 ObsCoreTableManager, 

104 ) 

105 

106 

107_LOG = logging.getLogger(__name__) 

108 

109 

110class SqlRegistry: 

111 """Butler Registry implementation that uses SQL database as backend. 

112 

113 Parameters 

114 ---------- 

115 database : `Database` 

116 Database instance to store Registry. 

117 defaults : `RegistryDefaults` 

118 Default collection search path and/or output `~CollectionType.RUN` 

119 collection. 

120 managers : `RegistryManagerInstances` 

121 All the managers required for this registry. 

122 """ 

123 

124 defaultConfigFile: str | None = None 

125 """Path to configuration defaults. Accessed within the ``configs`` resource 

126 or relative to a search path. Can be `None` if no defaults are specified.

127 """ 

128 

129 @classmethod 

130 def forceRegistryConfig( 

131 cls, config: ButlerConfig | RegistryConfig | Config | str | None 

132 ) -> RegistryConfig: 

133 """Force the supplied config to a `RegistryConfig`. 

134 

135 Parameters 

136 ---------- 

137 config : `RegistryConfig`, `Config`, `str`, or `None`

138 Registry configuration. If missing, the default configuration will

139 be loaded from ``registry.yaml``.

140 

141 Returns 

142 ------- 

143 registry_config : `RegistryConfig` 

144 A registry config. 

145 """ 

146 if not isinstance(config, RegistryConfig): 

147 if isinstance(config, str | Config) or config is None: 

148 config = RegistryConfig(config) 

149 else: 

150 raise ValueError(f"Incompatible Registry configuration: {config}") 

151 return config 

152 
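# Editor-added illustrative sketch (not part of the original source): how the
# accepted config inputs are coerced.  The "registry.yaml" filename below is a
# hypothetical path assumed to exist on the config search path.
cfg_default = SqlRegistry.forceRegistryConfig(None)  # loads default configuration
cfg_from_file = SqlRegistry.forceRegistryConfig("registry.yaml")
assert isinstance(cfg_from_file, RegistryConfig)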

153 @classmethod 

154 def createFromConfig( 

155 cls, 

156 config: RegistryConfig | str | None = None, 

157 dimensionConfig: DimensionConfig | str | None = None, 

158 butlerRoot: ResourcePathExpression | None = None, 

159 ) -> SqlRegistry: 

160 """Create registry database and return `SqlRegistry` instance. 

161 

162 This method initializes database contents; the database must be empty

163 prior to calling this method.

164 

165 Parameters 

166 ---------- 

167 config : `RegistryConfig` or `str`, optional 

168 Registry configuration. If missing, the default configuration will

169 be loaded from ``registry.yaml``.

170 dimensionConfig : `DimensionConfig` or `str`, optional

171 Dimensions configuration. If missing, the default configuration

172 will be loaded from ``dimensions.yaml``.

173 butlerRoot : convertible to `lsst.resources.ResourcePath`, optional 

174 Path to the repository root this `SqlRegistry` will manage. 

175 

176 Returns 

177 ------- 

178 registry : `SqlRegistry` 

179 A new `SqlRegistry` instance. 

180 """ 

181 config = cls.forceRegistryConfig(config) 

182 config.replaceRoot(butlerRoot) 

183 

184 if isinstance(dimensionConfig, str): 

185 dimensionConfig = DimensionConfig(dimensionConfig) 

186 elif dimensionConfig is None: 

187 dimensionConfig = DimensionConfig() 

188 elif not isinstance(dimensionConfig, DimensionConfig): 

189 raise TypeError(f"Incompatible Dimension configuration type: {type(dimensionConfig)}") 

190 

191 DatabaseClass = config.getDatabaseClass() 

192 database = DatabaseClass.fromUri( 

193 config.connectionString, origin=config.get("origin", 0), namespace=config.get("namespace") 

194 ) 

195 managerTypes = RegistryManagerTypes.fromConfig(config) 

196 managers = managerTypes.makeRepo(database, dimensionConfig) 

197 return cls(database, RegistryDefaults(), managers) 

198 
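# Editor-added illustrative sketch: initializing a brand-new (empty) registry
# database.  The use of the "db" key for the connection string and the SQLite
# URI are assumptions for illustration, not taken from this file.
new_config = RegistryConfig()
new_config["db"] = "sqlite:///:memory:"
new_registry = SqlRegistry.createFromConfig(new_config)
assert new_registry.isWriteable()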

199 @classmethod 

200 def fromConfig( 

201 cls, 

202 config: ButlerConfig | RegistryConfig | Config | str, 

203 butlerRoot: ResourcePathExpression | None = None, 

204 writeable: bool = True, 

205 defaults: RegistryDefaults | None = None, 

206 ) -> SqlRegistry: 

207 """Create `Registry` subclass instance from `config`. 

208 

209 Registry database must be initialized prior to calling this method. 

210 

211 Parameters 

212 ---------- 

213 config : `ButlerConfig`, `RegistryConfig`, `Config` or `str` 

214 Registry configuration. 

215 butlerRoot : `lsst.resources.ResourcePathExpression`, optional 

216 Path to the repository root this `Registry` will manage. 

217 writeable : `bool`, optional 

218 If `True` (default) create a read-write connection to the database. 

219 defaults : `RegistryDefaults`, optional 

220 Default collection search path and/or output `~CollectionType.RUN` 

221 collection. 

222 

223 Returns 

224 ------- 

225 registry : `SqlRegistry` 

226 A new `SqlRegistry` subclass instance. 

227 """ 

228 config = cls.forceRegistryConfig(config) 

229 config.replaceRoot(butlerRoot) 

230 DatabaseClass = config.getDatabaseClass() 

231 database = DatabaseClass.fromUri( 

232 config.connectionString, 

233 origin=config.get("origin", 0), 

234 namespace=config.get("namespace"), 

235 writeable=writeable, 

236 ) 

237 managerTypes = RegistryManagerTypes.fromConfig(config) 

238 with database.session(): 

239 managers = managerTypes.loadRepo(database) 

240 if defaults is None: 

241 defaults = RegistryDefaults() 

242 return cls(database, defaults, managers) 

243 
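# Editor-added illustrative sketch: connecting read-only to an existing
# registry.  The config path, collection name, and the RegistryDefaults
# keyword arguments shown are assumptions for illustration.
ro_defaults = RegistryDefaults(collections=["ExampleCam/defaults"], run=None)
ro_registry = SqlRegistry.fromConfig("registry.yaml", writeable=False, defaults=ro_defaults)
assert not ro_registry.isWriteable()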

244 def __init__( 

245 self, 

246 database: Database, 

247 defaults: RegistryDefaults, 

248 managers: RegistryManagerInstances, 

249 ): 

250 self._db = database 

251 self._managers = managers 

252 self.storageClasses = StorageClassFactory() 

253 # This is public to SqlRegistry's internal-to-daf_butler callers, but 

254 # it is intentionally not part of RegistryShim. 

255 self.dimension_record_cache = DimensionRecordCache( 

256 self._managers.dimensions.universe, 

257 fetch=self._managers.dimensions.fetch_cache_dict, 

258 ) 

259 # Intentionally invoke property setter to initialize defaults. This 

260 # can only be done after most of the rest of Registry has already been 

261 # initialized, and must be done before the property getter is used. 

262 self.defaults = defaults 

263 # TODO: This is currently initialized by `make_datastore_tables`, 

264 # eventually we'll need to do it during construction. 

265 # The mapping is indexed by the opaque table name. 

266 self._datastore_record_classes: Mapping[str, type[StoredDatastoreItemInfo]] = {} 

267 

268 def __str__(self) -> str: 

269 return str(self._db) 

270 

271 def __repr__(self) -> str: 

272 return f"SqlRegistry({self._db!r}, {self.dimensions!r})" 

273 

274 def isWriteable(self) -> bool: 

275 """Return `True` if this registry allows write operations, and `False` 

276 otherwise. 

277 """ 

278 return self._db.isWriteable() 

279 

280 def copy(self, defaults: RegistryDefaults | None = None) -> SqlRegistry: 

281 """Create a new `SqlRegistry` backed by the same data repository 

282 as this one and sharing a database connection pool with it, but with 

283 independent defaults and database sessions. 

284 

285 Parameters 

286 ---------- 

287 defaults : `~lsst.daf.butler.registry.RegistryDefaults`, optional 

288 Default collections and data ID values for the new registry. If 

289 not provided, ``self.defaults`` will be used (but future changes 

290 to either registry's defaults will not affect the other). 

291 

292 Returns 

293 ------- 

294 copy : `SqlRegistry` 

295 A new `SqlRegistry` instance with its own defaults. 

296 """ 

297 if defaults is None: 

298 # No need to copy, because `RegistryDefaults` is immutable; we 

299 # effectively copy on write. 

300 defaults = self.defaults 

301 db = self._db.clone() 

302 result = SqlRegistry(db, defaults, self._managers.clone(db)) 

303 result._datastore_record_classes = dict(self._datastore_record_classes) 

304 result.dimension_record_cache.load_from(self.dimension_record_cache) 

305 return result 

306 

307 @property 

308 def dimensions(self) -> DimensionUniverse: 

309 """Definitions of all dimensions recognized by this `Registry` 

310 (`DimensionUniverse`). 

311 """ 

312 return self._managers.dimensions.universe 

313 

314 @property 

315 def defaults(self) -> RegistryDefaults: 

316 """Default collection search path and/or output `~CollectionType.RUN` 

317 collection (`~lsst.daf.butler.registry.RegistryDefaults`). 

318 

319 This is an immutable struct whose components may not be set 

320 individually, but the entire struct can be set by assigning to this 

321 property. 

322 """ 

323 return self._defaults 

324 

325 @defaults.setter 

326 def defaults(self, value: RegistryDefaults) -> None: 

327 if value.run is not None: 

328 self.registerRun(value.run) 

329 value.finish(self) 

330 self._defaults = value 

331 

332 def refresh(self) -> None: 

333 """Refresh all in-memory state by querying the database. 

334 

335 This may be necessary to enable querying for entities added by other 

336 registry instances after this one was constructed. 

337 """ 

338 self.dimension_record_cache.reset() 

339 with self._db.transaction(): 

340 self._managers.refresh() 

341 

342 def caching_context(self) -> contextlib.AbstractContextManager[None]: 

343 """Return context manager that enables caching. 

344 

345 Returns 

346 ------- 

347 manager 

348 A context manager that enables client-side caching. Entering 

349 the context returns `None`. 

350 """ 

351 return self._managers.caching_context_manager() 

352 

353 @contextlib.contextmanager 

354 def transaction(self, *, savepoint: bool = False) -> Iterator[None]: 

355 """Return a context manager that represents a transaction. 

356 

357 Parameters 

358 ---------- 

359 savepoint : `bool` 

360 Whether to issue a SAVEPOINT in the database. 

361 

362 Yields 

363 ------ 

364 `None` 

365 """ 

366 with self._db.transaction(savepoint=savepoint): 

367 yield 

368 
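# Editor-added illustrative sketch: grouping write operations so they commit
# or roll back together.  The pre-existing ``registry`` instance and the
# collection names are hypothetical; setCollectionChain is itself
# @transactional and simply joins the outer transaction.
with registry.transaction(savepoint=True):
    registry.setCollectionChain("u/example/chain", ["u/example/run2", "u/example/run1"])
    registry.setCollectionDocumentation("u/example/chain", "Example chained collection.")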

369 def resetConnectionPool(self) -> None: 

370 """Reset SQLAlchemy connection pool for `SqlRegistry` database. 

371 

372 This operation is useful when using the registry with fork-based

373 multiprocessing. To use the registry across a fork boundary, make

374 sure that there are no currently active connections (no session or

375 transaction in progress) and that the connection pool is reset using

376 this method. The child process should call this method immediately

377 after the fork.

378 """ 

379 self._db._engine.dispose() 

380 
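# Editor-added illustrative sketch of the fork pattern described above; the
# pre-existing ``registry`` object is assumed.  POSIX-only.
import os

pid = os.fork()  # the parent must have no open session or transaction here
if pid == 0:
    # Child: discard connections inherited from the parent before any query.
    registry.resetConnectionPool()
    print(registry.isWriteable())
    os._exit(0)
else:
    os.waitpid(pid, 0)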

381 def registerOpaqueTable(self, tableName: str, spec: ddl.TableSpec) -> None: 

382 """Add an opaque (to the `Registry`) table for use by a `Datastore` or 

383 other data repository client. 

384 

385 Opaque table records can be added via `insertOpaqueData`, retrieved via 

386 `fetchOpaqueData`, and removed via `deleteOpaqueData`. 

387 

388 Parameters 

389 ---------- 

390 tableName : `str` 

391 Logical name of the opaque table. This may differ from the 

392 actual name used in the database by a prefix and/or suffix. 

393 spec : `ddl.TableSpec` 

394 Specification for the table to be added. 

395 """ 

396 self._managers.opaque.register(tableName, spec) 

397 

398 @transactional 

399 def insertOpaqueData(self, tableName: str, *data: dict) -> None: 

400 """Insert records into an opaque table. 

401 

402 Parameters 

403 ---------- 

404 tableName : `str` 

405 Logical name of the opaque table. Must match the name used in a 

406 previous call to `registerOpaqueTable`. 

407 *data 

408 Each additional positional argument is a dictionary that represents 

409 a single row to be added. 

410 """ 

411 self._managers.opaque[tableName].insert(*data) 

412 

413 def fetchOpaqueData(self, tableName: str, **where: Any) -> Iterator[Mapping[str, Any]]: 

414 """Retrieve records from an opaque table. 

415 

416 Parameters 

417 ---------- 

418 tableName : `str` 

419 Logical name of the opaque table. Must match the name used in a 

420 previous call to `registerOpaqueTable`. 

421 **where 

422 Additional keyword arguments are interpreted as equality 

423 constraints that restrict the returned rows (combined with AND); 

424 keyword arguments are column names and values are the values they 

425 must have. 

426 

427 Yields 

428 ------ 

429 row : `dict` 

430 A dictionary representing a single result row. 

431 """ 

432 yield from self._managers.opaque[tableName].fetch(**where) 

433 

434 @transactional 

435 def deleteOpaqueData(self, tableName: str, **where: Any) -> None: 

436 """Remove records from an opaque table. 

437 

438 Parameters 

439 ---------- 

440 tableName : `str` 

441 Logical name of the opaque table. Must match the name used in a 

442 previous call to `registerOpaqueTable`. 

443 **where 

444 Additional keyword arguments are interpreted as equality 

445 constraints that restrict the deleted rows (combined with AND); 

446 keyword arguments are column names and values are the values they 

447 must have. 

448 """ 

449 self._managers.opaque[tableName].delete(where.keys(), where) 

450 
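# Editor-added illustrative sketch: registering an opaque table and
# round-tripping a row through it.  The table name, column layout, and the
# ddl.TableSpec / ddl.FieldSpec constructor arguments are assumptions for
# illustration.
example_spec = ddl.TableSpec(
    fields=[
        ddl.FieldSpec(name="dataset_id", dtype=sqlalchemy.BigInteger, primaryKey=True),
        ddl.FieldSpec(name="path", dtype=sqlalchemy.String, length=256),
    ]
)
registry.registerOpaqueTable("example_opaque", example_spec)
registry.insertOpaqueData("example_opaque", {"dataset_id": 1, "path": "a.fits"})
for row in registry.fetchOpaqueData("example_opaque", dataset_id=1):
    print(row["path"])
registry.deleteOpaqueData("example_opaque", dataset_id=1)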

451 def registerCollection( 

452 self, name: str, type: CollectionType = CollectionType.TAGGED, doc: str | None = None 

453 ) -> bool: 

454 """Add a new collection if one with the given name does not exist. 

455 

456 Parameters 

457 ---------- 

458 name : `str` 

459 The name of the collection to create. 

460 type : `CollectionType` 

461 Enum value indicating the type of collection to create. 

462 doc : `str`, optional 

463 Documentation string for the collection. 

464 

465 Returns 

466 ------- 

467 registered : `bool` 

468 `True` if the collection was created by this call; `False` if it

469 already existed.

470 

471 Notes 

472 ----- 

473 This method cannot be called within transactions, as it needs to be 

474 able to perform its own transaction to be concurrent. 

475 """ 

476 _, registered = self._managers.collections.register(name, type, doc=doc) 

477 return registered 

478 

479 def getCollectionType(self, name: str) -> CollectionType: 

480 """Return an enumeration value indicating the type of the given 

481 collection. 

482 

483 Parameters 

484 ---------- 

485 name : `str` 

486 The name of the collection. 

487 

488 Returns 

489 ------- 

490 type : `CollectionType` 

491 Enum value indicating the type of this collection. 

492 

493 Raises 

494 ------ 

495 lsst.daf.butler.registry.MissingCollectionError 

496 Raised if no collection with the given name exists. 

497 """ 

498 return self._managers.collections.find(name).type 

499 

500 def get_collection_record(self, name: str) -> CollectionRecord: 

501 """Return the record for this collection. 

502 

503 Parameters 

504 ---------- 

505 name : `str` 

506 Name of the collection for which the record is to be retrieved. 

507 

508 Returns 

509 ------- 

510 record : `CollectionRecord` 

511 The record for this collection. 

512 """ 

513 return self._managers.collections.find(name) 

514 

515 def registerRun(self, name: str, doc: str | None = None) -> bool: 

516 """Add a new run if one with the given name does not exist. 

517 

518 Parameters 

519 ---------- 

520 name : `str` 

521 The name of the run to create. 

522 doc : `str`, optional 

523 Documentation string for the collection. 

524 

525 Returns 

526 ------- 

527 registered : `bool` 

528 Boolean indicating whether a new run was registered. `False` 

529 if it already existed. 

530 

531 Notes 

532 ----- 

533 This method cannot be called within transactions, as it needs to be 

534 able to perform its own transaction to be concurrent. 

535 """ 

536 _, registered = self._managers.collections.register(name, CollectionType.RUN, doc=doc) 

537 return registered 

538 
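# Editor-added illustrative sketch: both registration methods are idempotent
# and report whether this call created the collection.  Names are hypothetical.
created = registry.registerRun("u/example/run1", doc="Example output run.")
again = registry.registerRun("u/example/run1")
assert created and not again
registry.registerCollection("u/example/tagged", CollectionType.TAGGED, doc="Example tagged collection.")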

539 @transactional 

540 def removeCollection(self, name: str) -> None: 

541 """Remove the given collection from the registry. 

542 

543 Parameters 

544 ---------- 

545 name : `str` 

546 The name of the collection to remove. 

547 

548 Raises 

549 ------ 

550 lsst.daf.butler.registry.MissingCollectionError 

551 Raised if no collection with the given name exists. 

552 sqlalchemy.exc.IntegrityError 

553 Raised if the database rows associated with the collection are 

554 still referenced by some other table, such as a dataset in a 

555 datastore (for `~CollectionType.RUN` collections only) or a 

556 `~CollectionType.CHAINED` collection of which this collection is 

557 a child. 

558 

559 Notes 

560 ----- 

561 If this is a `~CollectionType.RUN` collection, all datasets and quanta 

562 in it will be removed from the `Registry` database. This requires that

563 those datasets be removed (or at least trashed) from any datastores 

564 that hold them first. 

565 

566 A collection may not be deleted as long as it is referenced by a 

567 `~CollectionType.CHAINED` collection; the ``CHAINED`` collection must 

568 be deleted or redefined first. 

569 """ 

570 self._managers.collections.remove(name) 

571 

572 def getCollectionChain(self, parent: str) -> tuple[str, ...]: 

573 """Return the child collections in a `~CollectionType.CHAINED` 

574 collection. 

575 

576 Parameters 

577 ---------- 

578 parent : `str` 

579 Name of the chained collection. Must have already been added via 

580 a call to `Registry.registerCollection`. 

581 

582 Returns 

583 ------- 

584 children : `~collections.abc.Sequence` [ `str` ] 

585 An ordered sequence of collection names that are searched when the 

586 given chained collection is searched. 

587 

588 Raises 

589 ------ 

590 lsst.daf.butler.registry.MissingCollectionError 

591 Raised if ``parent`` does not exist in the `Registry`. 

592 lsst.daf.butler.registry.CollectionTypeError 

593 Raised if ``parent`` does not correspond to a 

594 `~CollectionType.CHAINED` collection. 

595 """ 

596 record = self._managers.collections.find(parent) 

597 if record.type is not CollectionType.CHAINED: 

598 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.") 

599 assert isinstance(record, ChainedCollectionRecord) 

600 return record.children 

601 

602 @transactional 

603 def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None: 

604 """Define or redefine a `~CollectionType.CHAINED` collection. 

605 

606 Parameters 

607 ---------- 

608 parent : `str` 

609 Name of the chained collection. Must have already been added via 

610 a call to `Registry.registerCollection`. 

611 children : collection expression 

612 An expression defining an ordered search of child collections, 

613 generally an iterable of `str`; see 

614 :ref:`daf_butler_collection_expressions` for more information. 

615 flatten : `bool`, optional 

616 If `True` (`False` is default), recursively flatten out any nested 

617 `~CollectionType.CHAINED` collections in ``children`` first. 

618 

619 Raises 

620 ------ 

621 lsst.daf.butler.registry.MissingCollectionError 

622 Raised when any of the given collections do not exist in the 

623 `Registry`. 

624 lsst.daf.butler.registry.CollectionTypeError 

625 Raised if ``parent`` does not correspond to a 

626 `~CollectionType.CHAINED` collection. 

627 CollectionCycleError 

628 Raised if the given collections contain a cycle.

629 

630 Notes 

631 ----- 

632 If this function is called within a call to ``Butler.transaction``, it 

633 will hold a lock that prevents other processes from modifying the 

634 parent collection until the end of the transaction. Keep these 

635 transactions short. 

636 """ 

637 children = CollectionWildcard.from_expression(children).require_ordered() 

638 if flatten: 

639 children = self.queryCollections(children, flattenChains=True) 

640 

641 self._managers.collections.update_chain(parent, list(children), allow_use_in_caching_context=True) 

642 
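# Editor-added illustrative sketch: defining the search order of a CHAINED
# collection over two existing runs.  All collection names are hypothetical.
registry.registerCollection("u/example/chain", CollectionType.CHAINED)
registry.setCollectionChain("u/example/chain", ["u/example/run2", "u/example/run1"])
print(registry.getCollectionChain("u/example/chain"))  # ('u/example/run2', 'u/example/run1')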

643 def getCollectionParentChains(self, collection: str) -> set[str]: 

644 """Return the CHAINED collections that directly contain the given one. 

645 

646 Parameters 

647 ---------- 

648 collection : `str` 

649 Name of the collection. 

650 

651 Returns 

652 ------- 

653 chains : `set` of `str` 

654 Set of `~CollectionType.CHAINED` collection names. 

655 """ 

656 return self._managers.collections.getParentChains(self._managers.collections.find(collection).key) 

657 

658 def getCollectionDocumentation(self, collection: str) -> str | None: 

659 """Retrieve the documentation string for a collection. 

660 

661 Parameters 

662 ---------- 

663 collection : `str` 

664 Name of the collection. 

665 

666 Returns 

667 ------- 

668 docs : `str` or `None` 

669 Docstring for the collection with the given name. 

670 """ 

671 return self._managers.collections.getDocumentation(self._managers.collections.find(collection).key) 

672 

673 def setCollectionDocumentation(self, collection: str, doc: str | None) -> None: 

674 """Set the documentation string for a collection. 

675 

676 Parameters 

677 ---------- 

678 collection : `str` 

679 Name of the collection. 

680 doc : `str` or `None` 

681 Docstring for the collection with the given name; will replace any 

682 existing docstring. Passing `None` will remove any existing 

683 docstring. 

684 """ 

685 self._managers.collections.setDocumentation(self._managers.collections.find(collection).key, doc) 

686 

687 def getCollectionSummary(self, collection: str) -> CollectionSummary: 

688 """Return a summary for the given collection. 

689 

690 Parameters 

691 ---------- 

692 collection : `str` 

693 Name of the collection for which a summary is to be retrieved. 

694 

695 Returns 

696 ------- 

697 summary : `~lsst.daf.butler.registry.CollectionSummary` 

698 Summary of the dataset types and governor dimension values in 

699 this collection. 

700 """ 

701 record = self._managers.collections.find(collection) 

702 return self._managers.datasets.getCollectionSummary(record) 

703 

704 def registerDatasetType(self, datasetType: DatasetType) -> bool: 

705 """Add a new `DatasetType` to the Registry. 

706 

707 It is not an error to register the same `DatasetType` twice. 

708 

709 Parameters 

710 ---------- 

711 datasetType : `DatasetType` 

712 The `DatasetType` to be added. 

713 

714 Returns 

715 ------- 

716 inserted : `bool` 

717 `True` if ``datasetType`` was inserted, `False` if an identical 

718 existing `DatasetType` was found. Note that in either case the 

719 DatasetType is guaranteed to be defined in the Registry 

720 consistently with the given definition. 

721 

722 Raises 

723 ------ 

724 ValueError 

725 Raised if the dimensions or storage class are invalid. 

726 lsst.daf.butler.registry.ConflictingDefinitionError 

727 Raised if this `DatasetType` is already registered with a different 

728 definition. 

729 

730 Notes 

731 ----- 

732 This method cannot be called within transactions, as it needs to be 

733 able to perform its own transaction to be concurrent. 

734 """ 

735 return self._managers.datasets.register(datasetType) 

736 
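# Editor-added illustrative sketch: defining and registering a dataset type.
# The name, dimensions, storage class, and the DatasetType constructor
# arguments shown are assumptions for illustration.
example_type = DatasetType(
    "example_catalog",
    dimensions=["instrument", "visit", "detector"],
    storageClass="DataFrame",
    universe=registry.dimensions,
)
inserted = registry.registerDatasetType(example_type)  # False if already present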

737 def removeDatasetType(self, name: str | tuple[str, ...]) -> None: 

738 """Remove the named `DatasetType` from the registry. 

739 

740 .. warning:: 

741 

742 Registry implementations can cache the dataset type definitions. 

743 This means that deleting the dataset type definition may result in 

744 unexpected behavior from other butler processes that are active 

745 that have not seen the deletion. 

746 

747 Parameters 

748 ---------- 

749 name : `str` or `tuple` [`str`] 

750 Name of the type to be removed or tuple containing a list of type 

751 names to be removed. Wildcards are allowed. 

752 

753 Raises 

754 ------ 

755 lsst.daf.butler.registry.OrphanedRecordError 

756 Raised if an attempt is made to remove the dataset type definition 

757 when there are already datasets associated with it. 

758 

759 Notes 

760 ----- 

761 If the dataset type is not registered the method will return without 

762 action. 

763 """ 

764 for datasetTypeExpression in ensure_iterable(name): 

765 # Catch any warnings from the caller specifying a component 

766 # dataset type. This will result in an error later but the 

767 # warning could be confusing when the caller is not querying 

768 # anything. 

769 with warnings.catch_warnings(): 

770 warnings.simplefilter("ignore", category=FutureWarning) 

771 datasetTypes = list(self.queryDatasetTypes(datasetTypeExpression)) 

772 if not datasetTypes: 

773 _LOG.info("Dataset type %r not defined", datasetTypeExpression) 

774 else: 

775 for datasetType in datasetTypes: 

776 self._managers.datasets.remove(datasetType.name) 

777 _LOG.info("Removed dataset type %r", datasetType.name) 

778 

779 def getDatasetType(self, name: str) -> DatasetType: 

780 """Get the `DatasetType`. 

781 

782 Parameters 

783 ---------- 

784 name : `str` 

785 Name of the type. 

786 

787 Returns 

788 ------- 

789 type : `DatasetType` 

790 The `DatasetType` associated with the given name. 

791 

792 Raises 

793 ------ 

794 lsst.daf.butler.registry.MissingDatasetTypeError 

795 Raised if the requested dataset type has not been registered. 

796 

797 Notes 

798 ----- 

799 This method handles component dataset types automatically, though most 

800 other registry operations do not. 

801 """ 

802 parent_name, component = DatasetType.splitDatasetTypeName(name) 

803 storage = self._managers.datasets[parent_name] 

804 if component is None: 

805 return storage.datasetType 

806 else: 

807 return storage.datasetType.makeComponentDatasetType(component) 

808 

809 def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool: 

810 """Test whether the given dataset ID generation mode is supported by 

811 `insertDatasets`. 

812 

813 Parameters 

814 ---------- 

815 mode : `DatasetIdGenEnum` 

816 Enum value for the mode to test. 

817 

818 Returns 

819 ------- 

820 supported : `bool` 

821 Whether the given mode is supported. 

822 """ 

823 return self._managers.datasets.supportsIdGenerationMode(mode) 

824 

825 def findDataset( 

826 self, 

827 datasetType: DatasetType | str, 

828 dataId: DataId | None = None, 

829 *, 

830 collections: CollectionArgType | None = None, 

831 timespan: Timespan | None = None, 

832 datastore_records: bool = False, 

833 **kwargs: Any, 

834 ) -> DatasetRef | None: 

835 """Find a dataset given its `DatasetType` and data ID. 

836 

837 This can be used to obtain a `DatasetRef` that permits the dataset to 

838 be read from a `Datastore`. If the dataset is a component and cannot

839 be found using the provided dataset type, a dataset ref for the parent 

840 will be returned instead but with the correct dataset type. 

841 

842 Parameters 

843 ---------- 

844 datasetType : `DatasetType` or `str` 

845 A `DatasetType` or the name of one. If this is a `DatasetType` 

846 instance, its storage class will be respected and propagated to 

847 the output, even if it differs from the dataset type definition 

848 in the registry, as long as the storage classes are convertible. 

849 dataId : `dict` or `DataCoordinate`, optional 

850 A `dict`-like object containing the `Dimension` links that identify 

851 the dataset within a collection. 

852 collections : collection expression, optional 

853 An expression that fully or partially identifies the collections to 

854 search for the dataset; see 

855 :ref:`daf_butler_collection_expressions` for more information. 

856 Defaults to ``self.defaults.collections``. 

857 timespan : `Timespan`, optional 

858 A timespan that the validity range of the dataset must overlap. 

859 If not provided, any `~CollectionType.CALIBRATION` collections 

860 matched by the ``collections`` argument will not be searched. 

861 datastore_records : `bool`, optional 

862 Whether to attach datastore records to the `DatasetRef`. 

863 **kwargs 

864 Additional keyword arguments passed to 

865 `DataCoordinate.standardize` to convert ``dataId`` to a true 

866 `DataCoordinate` or augment an existing one. 

867 

868 Returns 

869 ------- 

870 ref : `DatasetRef` 

871 A reference to the dataset, or `None` if no matching Dataset 

872 was found. 

873 

874 Raises 

875 ------ 

876 lsst.daf.butler.registry.NoDefaultCollectionError 

877 Raised if ``collections`` is `None` and 

878 ``self.defaults.collections`` is `None`. 

879 LookupError 

880 Raised if one or more data ID keys are missing. 

881 lsst.daf.butler.registry.MissingDatasetTypeError 

882 Raised if the dataset type does not exist. 

883 lsst.daf.butler.registry.MissingCollectionError 

884 Raised if any of ``collections`` does not exist in the registry. 

885 

886 Notes 

887 ----- 

888 This method simply returns `None` and does not raise an exception even 

889 when the set of collections searched is intrinsically incompatible with 

890 the dataset type, e.g. if ``datasetType.isCalibration() is False``, but 

891 only `~CollectionType.CALIBRATION` collections are being searched. 

892 This may make it harder to debug some lookup failures, but the behavior 

893 is intentional; we consider it more important that failed searches are 

894 reported consistently, regardless of the reason, and that adding 

895 additional collections that do not contain a match to the search path 

896 never changes the behavior. 

897 

898 This method handles component dataset types automatically, though most 

899 other registry operations do not. 

900 """ 

901 if collections is None: 

902 if not self.defaults.collections: 

903 raise NoDefaultCollectionError( 

904 "No collections provided to findDataset, and no defaults from registry construction." 

905 ) 

906 collections = self.defaults.collections 

907 backend = queries.SqlQueryBackend(self._db, self._managers, self.dimension_record_cache) 

908 with backend.caching_context(): 

909 collection_wildcard = CollectionWildcard.from_expression(collections, require_ordered=True) 

910 if collection_wildcard.empty(): 

911 return None 

912 matched_collections = backend.resolve_collection_wildcard(collection_wildcard) 

913 resolved_dataset_type = backend.resolve_single_dataset_type_wildcard(datasetType) 

914 dataId = DataCoordinate.standardize( 

915 dataId, 

916 dimensions=resolved_dataset_type.dimensions, 

917 universe=self.dimensions, 

918 defaults=self.defaults.dataId, 

919 **kwargs, 

920 ) 

921 governor_constraints = {name: {cast(str, dataId[name])} for name in dataId.dimensions.governors} 

922 (filtered_collections,) = backend.filter_dataset_collections( 

923 [resolved_dataset_type], 

924 matched_collections, 

925 governor_constraints=governor_constraints, 

926 ).values() 

927 if not filtered_collections: 

928 return None 

929 if timespan is None: 

930 filtered_collections = [ 

931 collection_record 

932 for collection_record in filtered_collections 

933 if collection_record.type is not CollectionType.CALIBRATION 

934 ] 

935 if filtered_collections: 

936 requested_columns = {"dataset_id", "run", "collection"} 

937 with backend.context() as context: 

938 predicate = context.make_data_coordinate_predicate( 

939 dataId.subset(resolved_dataset_type.dimensions), full=False 

940 ) 

941 if timespan is not None: 

942 requested_columns.add("timespan") 

943 predicate = predicate.logical_and( 

944 context.make_timespan_overlap_predicate( 

945 DatasetColumnTag(resolved_dataset_type.name, "timespan"), timespan 

946 ) 

947 ) 

948 relation = backend.make_dataset_query_relation( 

949 resolved_dataset_type, filtered_collections, requested_columns, context 

950 ).with_rows_satisfying(predicate) 

951 rows = list(context.fetch_iterable(relation)) 

952 else: 

953 rows = [] 

954 if not rows: 

955 return None 

956 elif len(rows) == 1: 

957 best_row = rows[0] 

958 else: 

959 rank_by_collection_key = {record.key: n for n, record in enumerate(filtered_collections)} 

960 collection_tag = DatasetColumnTag(resolved_dataset_type.name, "collection") 

961 row_iter = iter(rows) 

962 best_row = next(row_iter) 

963 best_rank = rank_by_collection_key[best_row[collection_tag]] 

964 have_tie = False 

965 for row in row_iter: 

966 if (rank := rank_by_collection_key[row[collection_tag]]) < best_rank: 

967 best_row = row 

968 best_rank = rank 

969 have_tie = False 

970 elif rank == best_rank: 

971 have_tie = True 

972 assert timespan is not None, "Rank ties should be impossible given DB constraints." 

973 if have_tie: 

974 raise CalibrationLookupError( 

975 f"Ambiguous calibration lookup for {resolved_dataset_type.name} in collections " 

976 f"{collection_wildcard.strings} with timespan {timespan}." 

977 ) 

978 reader = queries.DatasetRefReader( 

979 resolved_dataset_type, 

980 translate_collection=lambda k: self._managers.collections[k].name, 

981 ) 

982 ref = reader.read(best_row, data_id=dataId) 

983 if datastore_records: 

984 ref = self.get_datastore_records(ref) 

985 

986 return ref 

987 
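# Editor-added illustrative sketch: looking up a calibration dataset.  The
# dataset type, data ID values, collection name, and the unbounded
# Timespan(None, None) construction are assumptions for illustration.
ref = registry.findDataset(
    "bias",
    instrument="ExampleCam",
    detector=12,
    collections=["ExampleCam/calib"],
    timespan=Timespan(None, None),
)
if ref is None:
    print("no matching bias found")
else:
    print(ref.run, ref.id)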

988 @transactional 

989 def insertDatasets( 

990 self, 

991 datasetType: DatasetType | str, 

992 dataIds: Iterable[DataId], 

993 run: str | None = None, 

994 expand: bool = True, 

995 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE, 

996 ) -> list[DatasetRef]: 

997 """Insert one or more datasets into the `Registry`. 

998 

999 This always adds new datasets; to associate existing datasets with 

1000 a new collection, use ``associate``. 

1001 

1002 Parameters 

1003 ---------- 

1004 datasetType : `DatasetType` or `str` 

1005 A `DatasetType` or the name of one. 

1006 dataIds : `~collections.abc.Iterable` of `dict` or `DataCoordinate` 

1007 Dimension-based identifiers for the new datasets. 

1008 run : `str`, optional 

1009 The name of the run that produced the datasets. Defaults to 

1010 ``self.defaults.run``. 

1011 expand : `bool`, optional 

1012 If `True` (default), expand data IDs as they are inserted. This is 

1013 necessary in general to allow the datastore to generate file templates,

1014 but it may be disabled if the caller can guarantee this is 

1015 unnecessary. 

1016 idGenerationMode : `DatasetIdGenEnum`, optional 

1017 Specifies option for generating dataset IDs. By default unique IDs 

1018 are generated for each inserted dataset. 

1019 

1020 Returns 

1021 ------- 

1022 refs : `list` of `DatasetRef` 

1023 Resolved `DatasetRef` instances for all given data IDs (in the same 

1024 order). 

1025 

1026 Raises 

1027 ------ 

1028 lsst.daf.butler.registry.DatasetTypeError 

1029 Raised if ``datasetType`` is not known to registry. 

1030 lsst.daf.butler.registry.CollectionTypeError 

1031 Raised if ``run`` collection type is not `~CollectionType.RUN`. 

1032 lsst.daf.butler.registry.NoDefaultCollectionError 

1033 Raised if ``run`` is `None` and ``self.defaults.run`` is `None`. 

1034 lsst.daf.butler.registry.ConflictingDefinitionError 

1035 If a dataset with the same dataset type and data ID as one of those 

1036 given already exists in ``run``. 

1037 lsst.daf.butler.registry.MissingCollectionError 

1038 Raised if ``run`` does not exist in the registry. 

1039 """ 

1040 if isinstance(datasetType, DatasetType): 

1041 storage = self._managers.datasets.find(datasetType.name) 

1042 if storage is None: 

1043 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.") 

1044 else: 

1045 storage = self._managers.datasets.find(datasetType) 

1046 if storage is None: 

1047 raise DatasetTypeError(f"DatasetType with name '{datasetType}' has not been registered.") 

1048 if run is None: 

1049 if self.defaults.run is None: 

1050 raise NoDefaultCollectionError( 

1051 "No run provided to insertDatasets, and no default from registry construction." 

1052 ) 

1053 run = self.defaults.run 

1054 runRecord = self._managers.collections.find(run) 

1055 if runRecord.type is not CollectionType.RUN: 

1056 raise CollectionTypeError( 

1057 f"Given collection is of type {runRecord.type.name}; RUN collection required." 

1058 ) 

1059 assert isinstance(runRecord, RunRecord) 

1060 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG) 

1061 if expand: 

1062 expandedDataIds = [ 

1063 self.expandDataId(dataId, dimensions=storage.datasetType.dimensions) 

1064 for dataId in progress.wrap(dataIds, f"Expanding {storage.datasetType.name} data IDs") 

1065 ] 

1066 else: 

1067 expandedDataIds = [ 

1068 DataCoordinate.standardize(dataId, graph=storage.datasetType.dimensions) for dataId in dataIds 

1069 ] 

1070 try: 

1071 refs = list(storage.insert(runRecord, expandedDataIds, idGenerationMode)) 

1072 if self._managers.obscore: 

1073 context = queries.SqlQueryContext(self._db, self._managers.column_types) 

1074 self._managers.obscore.add_datasets(refs, context) 

1075 except sqlalchemy.exc.IntegrityError as err: 

1076 raise ConflictingDefinitionError( 

1077 "A database constraint failure was triggered by inserting " 

1078 f"one or more datasets of type {storage.datasetType} into " 

1079 f"collection '{run}'. " 

1080 "This probably means a dataset with the same data ID " 

1081 "and dataset type already exists, but it may also mean a " 

1082 "dimension row is missing." 

1083 ) from err 

1084 return refs 

1085 
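# Editor-added illustrative sketch: inserting new datasets into a RUN
# collection.  The dataset type is assumed to be registered already, and the
# dimension records behind the data ID are assumed to exist (required because
# expand=True by default).
registry.registerRun("u/example/run1")
new_refs = registry.insertDatasets(
    "example_catalog",
    dataIds=[{"instrument": "ExampleCam", "visit": 903334, "detector": 12}],
    run="u/example/run1",
)
print(new_refs[0].id)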

1086 @transactional 

1087 def _importDatasets( 

1088 self, 

1089 datasets: Iterable[DatasetRef], 

1090 expand: bool = True, 

1091 ) -> list[DatasetRef]: 

1092 """Import one or more datasets into the `Registry`. 

1093 

1094 The difference from the `insertDatasets` method is that this method

1095 accepts `DatasetRef` instances which should already be resolved and

1096 have a dataset ID. If the registry supports globally-unique dataset

1097 IDs (e.g. `uuid.UUID`), datasets which already exist in the registry

1098 will be ignored if imported again.

1099 

1100 Parameters 

1101 ---------- 

1102 datasets : `~collections.abc.Iterable` of `DatasetRef` 

1103 Datasets to be inserted. All `DatasetRef` instances must have 

1104 identical ``datasetType`` and ``run`` attributes. ``run`` 

1105 attribute can be `None` and defaults to ``self.defaults.run``. 

1106 Datasets can specify an ``id`` attribute which will be used for

1107 inserted datasets. All dataset IDs must have the same type

1108 (`int` or `uuid.UUID`); if the dataset ID type does not match the

1109 configured backend, the IDs will be ignored and new IDs will be

1110 generated by the backend.

1111 expand : `bool`, optional 

1112 If `True` (default), expand data IDs as they are inserted. This is 

1113 necessary in general, but it may be disabled if the caller can 

1114 guarantee this is unnecessary. 

1115 

1116 Returns 

1117 ------- 

1118 refs : `list` of `DatasetRef` 

1119 Resolved `DatasetRef` instances for all given data IDs (in the same 

1120 order). If any of ``datasets`` has an ID which already exists in 

1121 the database then it will not be inserted or updated, but a 

1122 resolved `DatasetRef` will be returned for it in any case. 

1123 

1124 Raises 

1125 ------ 

1126 lsst.daf.butler.registry.NoDefaultCollectionError 

1127 Raised if ``run`` is `None` and ``self.defaults.run`` is `None`. 

1128 lsst.daf.butler.registry.DatasetTypeError 

1129 Raised if datasets correspond to more than one dataset type or 

1130 dataset type is not known to registry. 

1131 lsst.daf.butler.registry.ConflictingDefinitionError 

1132 If a dataset with the same dataset type and data ID as one of those 

1133 given already exists in ``run``. 

1134 lsst.daf.butler.registry.MissingCollectionError 

1135 Raised if ``run`` does not exist in the registry. 

1136 

1137 Notes 

1138 ----- 

1139 This method is considered package-private and internal to Butler 

1140 implementation. Clients outside daf_butler package should not use this 

1141 method. 

1142 """ 

1143 datasets = list(datasets) 

1144 if not datasets: 

1145 # nothing to do 

1146 return [] 

1147 

1148 # find dataset type 

1149 datasetTypes = {dataset.datasetType for dataset in datasets} 

1150 if len(datasetTypes) != 1: 

1151 raise DatasetTypeError(f"Multiple dataset types in input datasets: {datasetTypes}") 

1152 datasetType = datasetTypes.pop() 

1153 

1154 # get storage handler for this dataset type 

1155 storage = self._managers.datasets.find(datasetType.name) 

1156 if storage is None: 

1157 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.") 

1158 

1159 # find run name 

1160 runs = {dataset.run for dataset in datasets} 

1161 if len(runs) != 1: 

1162 raise ValueError(f"Multiple run names in input datasets: {runs}") 

1163 run = runs.pop() 

1164 

1165 runRecord = self._managers.collections.find(run) 

1166 if runRecord.type is not CollectionType.RUN: 

1167 raise CollectionTypeError( 

1168 f"Given collection '{runRecord.name}' is of type {runRecord.type.name};" 

1169 " RUN collection required." 

1170 ) 

1171 assert isinstance(runRecord, RunRecord) 

1172 

1173 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG) 

1174 if expand: 

1175 expandedDatasets = [ 

1176 dataset.expanded(self.expandDataId(dataset.dataId, dimensions=storage.datasetType.dimensions)) 

1177 for dataset in progress.wrap(datasets, f"Expanding {storage.datasetType.name} data IDs") 

1178 ] 

1179 else: 

1180 expandedDatasets = [ 

1181 DatasetRef(datasetType, dataset.dataId, id=dataset.id, run=dataset.run, conform=True) 

1182 for dataset in datasets 

1183 ] 

1184 

1185 try: 

1186 refs = list(storage.import_(runRecord, expandedDatasets)) 

1187 if self._managers.obscore: 

1188 context = queries.SqlQueryContext(self._db, self._managers.column_types) 

1189 self._managers.obscore.add_datasets(refs, context) 

1190 except sqlalchemy.exc.IntegrityError as err: 

1191 raise ConflictingDefinitionError( 

1192 "A database constraint failure was triggered by inserting " 

1193 f"one or more datasets of type {storage.datasetType} into " 

1194 f"collection '{run}'. " 

1195 "This probably means a dataset with the same data ID " 

1196 "and dataset type already exists, but it may also mean a " 

1197 "dimension row is missing." 

1198 ) from err 

1199 # Check that imported dataset IDs match the input 

1200 for imported_ref, input_ref in zip(refs, datasets, strict=True): 

1201 if imported_ref.id != input_ref.id: 

1202 raise RegistryConsistencyError( 

1203 "Imported dataset ID differs from input dataset ID, " 

1204 f"input ref: {input_ref}, imported ref: {imported_ref}" 

1205 ) 

1206 return refs 

1207 

1208 def getDataset(self, id: DatasetId) -> DatasetRef | None: 

1209 """Retrieve a Dataset entry. 

1210 

1211 Parameters 

1212 ---------- 

1213 id : `DatasetId` 

1214 The unique identifier for the dataset. 

1215 

1216 Returns 

1217 ------- 

1218 ref : `DatasetRef` or `None` 

1219 A ref to the Dataset, or `None` if no matching Dataset 

1220 was found. 

1221 """ 

1222 return self._managers.datasets.getDatasetRef(id) 

1223 

1224 @transactional 

1225 def removeDatasets(self, refs: Iterable[DatasetRef]) -> None: 

1226 """Remove datasets from the Registry. 

1227 

1228 The datasets will be removed unconditionally from all collections, and 

1229 any `Quantum` that consumed this dataset will instead be marked as

1230 having a NULL input. `Datastore` records will *not* be deleted; the 

1231 caller is responsible for ensuring that the dataset has already been 

1232 removed from all Datastores. 

1233 

1234 Parameters 

1235 ---------- 

1236 refs : `~collections.abc.Iterable` [`DatasetRef`] 

1237 References to the datasets to be removed. Must include a valid 

1238 ``id`` attribute, and should be considered invalidated upon return. 

1239 

1240 Raises 

1241 ------ 

1242 lsst.daf.butler.AmbiguousDatasetError 

1243 Raised if any ``ref.id`` is `None`. 

1244 lsst.daf.butler.registry.OrphanedRecordError 

1245 Raised if any dataset is still present in any `Datastore`. 

1246 """ 

1247 progress = Progress("lsst.daf.butler.Registry.removeDatasets", level=logging.DEBUG) 

1248 for datasetType, refsForType in progress.iter_item_chunks( 

1249 DatasetRef.iter_by_type(refs), desc="Removing datasets by type" 

1250 ): 

1251 storage = self._managers.datasets[datasetType.name] 

1252 try: 

1253 storage.delete(refsForType) 

1254 except sqlalchemy.exc.IntegrityError as err: 

1255 raise OrphanedRecordError( 

1256 "One or more datasets is still present in one or more Datastores." 

1257 ) from err 

1258 

1259 @transactional 

1260 def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

1261 """Add existing datasets to a `~CollectionType.TAGGED` collection. 

1262 

1263 If a `DatasetRef` with the exact same ID is already in the collection,

1264 nothing is changed. If a `DatasetRef` with the same `DatasetType` and

1265 data ID but with different ID exists in the collection, 

1266 `~lsst.daf.butler.registry.ConflictingDefinitionError` is raised. 

1267 

1268 Parameters 

1269 ---------- 

1270 collection : `str` 

1271 Indicates the collection the datasets should be associated with. 

1272 refs : `~collections.abc.Iterable` [ `DatasetRef` ] 

1273 An iterable of resolved `DatasetRef` instances that already exist 

1274 in this `Registry`. 

1275 

1276 Raises 

1277 ------ 

1278 lsst.daf.butler.registry.ConflictingDefinitionError 

1279 If a Dataset with the given `DatasetRef` already exists in the 

1280 given collection. 

1281 lsst.daf.butler.registry.MissingCollectionError 

1282 Raised if ``collection`` does not exist in the registry. 

1283 lsst.daf.butler.registry.CollectionTypeError 

1284 Raised if adding new datasets to the given ``collection`` is not

1285 allowed. 

1286 """ 

1287 progress = Progress("lsst.daf.butler.Registry.associate", level=logging.DEBUG) 

1288 collectionRecord = self._managers.collections.find(collection) 

1289 if collectionRecord.type is not CollectionType.TAGGED: 

1290 raise CollectionTypeError( 

1291 f"Collection '{collection}' has type {collectionRecord.type.name}, not TAGGED." 

1292 ) 

1293 for datasetType, refsForType in progress.iter_item_chunks( 

1294 DatasetRef.iter_by_type(refs), desc="Associating datasets by type" 

1295 ): 

1296 storage = self._managers.datasets[datasetType.name] 

1297 try: 

1298 storage.associate(collectionRecord, refsForType) 

1299 if self._managers.obscore: 

1300 # If a TAGGED collection is being monitored by ObsCore 

1301 # manager then we may need to save the dataset. 

1302 context = queries.SqlQueryContext(self._db, self._managers.column_types) 

1303 self._managers.obscore.associate(refsForType, collectionRecord, context) 

1304 except sqlalchemy.exc.IntegrityError as err: 

1305 raise ConflictingDefinitionError( 

1306 f"Constraint violation while associating dataset of type {datasetType.name} with " 

1307 f"collection {collection}. This probably means that one or more datasets with the same " 

1308 "dataset type and data ID already exist in the collection, but it may also indicate " 

1309 "that the datasets do not exist." 

1310 ) from err 

1311 

1312 @transactional 

1313 def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

1314 """Remove existing datasets from a `~CollectionType.TAGGED` collection. 

1315 

1316 ``collection`` and ``ref`` combinations that are not currently 

1317 associated are silently ignored. 

1318 

1319 Parameters 

1320 ---------- 

1321 collection : `str` 

1322 The collection the datasets should no longer be associated with. 

1323 refs : `~collections.abc.Iterable` [ `DatasetRef` ] 

1324 An iterable of resolved `DatasetRef` instances that already exist 

1325 in this `Registry`. 

1326 

1327 Raises 

1328 ------ 

1329 lsst.daf.butler.AmbiguousDatasetError 

1330 Raised if any of the given dataset references is unresolved. 

1331 lsst.daf.butler.registry.MissingCollectionError 

1332 Raised if ``collection`` does not exist in the registry. 

1333 lsst.daf.butler.registry.CollectionTypeError 

1334 Raised if removing datasets from the given ``collection`` is not

1335 allowed. 

1336 """ 

1337 progress = Progress("lsst.daf.butler.Registry.disassociate", level=logging.DEBUG) 

1338 collectionRecord = self._managers.collections.find(collection) 

1339 if collectionRecord.type is not CollectionType.TAGGED: 

1340 raise CollectionTypeError( 

1341 f"Collection '{collection}' has type {collectionRecord.type.name}; expected TAGGED." 

1342 ) 

1343 for datasetType, refsForType in progress.iter_item_chunks( 

1344 DatasetRef.iter_by_type(refs), desc="Disassociating datasets by type" 

1345 ): 

1346 storage = self._managers.datasets[datasetType.name] 

1347 storage.disassociate(collectionRecord, refsForType) 

1348 if self._managers.obscore: 

1349 self._managers.obscore.disassociate(refsForType, collectionRecord) 

1350 

1351 @transactional 

1352 def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None: 

1353 """Associate one or more datasets with a calibration collection and a 

1354 validity range within it. 

1355 

1356 Parameters 

1357 ---------- 

1358 collection : `str` 

1359 The name of an already-registered `~CollectionType.CALIBRATION` 

1360 collection. 

1361 refs : `~collections.abc.Iterable` [ `DatasetRef` ] 

1362 Datasets to be associated. 

1363 timespan : `Timespan` 

1364 The validity range for these datasets within the collection. 

1365 

1366 Raises 

1367 ------ 

1368 lsst.daf.butler.AmbiguousDatasetError 

1369 Raised if any of the given `DatasetRef` instances is unresolved. 

1370 lsst.daf.butler.registry.ConflictingDefinitionError 

1371 Raised if the collection already contains a different dataset with 

1372 the same `DatasetType` and data ID and an overlapping validity 

1373 range. 

1374 lsst.daf.butler.registry.CollectionTypeError 

1375 Raised if ``collection`` is not a `~CollectionType.CALIBRATION` 

1376 collection or if one or more datasets are of a dataset type for 

1377 which `DatasetType.isCalibration` returns `False`. 

1378 """ 

1379 progress = Progress("lsst.daf.butler.Registry.certify", level=logging.DEBUG) 

1380 collectionRecord = self._managers.collections.find(collection) 

1381 for datasetType, refsForType in progress.iter_item_chunks( 

1382 DatasetRef.iter_by_type(refs), desc="Certifying datasets by type" 

1383 ): 

1384 storage = self._managers.datasets[datasetType.name] 

1385 storage.certify( 

1386 collectionRecord, 

1387 refsForType, 

1388 timespan, 

1389 context=queries.SqlQueryContext(self._db, self._managers.column_types), 

1390 ) 

1391 
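# Editor-added illustrative sketch: certifying already-registered calibration
# datasets into a CALIBRATION collection for a validity range.  The collection
# name, the ``refs_to_certify`` iterable, and the Timespan/astropy construction
# are assumptions for illustration.
from astropy.time import Time

registry.registerCollection("ExampleCam/calib", CollectionType.CALIBRATION)
validity = Timespan(begin=Time("2024-01-01", scale="tai"), end=Time("2024-07-01", scale="tai"))
registry.certify("ExampleCam/calib", refs_to_certify, validity)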

1392 @transactional 

1393 def decertify( 

1394 self, 

1395 collection: str, 

1396 datasetType: str | DatasetType, 

1397 timespan: Timespan, 

1398 *, 

1399 dataIds: Iterable[DataId] | None = None, 

1400 ) -> None: 

1401 """Remove or adjust datasets to clear a validity range within a 

1402 calibration collection. 

1403 

1404 Parameters 

1405 ---------- 

1406 collection : `str` 

1407 The name of an already-registered `~CollectionType.CALIBRATION` 

1408 collection. 

1409 datasetType : `str` or `DatasetType` 

1410 Name or `DatasetType` instance for the datasets to be decertified. 

1411 timespan : `Timespan`

1412 The validity range to remove datasets from within the collection. 

1413 Datasets that overlap this range but are not contained by it will 

1414 have their validity ranges adjusted to not overlap it, which may 

1415 split a single dataset validity range into two. 

1416 dataIds : iterable [`dict` or `DataCoordinate`], optional 

1417 Data IDs that should be decertified within the given validity range.

1418 If `None`, all data IDs for ``datasetType`` will be

1419 decertified. 

1420 

1421 Raises 

1422 ------ 

1423 lsst.daf.butler.registry.CollectionTypeError 

1424 Raised if ``collection`` is not a `~CollectionType.CALIBRATION` 

1425 collection or if ``datasetType.isCalibration() is False``. 

1426 """ 

1427 collectionRecord = self._managers.collections.find(collection) 

1428 if isinstance(datasetType, str): 

1429 storage = self._managers.datasets[datasetType] 

1430 else: 

1431 storage = self._managers.datasets[datasetType.name] 

1432 standardizedDataIds = None 

1433 if dataIds is not None: 

1434 standardizedDataIds = [ 

1435 DataCoordinate.standardize(d, dimensions=storage.datasetType.dimensions) for d in dataIds 

1436 ] 

1437 storage.decertify( 

1438 collectionRecord, 

1439 timespan, 

1440 dataIds=standardizedDataIds, 

1441 context=queries.SqlQueryContext(self._db, self._managers.column_types), 

1442 ) 

1443 

1444 def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager: 

1445 """Return an object that allows a new `Datastore` instance to 

1446 communicate with this `Registry`. 

1447 

1448 Returns 

1449 ------- 

1450 manager : `~.interfaces.DatastoreRegistryBridgeManager` 

1451 Object that mediates communication between this `Registry` and its 

1452 associated datastores. 

1453 """ 

1454 return self._managers.datastores 

1455 

1456 def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]: 

1457 """Retrieve datastore locations for a given dataset. 

1458 

1459 Parameters 

1460 ---------- 

1461 ref : `DatasetRef` 

1462 A reference to the dataset for which to retrieve storage 

1463 information. 

1464 

1465 Returns 

1466 ------- 

1467 datastores : `~collections.abc.Iterable` [ `str` ] 

1468 All the matching datastores holding this dataset. 

1469 

1470 Raises 

1471 ------ 

1472 lsst.daf.butler.AmbiguousDatasetError 

1473 Raised if ``ref.id`` is `None`. 

1474 """ 

1475 return self._managers.datastores.findDatastores(ref) 

1476 

1477 def expandDataId( 

1478 self, 

1479 dataId: DataId | None = None, 

1480 *, 

1481 dimensions: Iterable[str] | DimensionGroup | DimensionGraph | None = None, 

1482 graph: DimensionGraph | None = None, 

1483 records: NameLookupMapping[DimensionElement, DimensionRecord | None] | None = None, 

1484 withDefaults: bool = True, 

1485 **kwargs: Any, 

1486 ) -> DataCoordinate: 

1487 """Expand a dimension-based data ID to include additional information. 

1488 

1489 Parameters 

1490 ---------- 

1491 dataId : `DataCoordinate` or `dict`, optional 

1492 Data ID to be expanded; augmented and overridden by ``kwargs``. 

1493 dimensions : `~collections.abc.Iterable` [ `str` ], \ 

1494 `DimensionGroup`, or `DimensionGraph`, optional 

1495 The dimensions to be identified by the new `DataCoordinate`. 

1496 If not provided, will be inferred from the keys of ``dataId`` and 

1497 ``**kwargs``; it does not need to be given when ``dataId`` is 

1498 already a `DataCoordinate`. 

1499 graph : `DimensionGraph`, optional 

1500 Like ``dimensions``, but as a ``DimensionGraph`` instance. Ignored 

1501 if ``dimensions`` is provided. Deprecated and will be removed 

1502 after v27. 

1503 records : `~collections.abc.Mapping` [`str`, `DimensionRecord`], \ 

1504 optional 

1505 Dimension record data to use before querying the database for that 

1506 data, keyed by element name. 

1507 withDefaults : `bool`, optional 

1508 Utilize ``self.defaults.dataId`` to fill in missing governor 

1509 dimension key-value pairs. Defaults to `True` (i.e. defaults are 

1510 used). 

1511 **kwargs 

1512 Additional keywords are treated like additional key-value pairs for 

1513 ``dataId``, extending and overriding. 

1514 

1515 Returns 

1516 ------- 

1517 expanded : `DataCoordinate` 

1518 A data ID that includes full metadata for all of the dimensions it 

1519 identifies, i.e. guarantees that ``expanded.hasRecords()`` and 

1520 ``expanded.hasFull()`` both return `True`. 

1521 

1522 Raises 

1523 ------ 

1524 lsst.daf.butler.registry.DataIdError 

1525 Raised when ``dataId`` or keyword arguments specify unknown 

1526 dimensions or values, or when a resulting data ID contains 

1527 contradictory key-value pairs, according to dimension 

1528 relationships. 

1529 

1530 Notes 

1531 ----- 

1532 This method cannot be relied upon to reject invalid data ID values 

1533 for dimensions that do not actually have any record columns. For 

1534 efficiency reasons the records for these dimensions (which have only 

1535 dimension key values that are given by the caller) may be constructed 

1536 directly rather than obtained from the registry database. 

1537 """ 

1538 if not withDefaults: 

1539 defaults = None 

1540 else: 

1541 defaults = self.defaults.dataId 

1542 standardized = DataCoordinate.standardize( 

1543 dataId, 

1544 graph=graph, 

1545 dimensions=dimensions, 

1546 universe=self.dimensions, 

1547 defaults=defaults, 

1548 **kwargs, 

1549 ) 

1550 if standardized.hasRecords(): 

1551 return standardized 

1552 if records is None: 

1553 records = {} 

1554 elif isinstance(records, NamedKeyMapping): 

1555 records = records.byName() 

1556 else: 

1557 records = dict(records) 

1558 if isinstance(dataId, DataCoordinate) and dataId.hasRecords(): 

1559 for element_name in dataId.dimensions.elements: 

1560 records[element_name] = dataId.records[element_name] 

1561 keys = dict(standardized.mapping) 

1562 for element_name in standardized.dimensions.lookup_order: 

1563 element = self.dimensions[element_name] 

1564 record = records.get(element_name, ...) # Use ... to mean not found; None might mean NULL 

1565 if record is ...: 

1566 if element_name in self.dimensions.dimensions.names and keys.get(element_name) is None: 

1567 if element_name in standardized.dimensions.required: 

1568 raise DimensionNameError( 

1569 f"No value or null value for required dimension {element_name}." 

1570 ) 

1571 keys[element_name] = None 

1572 record = None 

1573 else: 

1574 record = self._managers.dimensions.fetch_one( 

1575 element_name, 

1576 DataCoordinate.standardize(keys, dimensions=element.minimal_group), 

1577 self.dimension_record_cache, 

1578 ) 

1579 records[element_name] = record 

1580 if record is not None: 

1581 for d in element.implied: 

1582 value = getattr(record, d.name) 

1583 if keys.setdefault(d.name, value) != value: 

1584 raise InconsistentDataIdError( 

1585 f"Data ID {standardized} has {d.name}={keys[d.name]!r}, " 

1586 f"but {element_name} implies {d.name}={value!r}." 

1587 ) 

1588 else: 

1589 if element_name in standardized.dimensions.required: 

1590 raise DataIdValueError( 

1591 f"Could not fetch record for required dimension {element.name} via keys {keys}." 

1592 ) 

1593 if element.defines_relationships: 

1594 raise InconsistentDataIdError( 

1595 f"Could not fetch record for element {element_name} via keys {keys}, " 

1596 "but it is marked as defining relationships; this means one or more dimensions " 

1597 "have inconsistent values.", 

1598 ) 

1599 for d in element.implied: 

1600 keys.setdefault(d.name, None) 

1601 records.setdefault(d.name, None) 

1602 return DataCoordinate.standardize(keys, dimensions=standardized.dimensions).expanded(records=records) 

1603 
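A hedged sketch of typical ``expandDataId`` use (the instrument and exposure values are hypothetical, and the ``timespan`` field assumes the default dimension universe):

    # Expand a minimal exposure data ID so implied dimensions and records attach.
    data_id = registry.expandDataId({"instrument": "LATISS", "exposure": 2024050100123})
    assert data_id.hasFull() and data_id.hasRecords()
    # Records for each identified element are now available in memory.
    print(data_id.records["exposure"].timespan)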

1604 def insertDimensionData( 

1605 self, 

1606 element: DimensionElement | str, 

1607 *data: Mapping[str, Any] | DimensionRecord, 

1608 conform: bool = True, 

1609 replace: bool = False, 

1610 skip_existing: bool = False, 

1611 ) -> None: 

1612 """Insert one or more dimension records into the database. 

1613 

1614 Parameters 

1615 ---------- 

1616 element : `DimensionElement` or `str` 

1617 The `DimensionElement` or name thereof that identifies the table 

1618 records will be inserted into. 

1619 *data : `dict` or `DimensionRecord` 

1620 One or more records to insert. 

1621 conform : `bool`, optional 

1622 If `False` (`True` is default) perform no checking or conversions, 

1623 and assume that ``element`` is a `DimensionElement` instance and 

1624 ``data`` is one or more `DimensionRecord` instances of the 

1625 appropriate subclass. 

1626 replace : `bool`, optional 

1627 If `True` (`False` is default), replace existing records in the 

1628 database if there is a conflict. 

1629 skip_existing : `bool`, optional 

1630 If `True` (`False` is default), skip insertion if a record with 

1631 the same primary key values already exists. Unlike 

1632 `syncDimensionData`, this will not detect when the given record 

1633 differs from what is in the database, and should not be used when 

1634 this is a concern. 

1635 """ 

1636 if isinstance(element, str): 

1637 element = self.dimensions[element] 

1638 if conform: 

1639 records = [ 

1640 row if isinstance(row, DimensionRecord) else element.RecordClass(**row) for row in data 

1641 ] 

1642 else: 

1643 # Ignore typing since caller said to trust them with conform=False. 

1644 records = data # type: ignore 

1645 if element.name in self.dimension_record_cache: 

1646 self.dimension_record_cache.reset() 

1647 self._managers.dimensions.insert( 

1648 element, 

1649 *records, 

1650 replace=replace, 

1651 skip_existing=skip_existing, 

1652 ) 

1653 
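A hedged sketch of ``insertDimensionData``; the record field names below are illustrative and depend on the repository's dimension universe:

    # Insert a hypothetical instrument record, then its detectors; skip_existing
    # makes the detector insert idempotent across repeated runs.
    registry.insertDimensionData(
        "instrument",
        {"name": "MyCam", "detector_max": 4, "visit_max": 999999, "exposure_max": 999999},
    )
    registry.insertDimensionData(
        "detector",
        *({"instrument": "MyCam", "id": i, "full_name": f"S{i}"} for i in range(4)),
        skip_existing=True,
    )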

1654 def syncDimensionData( 

1655 self, 

1656 element: DimensionElement | str, 

1657 row: Mapping[str, Any] | DimensionRecord, 

1658 conform: bool = True, 

1659 update: bool = False, 

1660 ) -> bool | dict[str, Any]: 

1661 """Synchronize the given dimension record with the database, inserting 

1662 if it does not already exist and comparing values if it does. 

1663 

1664 Parameters 

1665 ---------- 

1666 element : `DimensionElement` or `str` 

1667 The `DimensionElement` or name thereof that identifies the table 

1668 records will be inserted into. 

1669 row : `dict` or `DimensionRecord` 

1670 The record to insert. 

1671 conform : `bool`, optional 

1672 If `False` (`True` is default) perform no checking or conversions, 

1673 and assume that ``element`` is a `DimensionElement` instance and 

1674 ``row`` is a `DimensionRecord` instance of the appropriate 

1675 subclass. 

1676 update : `bool`, optional 

1677 If `True` (`False` is default), update the existing record in the 

1678 database if there is a conflict. 

1679 

1680 Returns 

1681 ------- 

1682 inserted_or_updated : `bool` or `dict` 

1683 `True` if a new row was inserted, `False` if no changes were 

1684 needed, or a `dict` mapping updated column names to their old 

1685 values if an update was performed (only possible if 

1686 ``update=True``). 

1687 

1688 Raises 

1689 ------ 

1690 lsst.daf.butler.registry.ConflictingDefinitionError 

1691 Raised if the record exists in the database (according to primary 

1692 key lookup) but is inconsistent with the given one. 

1693 """ 

1694 if conform: 

1695 if isinstance(element, str): 

1696 element = self.dimensions[element] 

1697 record = row if isinstance(row, DimensionRecord) else element.RecordClass(**row) 

1698 else: 

1699 # Ignore typing since caller said to trust them with conform=False. 

1700 record = row # type: ignore 

1701 if record.definition.name in self.dimension_record_cache: 

1702 self.dimension_record_cache.reset() 

1703 return self._managers.dimensions.sync(record, update=update) 

1704 
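A hedged sketch of ``syncDimensionData``, the safer choice when the record may already exist (values hypothetical):

    # True if inserted, False if nothing changed, or a dict of old column values
    # if an update was performed (only possible with update=True).
    result = registry.syncDimensionData(
        "detector",
        {"instrument": "MyCam", "id": 0, "full_name": "S00"},
        update=True,
    )
    if result is True:
        print("inserted a new detector record")
    elif result:
        print("updated existing record; previous values:", result)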

1705 def queryDatasetTypes( 

1706 self, 

1707 expression: Any = ..., 

1708 *, 

1709 components: bool | _Marker = _DefaultMarker, 

1710 missing: list[str] | None = None, 

1711 ) -> Iterable[DatasetType]: 

1712 """Iterate over the dataset types whose names match an expression. 

1713 

1714 Parameters 

1715 ---------- 

1716 expression : dataset type expression, optional 

1717 An expression that fully or partially identifies the dataset types 

1718 to return, such as a `str`, `re.Pattern`, or iterable thereof. 

1719 ``...`` can be used to return all dataset types, and is the 

1720 default. See :ref:`daf_butler_dataset_type_expressions` for more 

1721 information. 

1722 components : `bool`, optional 

1723 Must be `False`. Provided only for backwards compatibility. After 

1724 v27 this argument will be removed entirely. 

1725 missing : `list` of `str`, optional 

1726 String dataset type names that were explicitly given (i.e. not 

1727 regular expression patterns) but not found will be appended to this 

1728 list, if it is provided. 

1729 

1730 Returns 

1731 ------- 

1732 dataset_types : `~collections.abc.Iterable` [ `DatasetType` ] 

1733 An `~collections.abc.Iterable` of `DatasetType` instances whose 

1734 names match ``expression``. 

1735 

1736 Raises 

1737 ------ 

1738 lsst.daf.butler.registry.DatasetTypeExpressionError 

1739 Raised when ``expression`` is invalid. 

1740 """ 

1741 if components is not _DefaultMarker: 

1742 if components is not False: 

1743 raise DatasetTypeError( 

1744 "Dataset component queries are no longer supported by Registry. Use " 

1745 "DatasetType methods to obtain components from parent dataset types instead." 

1746 ) 

1747 else: 

1748 warnings.warn( 

1749 "The components parameter is ignored. It will be removed after v27.", 

1750 category=FutureWarning, 

1751 stacklevel=find_outside_stacklevel("lsst.daf.butler"), 

1752 ) 

1753 wildcard = DatasetTypeWildcard.from_expression(expression) 

1754 return self._managers.datasets.resolve_wildcard(wildcard, missing=missing) 

1755 
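A hedged sketch of ``queryDatasetTypes`` mixing explicit names with a pattern (the dataset type names are hypothetical):

    import re

    missing: list[str] = []
    for dataset_type in registry.queryDatasetTypes(
        ["bias", "flat", re.compile(r"^cpt_.*")], missing=missing
    ):
        print(dataset_type.name, dataset_type.dimensions)
    if missing:
        print("explicitly named but not registered:", missing)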

1756 def queryCollections( 

1757 self, 

1758 expression: Any = ..., 

1759 datasetType: DatasetType | None = None, 

1760 collectionTypes: Iterable[CollectionType] | CollectionType = CollectionType.all(), 

1761 flattenChains: bool = False, 

1762 includeChains: bool | None = None, 

1763 ) -> Sequence[str]: 

1764 """Iterate over the collections whose names match an expression. 

1765 

1766 Parameters 

1767 ---------- 

1768 expression : collection expression, optional 

1769 An expression that identifies the collections to return, such as 

1770 a `str` (for full matches or partial matches via globs), 

1771 `re.Pattern` (for partial matches), or iterable thereof. ``...`` 

1772 can be used to return all collections, and is the default. 

1773 See :ref:`daf_butler_collection_expressions` for more information. 

1774 datasetType : `DatasetType`, optional 

1775 If provided, only yield collections that may contain datasets of 

1776 this type. This is a conservative approximation in general; it may 

1777 yield collections that do not have any such datasets. 

1778 collectionTypes : `~collections.abc.Set` [`CollectionType`] or \ 

1779 `CollectionType`, optional 

1780 If provided, only yield collections of these types. 

1781 flattenChains : `bool`, optional 

1782 If `True` (`False` is default), recursively yield the child 

1783 collections of matching `~CollectionType.CHAINED` collections. 

1784 includeChains : `bool`, optional 

1785 If `True`, yield records for matching `~CollectionType.CHAINED` 

1786 collections. Default is the opposite of ``flattenChains``: include 

1787 either CHAINED collections or their children, but not both. 

1788 

1789 Returns 

1790 ------- 

1791 collections : `~collections.abc.Sequence` [ `str` ] 

1792 The names of collections that match ``expression``. 

1793 

1794 Raises 

1795 ------ 

1796 lsst.daf.butler.registry.CollectionExpressionError 

1797 Raised when ``expression`` is invalid. 

1798 

1799 Notes 

1800 ----- 

1801 The order in which collections are returned is unspecified, except that 

1802 the children of a `~CollectionType.CHAINED` collection are guaranteed 

1803 to be in the order in which they are searched. When multiple parent 

1804 `~CollectionType.CHAINED` collections match the same criteria, the 

1805 order in which their child lists appear is unspecified, and those 

1806 lists may be incomplete if a child belongs to multiple parents. 

1807 """ 

1808 # Right now the datasetType argument is completely ignored, but that 

1809 # is consistent with its [lack of] guarantees. DM-24939 or a follow-up 

1810 # ticket will take care of that. 

1811 try: 

1812 wildcard = CollectionWildcard.from_expression(expression) 

1813 except TypeError as exc: 

1814 raise CollectionExpressionError(f"Invalid collection expression '{expression}'") from exc 

1815 collectionTypes = ensure_iterable(collectionTypes) 

1816 return [ 

1817 record.name 

1818 for record in self._managers.collections.resolve_wildcard( 

1819 wildcard, 

1820 collection_types=frozenset(collectionTypes), 

1821 flatten_chains=flattenChains, 

1822 include_chains=includeChains, 

1823 ) 

1824 ] 

1825 
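A hedged sketch of ``queryCollections`` (collection names are hypothetical):

    from lsst.daf.butler import CollectionType

    # Find CHAINED collections matching a glob, then flatten them into their
    # search-ordered children.
    chains = registry.queryCollections("LATISS/defaults*", collectionTypes=CollectionType.CHAINED)
    children = registry.queryCollections(chains, flattenChains=True)
    print(list(chains), list(children))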

1826 def _makeQueryBuilder( 

1827 self, 

1828 summary: queries.QuerySummary, 

1829 doomed_by: Iterable[str] = (), 

1830 ) -> queries.QueryBuilder: 

1831 """Return a `QueryBuilder` instance capable of constructing and 

1832 managing more complex queries than those obtainable via `Registry` 

1833 interfaces. 

1834 

1835 This is an advanced interface; downstream code should prefer 

1836 `Registry.queryDataIds` and `Registry.queryDatasets` whenever those 

1837 are sufficient. 

1838 

1839 Parameters 

1840 ---------- 

1841 summary : `queries.QuerySummary` 

1842 Object describing and categorizing the full set of dimensions that 

1843 will be included in the query. 

1844 doomed_by : `~collections.abc.Iterable` of `str`, optional 

1845 A list of diagnostic messages that indicate why the query is going 

1846 to yield no results and should not even be executed. If an empty 

1847 container (default) the query will be executed unless other code 

1848 determines that it is doomed. 

1849 

1850 Returns 

1851 ------- 

1852 builder : `queries.QueryBuilder` 

1853 Object that can be used to construct and perform advanced queries. 

1854 """ 

1855 doomed_by = list(doomed_by) 

1856 backend = queries.SqlQueryBackend(self._db, self._managers, self.dimension_record_cache) 

1857 context = backend.context() 

1858 relation: Relation | None = None 

1859 if doomed_by: 

1860 relation = LeafRelation.make_doomed(context.sql_engine, set(), doomed_by) 

1861 return queries.QueryBuilder( 

1862 summary, 

1863 backend=backend, 

1864 context=context, 

1865 relation=relation, 

1866 ) 

1867 

1868 def _standardize_query_data_id_args( 

1869 self, data_id: DataId | None, *, doomed_by: list[str], **kwargs: Any 

1870 ) -> DataCoordinate: 

1871 """Preprocess the data ID arguments passed to query* methods. 

1872 

1873 Parameters 

1874 ---------- 

1875 data_id : `DataId` or `None` 

1876 Data ID that constrains the query results. 

1877 doomed_by : `list` [ `str` ] 

1878 List to append messages indicating why the query is doomed to 

1879 yield no results. 

1880 **kwargs 

1881 Additional data ID key-value pairs, extending and overriding 

1882 ``data_id``. 

1883 

1884 Returns 

1885 ------- 

1886 data_id : `DataCoordinate` 

1887 Standardized data ID. Will be fully expanded unless expansion 

1888 fails, in which case a message will be appended to ``doomed_by`` 

1889 on return. 

1890 """ 

1891 try: 

1892 return self.expandDataId(data_id, **kwargs) 

1893 except DataIdValueError as err: 

1894 doomed_by.append(str(err)) 

1895 return DataCoordinate.standardize( 

1896 data_id, **kwargs, universe=self.dimensions, defaults=self.defaults.dataId 

1897 ) 

1898 

1899 def _standardize_query_dataset_args( 

1900 self, 

1901 datasets: Any, 

1902 collections: CollectionArgType | None, 

1903 mode: Literal["find_first"] | Literal["find_all"] | Literal["constrain"] = "constrain", 

1904 *, 

1905 doomed_by: list[str], 

1906 ) -> tuple[list[DatasetType], CollectionWildcard | None]: 

1907 """Preprocess dataset arguments passed to query* methods. 

1908 

1909 Parameters 

1910 ---------- 

1911 datasets : `DatasetType`, `str`, `re.Pattern`, or iterable of these 

1912 Expression identifying dataset types. See `queryDatasetTypes` for 

1913 details. 

1914 collections : `str`, `re.Pattern`, or iterable of these 

1915 Expression identifying collections to be searched. See 

1916 `queryCollections` for details. 

1917 mode : `str`, optional 

1918 The way in which datasets are being used in this query; one of: 

1919 

1920 - "find_first": this is a query for the first dataset in an 

1921 ordered list of collections. Prohibits collection wildcards, 

1922 but permits dataset type wildcards. 

1923 

1924 - "find_all": this is a query for all datasets in all matched 

1925 collections. Permits collection and dataset type wildcards. 

1926 

1927 - "constrain": this is a query for something other than datasets, 

1928 with results constrained by dataset existence. Permits 

1929 collection wildcards and prohibits ``...`` as a dataset type 

1930 wildcard. 

1931 doomed_by : `list` [ `str` ] 

1932 List to append messages indicating why the query is doomed to 

1933 yield no results. 

1934 

1935 Returns 

1936 ------- 

1937 dataset_types : `list` [ `DatasetType` ] 

1938 List of matched dataset types. 

1939 collections : `CollectionWildcard` or `None` 

1940 Processed collection expression. 

1941 """ 

1942 dataset_types: list[DatasetType] = [] 

1943 collection_wildcard: CollectionWildcard | None = None 

1944 if datasets is not None: 

1945 if collections is None: 

1946 if not self.defaults.collections: 

1947 raise NoDefaultCollectionError("No collections, and no registry default collections.") 

1948 collection_wildcard = CollectionWildcard.from_expression(self.defaults.collections) 

1949 else: 

1950 collection_wildcard = CollectionWildcard.from_expression(collections) 

1951 if mode == "find_first" and collection_wildcard.patterns: 

1952 raise TypeError( 

1953 f"Collection pattern(s) {collection_wildcard.patterns} not allowed in this context." 

1954 ) 

1955 missing: list[str] = [] 

1956 dataset_types = self._managers.datasets.resolve_wildcard( 

1957 datasets, missing=missing, explicit_only=(mode == "constrain") 

1958 ) 

1959 if missing and mode == "constrain": 

1960 raise MissingDatasetTypeError( 

1961 f"Dataset type(s) {missing} are not registered.", 

1962 ) 

1963 doomed_by.extend(f"Dataset type {name} is not registered." for name in missing) 

1964 elif collections: 

1965 # I think this check should actually be `collections is not None`, 

1966 # but it looks like some CLI scripts use empty tuple as default. 

1967 raise ArgumentError(f"Cannot pass 'collections' (='{collections}') without 'datasets'.") 

1968 return dataset_types, collection_wildcard 

1969 

1970 def queryDatasets( 

1971 self, 

1972 datasetType: Any, 

1973 *, 

1974 collections: CollectionArgType | None = None, 

1975 dimensions: Iterable[Dimension | str] | None = None, 

1976 dataId: DataId | None = None, 

1977 where: str = "", 

1978 findFirst: bool = False, 

1979 components: bool | _Marker = _DefaultMarker, 

1980 bind: Mapping[str, Any] | None = None, 

1981 check: bool = True, 

1982 **kwargs: Any, 

1983 ) -> queries.DatasetQueryResults: 

1984 """Query for and iterate over dataset references matching user-provided 

1985 criteria. 

1986 

1987 Parameters 

1988 ---------- 

1989 datasetType : dataset type expression 

1990 An expression that fully or partially identifies the dataset types 

1991 to be queried. Allowed types include `DatasetType`, `str`, 

1992 `re.Pattern`, and iterables thereof. The special value ``...`` can 

1993 be used to query all dataset types. See 

1994 :ref:`daf_butler_dataset_type_expressions` for more information. 

1995 collections : collection expression, optional 

1996 An expression that identifies the collections to search, such as a 

1997 `str` (for full matches or partial matches via globs), `re.Pattern` 

1998 (for partial matches), or iterable thereof. ``...`` can be used to 

1999 search all collections (actually just all `~CollectionType.RUN` 

2000 collections, because this will still find all datasets). 

2001 If not provided, ``self.defaults.collections`` is used. See 

2002 :ref:`daf_butler_collection_expressions` for more information. 

2003 dimensions : `~collections.abc.Iterable` of `Dimension` or `str` 

2004 Dimensions to include in the query (in addition to those used 

2005 to identify the queried dataset type(s)), either to constrain 

2006 the resulting datasets to those for which a matching dimension 

2007 exists, or to relate the dataset type's dimensions to dimensions 

2008 referenced by the ``dataId`` or ``where`` arguments. 

2009 dataId : `dict` or `DataCoordinate`, optional 

2010 A data ID whose key-value pairs are used as equality constraints 

2011 in the query. 

2012 where : `str`, optional 

2013 A string expression similar to a SQL WHERE clause. May involve 

2014 any column of a dimension table or (as a shortcut for the primary 

2015 key column of a dimension table) dimension name. See 

2016 :ref:`daf_butler_dimension_expressions` for more information. 

2017 findFirst : `bool`, optional 

2018 If `True` (`False` is default), for each result data ID, only 

2019 yield one `DatasetRef` of each `DatasetType`, from the first 

2020 collection in which a dataset of that dataset type appears 

2021 (according to the order of ``collections`` passed in). If `True`, 

2022 ``collections`` must not contain regular expressions and may not 

2023 be ``...``. 

2024 components : `bool`, optional 

2025 Must be `False`. Provided only for backwards compatibility. After 

2026 v27 this argument will be removed entirely. 

2027 bind : `~collections.abc.Mapping`, optional 

2028 Mapping containing literal values that should be injected into the 

2029 ``where`` expression, keyed by the identifiers they replace. 

2030 Values of collection type can be expanded in some cases; see 

2031 :ref:`daf_butler_dimension_expressions_identifiers` for more 

2032 information. 

2033 check : `bool`, optional 

2034 If `True` (default) check the query for consistency before 

2035 executing it. This may reject some valid queries that resemble 

2036 common mistakes (e.g. queries for visits without specifying an 

2037 instrument). 

2038 **kwargs 

2039 Additional keyword arguments are forwarded to 

2040 `DataCoordinate.standardize` when processing the ``dataId`` 

2041 argument (and may be used to provide a constraining data ID even 

2042 when the ``dataId`` argument is `None`). 

2043 

2044 Returns 

2045 ------- 

2046 refs : `.queries.DatasetQueryResults` 

2047 Dataset references matching the given query criteria. Nested data 

2048 IDs are guaranteed to include values for all implied dimensions 

2049 (i.e. `DataCoordinate.hasFull` will return `True`), but will not 

2050 include dimension records (`DataCoordinate.hasRecords` will be 

2051 `False`) unless `~.queries.DatasetQueryResults.expanded` is 

2052 called on the result object (which returns a new one). 

2053 

2054 Raises 

2055 ------ 

2056 lsst.daf.butler.registry.DatasetTypeExpressionError 

2057 Raised when ``datasetType`` expression is invalid. 

2058 TypeError 

2059 Raised when the arguments are incompatible, such as when a 

2060 collection wildcard is passed when ``findFirst`` is `True`, or 

2061 when ``collections`` is `None` and ``self.defaults.collections`` is 

2062 also `None`. 

2063 lsst.daf.butler.registry.DataIdError 

2064 Raised when ``dataId`` or keyword arguments specify unknown 

2065 dimensions or values, or when they contain inconsistent values. 

2066 lsst.daf.butler.registry.UserExpressionError 

2067 Raised when ``where`` expression is invalid. 

2068 

2069 Notes 

2070 ----- 

2071 When multiple dataset types are queried in a single call, the 

2072 results of this operation are equivalent to querying for each dataset 

2073 type separately in turn, and no information about the relationships 

2074 between datasets of different types is included. In contexts where 

2075 that kind of information is important, the recommended pattern is to 

2076 use `queryDataIds` to first obtain data IDs (possibly with the 

2077 desired dataset types and collections passed as constraints to the 

2078 query), and then use multiple (generally much simpler) calls to 

2079 `queryDatasets` with the returned data IDs passed as constraints. 

2080 """ 

2081 if components is not _DefaultMarker: 

2082 if components is not False: 

2083 raise DatasetTypeError( 

2084 "Dataset component queries are no longer supported by Registry. Use " 

2085 "DatasetType methods to obtain components from parent dataset types instead." 

2086 ) 

2087 else: 

2088 warnings.warn( 

2089 "The components parameter is ignored. It will be removed after v27.", 

2090 category=FutureWarning, 

2091 stacklevel=find_outside_stacklevel("lsst.daf.butler"), 

2092 ) 

2093 doomed_by: list[str] = [] 

2094 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs) 

2095 resolved_dataset_types, collection_wildcard = self._standardize_query_dataset_args( 

2096 datasetType, 

2097 collections, 

2098 mode="find_first" if findFirst else "find_all", 

2099 doomed_by=doomed_by, 

2100 ) 

2101 if collection_wildcard is not None and collection_wildcard.empty(): 

2102 doomed_by.append("No datasets can be found because collection list is empty.") 

2103 return queries.ChainedDatasetQueryResults([], doomed_by=doomed_by) 

2104 parent_results: list[queries.ParentDatasetQueryResults] = [] 

2105 for resolved_dataset_type in resolved_dataset_types: 

2106 # The full set of dimensions in the query is the combination of 

2107 # those needed for the DatasetType and those explicitly requested, 

2108 # if any. 

2109 dimension_names = set(resolved_dataset_type.dimensions.names) 

2110 if dimensions is not None: 

2111 dimension_names.update(self.dimensions.conform(dimensions).names) 

2112 # Construct the summary structure needed to construct a 

2113 # QueryBuilder. 

2114 summary = queries.QuerySummary( 

2115 requested=self.dimensions.conform(dimension_names), 

2116 column_types=self._managers.column_types, 

2117 data_id=data_id, 

2118 expression=where, 

2119 bind=bind, 

2120 defaults=self.defaults.dataId, 

2121 check=check, 

2122 datasets=[resolved_dataset_type], 

2123 ) 

2124 builder = self._makeQueryBuilder(summary) 

2125 # Add the dataset subquery to the query, telling the QueryBuilder 

2126 # to include the rank of the selected collection in the results 

2127 # only if we need to findFirst. Note that if any of the 

2128 # collections are actually wildcard expressions, and 

2129 # findFirst=True, this will raise TypeError for us. 

2130 builder.joinDataset( 

2131 resolved_dataset_type, collection_wildcard, isResult=True, findFirst=findFirst 

2132 ) 

2133 query = builder.finish() 

2134 parent_results.append( 

2135 queries.ParentDatasetQueryResults(query, resolved_dataset_type, components=[None]) 

2136 ) 

2137 if not parent_results: 

2138 doomed_by.extend( 

2139 f"No registered dataset type matching {t!r} found, so no matching datasets can " 

2140 "exist in any collection." 

2141 for t in ensure_iterable(datasetType) 

2142 ) 

2143 return queries.ChainedDatasetQueryResults([], doomed_by=doomed_by) 

2144 elif len(parent_results) == 1: 

2145 return parent_results[0] 

2146 else: 

2147 return queries.ChainedDatasetQueryResults(parent_results) 

2148 
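A hedged usage sketch for ``queryDatasets`` (the dataset type, collection names, and ``where`` values are hypothetical):

    # Find-first search: one "calexp" per data ID, taken from the first
    # collection in the ordered list that contains it.
    refs = registry.queryDatasets(
        "calexp",
        collections=["u/someone/reruns/w_2024_20", "LATISS/defaults"],
        where="instrument = 'LATISS' AND visit > 100",
        findFirst=True,
    )
    for ref in refs:
        print(ref.run, ref.dataId)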

2149 def queryDataIds( 

2150 self, 

2151 # TODO: Drop Dimension support on DM-41326. 

2152 dimensions: DimensionGroup | Iterable[Dimension | str] | Dimension | str, 

2153 *, 

2154 dataId: DataId | None = None, 

2155 datasets: Any = None, 

2156 collections: CollectionArgType | None = None, 

2157 where: str = "", 

2158 components: bool | _Marker = _DefaultMarker, 

2159 bind: Mapping[str, Any] | None = None, 

2160 check: bool = True, 

2161 **kwargs: Any, 

2162 ) -> queries.DataCoordinateQueryResults: 

2163 """Query for data IDs matching user-provided criteria. 

2164 

2165 Parameters 

2166 ---------- 

2167 dimensions : `DimensionGroup`, `Dimension`, or `str`, or \ 

2168 `~collections.abc.Iterable` [ `Dimension` or `str` ] 

2169 The dimensions of the data IDs to yield, as either `Dimension` 

2170 instances or `str`. Will be automatically expanded to a complete 

2171 `DimensionGroup`. Support for `Dimension` instances is deprecated 

2172 and will not be supported after v27. 

2173 dataId : `dict` or `DataCoordinate`, optional 

2174 A data ID whose key-value pairs are used as equality constraints 

2175 in the query. 

2176 datasets : dataset type expression, optional 

2177 An expression that fully or partially identifies dataset types 

2178 that should constrain the yielded data IDs. For example, including 

2179 "raw" here would constrain the yielded ``instrument``, 

2180 ``exposure``, ``detector``, and ``physical_filter`` values to only 

2181 those for which at least one "raw" dataset exists in 

2182 ``collections``. Allowed types include `DatasetType`, `str`, 

2183 and iterables thereof. Regular expression objects (i.e. 

2184 `re.Pattern`) are deprecated and will be removed after the v26 

2185 release. See :ref:`daf_butler_dataset_type_expressions` for more 

2186 information. 

2187 collections : collection expression, optional 

2188 An expression that identifies the collections to search for 

2189 datasets, such as a `str` (for full matches or partial matches 

2190 via globs), `re.Pattern` (for partial matches), or iterable 

2191 thereof. ``...`` can be used to search all collections (actually 

2192 just all `~CollectionType.RUN` collections, because this will 

2193 still find all datasets). If not provided, 

2194 ``self.defaults.collections`` is used. Ignored unless ``datasets`` 

2195 is also passed. See :ref:`daf_butler_collection_expressions` for 

2196 more information. 

2197 where : `str`, optional 

2198 A string expression similar to a SQL WHERE clause. May involve 

2199 any column of a dimension table or (as a shortcut for the primary 

2200 key column of a dimension table) dimension name. See 

2201 :ref:`daf_butler_dimension_expressions` for more information. 

2202 components : `bool`, optional 

2203 Must be `False`. Provided only for backwards compatibility. After 

2204 v27 this argument will be removed entirely. 

2205 bind : `~collections.abc.Mapping`, optional 

2206 Mapping containing literal values that should be injected into the 

2207 ``where`` expression, keyed by the identifiers they replace. 

2208 Values of collection type can be expanded in some cases; see 

2209 :ref:`daf_butler_dimension_expressions_identifiers` for more 

2210 information. 

2211 check : `bool`, optional 

2212 If `True` (default) check the query for consistency before 

2213 executing it. This may reject some valid queries that resemble 

2214 common mistakes (e.g. queries for visits without specifying an 

2215 instrument). 

2216 **kwargs 

2217 Additional keyword arguments are forwarded to 

2218 `DataCoordinate.standardize` when processing the ``dataId`` 

2219 argument (and may be used to provide a constraining data ID even 

2220 when the ``dataId`` argument is `None`). 

2221 

2222 Returns 

2223 ------- 

2224 dataIds : `.queries.DataCoordinateQueryResults` 

2225 Data IDs matching the given query parameters. These are guaranteed 

2226 to identify all dimensions (`DataCoordinate.hasFull` returns 

2227 `True`), but will not contain `DimensionRecord` objects 

2228 (`DataCoordinate.hasRecords` returns `False`). Call 

2229 `~.queries.DataCoordinateQueryResults.expanded` on the 

2230 returned object to fetch those (and consider using 

2231 `~.queries.DataCoordinateQueryResults.materialize` on the 

2232 returned object first if the expected number of rows is very 

2233 large). See documentation for those methods for additional 

2234 information. 

2235 

2236 Raises 

2237 ------ 

2238 lsst.daf.butler.registry.NoDefaultCollectionError 

2239 Raised if ``collections`` is `None` and 

2240 ``self.defaults.collections`` is `None`. 

2241 lsst.daf.butler.registry.CollectionExpressionError 

2242 Raised when ``collections`` expression is invalid. 

2243 lsst.daf.butler.registry.DataIdError 

2244 Raised when ``dataId`` or keyword arguments specify unknown 

2245 dimensions or values, or when they contain inconsistent values. 

2246 lsst.daf.butler.registry.DatasetTypeExpressionError 

2247 Raised when ``datasetType`` expression is invalid. 

2248 lsst.daf.butler.registry.UserExpressionError 

2249 Raised when ``where`` expression is invalid. 

2250 """ 

2251 if components is not _DefaultMarker: 

2252 if components is not False: 

2253 raise DatasetTypeError( 

2254 "Dataset component queries are no longer supported by Registry. Use " 

2255 "DatasetType methods to obtain components from parent dataset types instead." 

2256 ) 

2257 else: 

2258 warnings.warn( 

2259 "The components parameter is ignored. It will be removed after v27.", 

2260 category=FutureWarning, 

2261 stacklevel=find_outside_stacklevel("lsst.daf.butler"), 

2262 ) 

2263 requested_dimensions = self.dimensions.conform(dimensions) 

2264 doomed_by: list[str] = [] 

2265 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs) 

2266 resolved_dataset_types, collection_wildcard = self._standardize_query_dataset_args( 

2267 datasets, collections, doomed_by=doomed_by 

2268 ) 

2269 if collection_wildcard is not None and collection_wildcard.empty(): 

2270 doomed_by.append("No data coordinates can be found because collection list is empty.") 

2271 summary = queries.QuerySummary( 

2272 requested=requested_dimensions, 

2273 column_types=self._managers.column_types, 

2274 data_id=data_id, 

2275 expression=where, 

2276 bind=bind, 

2277 defaults=self.defaults.dataId, 

2278 check=check, 

2279 datasets=resolved_dataset_types, 

2280 ) 

2281 builder = self._makeQueryBuilder(summary, doomed_by=doomed_by) 

2282 for datasetType in resolved_dataset_types: 

2283 builder.joinDataset(datasetType, collection_wildcard, isResult=False) 

2284 query = builder.finish() 

2285 

2286 return queries.DataCoordinateQueryResults(query) 

2287 
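A hedged sketch of ``queryDataIds``, constraining results by dataset existence (names hypothetical):

    # Exposure/detector combinations for which at least one "raw" dataset exists
    # in the given collection; the instrument keyword acts as a data ID constraint.
    data_ids = registry.queryDataIds(
        ["exposure", "detector"],
        datasets="raw",
        collections="LATISS/raw/all",
        instrument="LATISS",
    )
    # Expand to attach dimension records before iterating, as recommended above.
    for data_id in data_ids.expanded():
        print(data_id["exposure"], data_id["detector"])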

2288 def queryDimensionRecords( 

2289 self, 

2290 element: DimensionElement | str, 

2291 *, 

2292 dataId: DataId | None = None, 

2293 datasets: Any = None, 

2294 collections: CollectionArgType | None = None, 

2295 where: str = "", 

2296 components: bool | _Marker = _DefaultMarker, 

2297 bind: Mapping[str, Any] | None = None, 

2298 check: bool = True, 

2299 **kwargs: Any, 

2300 ) -> queries.DimensionRecordQueryResults: 

2301 """Query for dimension information matching user-provided criteria. 

2302 

2303 Parameters 

2304 ---------- 

2305 element : `DimensionElement` or `str` 

2306 The dimension element to obtain records for. 

2307 dataId : `dict` or `DataCoordinate`, optional 

2308 A data ID whose key-value pairs are used as equality constraints 

2309 in the query. 

2310 datasets : dataset type expression, optional 

2311 An expression that fully or partially identifies dataset types 

2312 that should constrain the yielded records. See `queryDataIds` and 

2313 :ref:`daf_butler_dataset_type_expressions` for more information. 

2314 collections : collection expression, optional 

2315 An expression that identifies the collections to search for 

2316 datasets, such as a `str` (for full matches or partial matches 

2317 via globs), `re.Pattern` (for partial matches), or iterable 

2318 thereof. ``...`` can be used to search all collections (actually 

2319 just all `~CollectionType.RUN` collections, because this will 

2320 still find all datasets). If not provided, 

2321 ``self.defaults.collections`` is used. Ignored unless ``datasets`` 

2322 is also passed. See :ref:`daf_butler_collection_expressions` for 

2323 more information. 

2324 where : `str`, optional 

2325 A string expression similar to a SQL WHERE clause. See 

2326 `queryDataIds` and :ref:`daf_butler_dimension_expressions` for more 

2327 information. 

2328 components : `bool`, optional 

2329 Must be `False`; applying dataset expressions to components is no 

2330 longer supported. See `queryDataIds` for more information. 

2331 

2332 Provided only for backwards compatibility. After 

2333 v27 this argument will be removed entirely. 

2334 bind : `~collections.abc.Mapping`, optional 

2335 Mapping containing literal values that should be injected into the 

2336 ``where`` expression, keyed by the identifiers they replace. 

2337 Values of collection type can be expanded in some cases; see 

2338 :ref:`daf_butler_dimension_expressions_identifiers` for more 

2339 information. 

2340 check : `bool`, optional 

2341 If `True` (default) check the query for consistency before 

2342 executing it. This may reject some valid queries that resemble 

2343 common mistakes (e.g. queries for visits without specifying an 

2344 instrument). 

2345 **kwargs 

2346 Additional keyword arguments are forwarded to 

2347 `DataCoordinate.standardize` when processing the ``dataId`` 

2348 argument (and may be used to provide a constraining data ID even 

2349 when the ``dataId`` argument is `None`). 

2350 

2351 Returns 

2352 ------- 

2353 dataIds : `.queries.DimensionRecordQueryResults` 

2354 Data IDs matching the given query parameters. 

2355 

2356 Raises 

2357 ------ 

2358 lsst.daf.butler.registry.NoDefaultCollectionError 

2359 Raised if ``collections`` is `None` and 

2360 ``self.defaults.collections`` is `None`. 

2361 lsst.daf.butler.registry.CollectionExpressionError 

2362 Raised when ``collections`` expression is invalid. 

2363 lsst.daf.butler.registry.DataIdError 

2364 Raised when ``dataId`` or keyword arguments specify unknown 

2365 dimensions or values, or when they contain inconsistent values. 

2366 lsst.daf.butler.registry.DatasetTypeExpressionError 

2367 Raised when ``datasetType`` expression is invalid. 

2368 lsst.daf.butler.registry.UserExpressionError 

2369 Raised when ``where`` expression is invalid. 

2370 """ 

2371 if components is not _DefaultMarker: 

2372 if components is not False: 

2373 raise DatasetTypeError( 

2374 "Dataset component queries are no longer supported by Registry. Use " 

2375 "DatasetType methods to obtain components from parent dataset types instead." 

2376 ) 

2377 else: 

2378 warnings.warn( 

2379 "The components parameter is ignored. It will be removed after v27.", 

2380 category=FutureWarning, 

2381 stacklevel=find_outside_stacklevel("lsst.daf.butler"), 

2382 ) 

2383 if not isinstance(element, DimensionElement): 

2384 try: 

2385 element = self.dimensions[element] 

2386 except KeyError as e: 

2387 raise DimensionNameError( 

2388 f"No such dimension '{element}', available dimensions: " + str(self.dimensions.elements) 

2389 ) from e 

2390 doomed_by: list[str] = [] 

2391 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs) 

2392 resolved_dataset_types, collection_wildcard = self._standardize_query_dataset_args( 

2393 datasets, collections, doomed_by=doomed_by 

2394 ) 

2395 if collection_wildcard is not None and collection_wildcard.empty(): 

2396 doomed_by.append("No dimension records can be found because collection list is empty.") 

2397 summary = queries.QuerySummary( 

2398 requested=element.minimal_group, 

2399 column_types=self._managers.column_types, 

2400 data_id=data_id, 

2401 expression=where, 

2402 bind=bind, 

2403 defaults=self.defaults.dataId, 

2404 check=check, 

2405 datasets=resolved_dataset_types, 

2406 ) 

2407 builder = self._makeQueryBuilder(summary, doomed_by=doomed_by) 

2408 for datasetType in resolved_dataset_types: 

2409 builder.joinDataset(datasetType, collection_wildcard, isResult=False) 

2410 query = builder.finish().with_record_columns(element.name) 

2411 return queries.DatabaseDimensionRecordQueryResults(query, element) 

2412 
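A hedged sketch of ``queryDimensionRecords`` (the instrument name is hypothetical and the printed fields assume the default dimension universe):

    # Fetch detector records for one instrument; keyword arguments are folded
    # into the constraining data ID, as described above.
    for record in registry.queryDimensionRecords("detector", instrument="LATISS"):
        print(record.id, record.full_name)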

2413 def queryDatasetAssociations( 

2414 self, 

2415 datasetType: str | DatasetType, 

2416 collections: CollectionArgType | None = ..., 

2417 *, 

2418 collectionTypes: Iterable[CollectionType] = CollectionType.all(), 

2419 flattenChains: bool = False, 

2420 ) -> Iterator[DatasetAssociation]: 

2421 """Iterate over dataset-collection combinations where the dataset is in 

2422 the collection. 

2423 

2424 This method is a temporary placeholder for better support for 

2425 association results in `queryDatasets`. It will probably be 

2426 removed in the future, and should be avoided in production code 

2427 whenever possible. 

2428 

2429 Parameters 

2430 ---------- 

2431 datasetType : `DatasetType` or `str` 

2432 A dataset type object or the name of one. 

2433 collections : collection expression, optional 

2434 An expression that identifies the collections to search for 

2435 datasets, such as a `str` (for full matches or partial matches 

2436 via globs), `re.Pattern` (for partial matches), or iterable 

2437 thereof. ``...`` can be used to search all collections (actually 

2438 just all `~CollectionType.RUN` collections, because this will still 

2439 find all datasets). If not provided, ``self.defaults.collections`` 

2440 is used. See :ref:`daf_butler_collection_expressions` for more 

2441 information. 

2442 collectionTypes : `~collections.abc.Set` [ `CollectionType` ], optional 

2443 If provided, only yield associations from collections of these 

2444 types. 

2445 flattenChains : `bool`, optional 

2446 If `True`, search in the children of `~CollectionType.CHAINED` 

2447 collections. If `False`, ``CHAINED`` collections are ignored. 

2448 

2449 Yields 

2450 ------ 

2451 association : `.DatasetAssociation` 

2452 Object representing the relationship between a single dataset and 

2453 a single collection. 

2454 

2455 Raises 

2456 ------ 

2457 lsst.daf.butler.registry.NoDefaultCollectionError 

2458 Raised if ``collections`` is `None` and 

2459 ``self.defaults.collections`` is `None`. 

2460 lsst.daf.butler.registry.CollectionExpressionError 

2461 Raised when ``collections`` expression is invalid. 

2462 """ 

2463 if collections is None: 

2464 if not self.defaults.collections: 

2465 raise NoDefaultCollectionError( 

2466 "No collections provided to queryDatasetAssociations, " 

2467 "and no defaults from registry construction." 

2468 ) 

2469 collections = self.defaults.collections 

2470 collection_wildcard = CollectionWildcard.from_expression(collections) 

2471 backend = queries.SqlQueryBackend(self._db, self._managers, self.dimension_record_cache) 

2472 parent_dataset_type = backend.resolve_single_dataset_type_wildcard(datasetType) 

2473 timespan_tag = DatasetColumnTag(parent_dataset_type.name, "timespan") 

2474 collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection") 

2475 for parent_collection_record in backend.resolve_collection_wildcard( 

2476 collection_wildcard, 

2477 collection_types=frozenset(collectionTypes), 

2478 flatten_chains=flattenChains, 

2479 ): 

2480 # Resolve this possibly-chained collection into a list of 

2481 # non-CHAINED collections that actually hold datasets of this 

2482 # type. 

2483 candidate_collection_records = backend.resolve_dataset_collections( 

2484 parent_dataset_type, 

2485 CollectionWildcard.from_names([parent_collection_record.name]), 

2486 allow_calibration_collections=True, 

2487 governor_constraints={}, 

2488 ) 

2489 if not candidate_collection_records: 

2490 continue 

2491 with backend.context() as context: 

2492 relation = backend.make_dataset_query_relation( 

2493 parent_dataset_type, 

2494 candidate_collection_records, 

2495 columns={"dataset_id", "run", "timespan", "collection"}, 

2496 context=context, 

2497 ) 

2498 reader = queries.DatasetRefReader( 

2499 parent_dataset_type, 

2500 translate_collection=lambda k: self._managers.collections[k].name, 

2501 full=False, 

2502 ) 

2503 for row in context.fetch_iterable(relation): 

2504 ref = reader.read(row) 

2505 collection_record = self._managers.collections[row[collection_tag]] 

2506 if collection_record.type is CollectionType.CALIBRATION: 

2507 timespan = row[timespan_tag] 

2508 else: 

2509 # For backwards compatibility and (possibly?) user 

2510 # convenience we continue to define the timespan of a 

2511 # DatasetAssociation row for a non-CALIBRATION 

2512 # collection to be None rather than a fully unbounded 

2513 # timespan. 

2514 timespan = None 

2515 yield DatasetAssociation(ref=ref, collection=collection_record.name, timespan=timespan) 

2516 
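A hedged sketch of ``queryDatasetAssociations`` (the dataset type and collection glob are hypothetical):

    # Report every collection membership of each "bias" dataset; timespan is
    # only populated for CALIBRATION collections, as noted in the code above.
    for assoc in registry.queryDatasetAssociations(
        "bias", collections="LATISS/calib*", flattenChains=True
    ):
        print(assoc.collection, assoc.ref.dataId, assoc.timespan)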

2517 def get_datastore_records(self, ref: DatasetRef) -> DatasetRef: 

2518 """Retrieve datastore records for given ref. 

2519 

2520 Parameters 

2521 ---------- 

2522 ref : `DatasetRef` 

2523 Dataset reference for which to retrieve corresponding datastore 

2524 records. 

2525 

2526 Returns 

2527 ------- 

2528 updated_ref : `DatasetRef` 

2529 Dataset reference with filled datastore records. 

2530 

2531 Notes 

2532 ----- 

2533 If this method is called with a dataset ref that is not known to the 

2534 registry, a reference with an empty set of records is returned. 

2535 """ 

2536 datastore_records: dict[str, list[StoredDatastoreItemInfo]] = {} 

2537 for opaque, record_class in self._datastore_record_classes.items(): 

2538 records = self.fetchOpaqueData(opaque, dataset_id=ref.id) 

2539 datastore_records[opaque] = [record_class.from_record(record) for record in records] 

2540 return ref.replace(datastore_records=datastore_records) 

2541 

2542 def store_datastore_records(self, refs: Mapping[str, DatasetRef]) -> None: 

2543 """Store datastore records for given refs. 

2544 

2545 Parameters 

2546 ---------- 

2547 refs : `~collections.abc.Mapping` [`str`, `DatasetRef`] 

2548 Mapping of datastore name to a dataset reference stored in that 

2549 datastore; each reference must include datastore records. 

2550 """ 

2551 for datastore_name, ref in refs.items(): 

2552 # Store ref IDs in the bridge table. 

2553 bridge = self._managers.datastores.register(datastore_name) 

2554 bridge.insert([ref]) 

2555 

2556 # store records in opaque tables 

2557 assert ref._datastore_records is not None, "Dataset ref must have datastore records" 

2558 for table_name, records in ref._datastore_records.items(): 

2559 opaque_table = self._managers.opaque.get(table_name) 

2560 assert opaque_table is not None, f"Unexpected opaque table name {table_name}" 

2561 opaque_table.insert(*(record.to_record(dataset_id=ref.id) for record in records)) 

2562 

2563 def make_datastore_tables(self, tables: Mapping[str, DatastoreOpaqueTable]) -> None: 

2564 """Create opaque tables used by datastores. 

2565 

2566 Parameters 

2567 ---------- 

2568 tables : `~collections.abc.Mapping` 

2569 Maps opaque table name to its definition. 

2570 

2571 Notes 

2572 ----- 

2573 This method should disappear in the future when opaque table 

2574 definitions are provided during `Registry` construction. 

2575 """ 

2576 datastore_record_classes = {} 

2577 for table_name, table_def in tables.items(): 

2578 datastore_record_classes[table_name] = table_def.record_class 

2579 try: 

2580 self._managers.opaque.register(table_name, table_def.table_spec) 

2581 except ReadOnlyDatabaseError: 

2582 # If the database is read only and we just tried and failed to 

2583 # create a table, it means someone is trying to create a 

2584 # read-only butler client for an empty repo. That should be 

2585 # okay, as long as they don't then try to get any datasets before 

2586 # some other client creates the table. Chances are they're 

2587 # just validating configuration. 

2588 pass 

2589 self._datastore_record_classes = datastore_record_classes 

2590 

2591 def preload_cache(self) -> None: 

2592 """Immediately load caches that are used for common operations.""" 

2593 self.dimension_record_cache.preload_cache() 

2594 

2595 @property 

2596 def obsCoreTableManager(self) -> ObsCoreTableManager | None: 

2597 """The ObsCore manager instance for this registry 

2598 (`~.interfaces.ObsCoreTableManager` 

2599 or `None`). 

2600 

2601 The ObsCore manager may not be implemented for all registry backends, 

2602 and is not enabled for many repositories. 

2603 """ 

2604 return self._managers.obscore 

2605 

2606 storageClasses: StorageClassFactory 

2607 """All storage classes known to the registry (`StorageClassFactory`). 

2608 """ 

2609 

2610 _defaults: RegistryDefaults 

2611 """Default collections used for registry queries (`RegistryDefaults`)."""