Coverage for python/lsst/daf/butler/registry/sql_registry.py: 18%

580 statements  

coverage.py v7.4.4, created at 2024-04-18 09:55 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30from .. import ddl 

31 

32__all__ = ("SqlRegistry",) 

33 

34import contextlib 

35import logging 

36import warnings 

37from collections.abc import Iterable, Iterator, Mapping, Sequence 

38from typing import TYPE_CHECKING, Any, Literal, cast 

39 

40import sqlalchemy 

41from lsst.daf.relation import LeafRelation, Relation 

42from lsst.resources import ResourcePathExpression 

43from lsst.utils.introspection import find_outside_stacklevel 

44from lsst.utils.iteration import ensure_iterable 

45 

46from .._column_tags import DatasetColumnTag 

47from .._config import Config 

48from .._dataset_association import DatasetAssociation 

49from .._dataset_ref import DatasetId, DatasetIdGenEnum, DatasetRef 

50from .._dataset_type import DatasetType 

51from .._exceptions import CalibrationLookupError, DimensionNameError 

52from .._named import NamedKeyMapping, NameLookupMapping 

53from .._storage_class import StorageClassFactory 

54from .._timespan import Timespan 

55from ..dimensions import ( 

56 DataCoordinate, 

57 DataId, 

58 Dimension, 

59 DimensionConfig, 

60 DimensionElement, 

61 DimensionGraph, 

62 DimensionGroup, 

63 DimensionRecord, 

64 DimensionUniverse, 

65) 

66from ..dimensions.record_cache import DimensionRecordCache 

67from ..progress import Progress 

68from ..registry import ( 

69 ArgumentError, 

70 CollectionExpressionError, 

71 CollectionSummary, 

72 CollectionType, 

73 CollectionTypeError, 

74 ConflictingDefinitionError, 

75 DataIdValueError, 

76 DatasetTypeError, 

77 InconsistentDataIdError, 

78 MissingDatasetTypeError, 

79 NoDefaultCollectionError, 

80 OrphanedRecordError, 

81 RegistryConfig, 

82 RegistryConsistencyError, 

83 RegistryDefaults, 

84 queries, 

85) 

86from ..registry.interfaces import ChainedCollectionRecord, ReadOnlyDatabaseError, RunRecord 

87from ..registry.managers import RegistryManagerInstances, RegistryManagerTypes 

88from ..registry.wildcards import CollectionWildcard, DatasetTypeWildcard 

89from ..utils import _DefaultMarker, _Marker, transactional 

90 

91if TYPE_CHECKING: 

92 from .._butler_config import ButlerConfig 

93 from ..datastore._datastore import DatastoreOpaqueTable 

94 from ..datastore.stored_file_info import StoredDatastoreItemInfo 

95 from ..registry._registry import CollectionArgType 

96 from ..registry.interfaces import ( 

97 CollectionRecord, 

98 Database, 

99 DatastoreRegistryBridgeManager, 

100 ObsCoreTableManager, 

101 ) 

102 

103 

104_LOG = logging.getLogger(__name__) 

105 

106 

107class SqlRegistry: 

108 """Butler Registry implementation that uses SQL database as backend. 

109 

110 Parameters 

111 ---------- 

112 database : `Database` 

113 Database instance to store Registry. 

114 defaults : `RegistryDefaults` 

115 Default collection search path and/or output `~CollectionType.RUN` 

116 collection. 

117 managers : `RegistryManagerInstances` 

118 All the managers required for this registry. 

119 """ 

120 

121 defaultConfigFile: str | None = None 

122 """Path to configuration defaults. Accessed within the ``configs`` resource 

123 or relative to a search path. Can be `None` if no defaults are specified. 

124 """ 

125 

126 @classmethod 

127 def forceRegistryConfig( 

128 cls, config: ButlerConfig | RegistryConfig | Config | str | None 

129 ) -> RegistryConfig: 

130 """Force the supplied config to a `RegistryConfig`. 

131 

132 Parameters 

133 ---------- 

134 config : `RegistryConfig`, `Config`, `str`, or `None` 

135 Registry configuration. If missing, the default configuration will 

136 be loaded from ``registry.yaml``. 

137 

138 Returns 

139 ------- 

140 registry_config : `RegistryConfig` 

141 A registry config. 

142 """ 

143 if not isinstance(config, RegistryConfig): 

144 if isinstance(config, str | Config) or config is None: 

145 config = RegistryConfig(config) 

146 else: 

147 raise ValueError(f"Incompatible Registry configuration: {config}") 

148 return config 

149 

150 @classmethod 

151 def createFromConfig( 

152 cls, 

153 config: RegistryConfig | str | None = None, 

154 dimensionConfig: DimensionConfig | str | None = None, 

155 butlerRoot: ResourcePathExpression | None = None, 

156 ) -> SqlRegistry: 

157 """Create registry database and return `SqlRegistry` instance. 

158 

159 This method initializes the database contents; the database must be 

160 empty prior to calling this method. 

161 

162 Parameters 

163 ---------- 

164 config : `RegistryConfig` or `str`, optional 

165 Registry configuration. If missing, the default configuration will 

166 be loaded from ``registry.yaml``. 

167 dimensionConfig : `DimensionConfig` or `str`, optional 

168 Dimensions configuration. If missing, the default configuration 

169 will be loaded from ``dimensions.yaml``. 

170 butlerRoot : convertible to `lsst.resources.ResourcePath`, optional 

171 Path to the repository root this `SqlRegistry` will manage. 

172 

173 Returns 

174 ------- 

175 registry : `SqlRegistry` 

176 A new `SqlRegistry` instance. 

177 """ 

178 config = cls.forceRegistryConfig(config) 

179 config.replaceRoot(butlerRoot) 

180 

181 if isinstance(dimensionConfig, str): 

182 dimensionConfig = DimensionConfig(dimensionConfig) 

183 elif dimensionConfig is None: 

184 dimensionConfig = DimensionConfig() 

185 elif not isinstance(dimensionConfig, DimensionConfig): 

186 raise TypeError(f"Incompatible Dimension configuration type: {type(dimensionConfig)}") 

187 

188 DatabaseClass = config.getDatabaseClass() 

189 database = DatabaseClass.fromUri( 

190 config.connectionString, origin=config.get("origin", 0), namespace=config.get("namespace") 

191 ) 

192 managerTypes = RegistryManagerTypes.fromConfig(config) 

193 managers = managerTypes.makeRepo(database, dimensionConfig) 

194 return cls(database, RegistryDefaults(), managers) 

195 
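# Illustrative sketch (not part of the module above): creating a brand-new,
# empty registry database from a configuration.  The SQLite URI and repository
# root below are hypothetical placeholders.
from lsst.daf.butler.registry import RegistryConfig
from lsst.daf.butler.registry.sql_registry import SqlRegistry

new_config = RegistryConfig({"db": "sqlite:///some/path/gen3.sqlite3"})
new_registry = SqlRegistry.createFromConfig(new_config, butlerRoot="/some/path")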

196 @classmethod 

197 def fromConfig( 

198 cls, 

199 config: ButlerConfig | RegistryConfig | Config | str, 

200 butlerRoot: ResourcePathExpression | None = None, 

201 writeable: bool = True, 

202 defaults: RegistryDefaults | None = None, 

203 ) -> SqlRegistry: 

204 """Create `Registry` subclass instance from `config`. 

205 

206 Registry database must be initialized prior to calling this method. 

207 

208 Parameters 

209 ---------- 

210 config : `ButlerConfig`, `RegistryConfig`, `Config` or `str` 

211 Registry configuration. 

212 butlerRoot : `lsst.resources.ResourcePathExpression`, optional 

213 Path to the repository root this `Registry` will manage. 

214 writeable : `bool`, optional 

215 If `True` (default), create a read-write connection to the database. 

216 defaults : `RegistryDefaults`, optional 

217 Default collection search path and/or output `~CollectionType.RUN` 

218 collection. 

219 

220 Returns 

221 ------- 

222 registry : `SqlRegistry` 

223 A new `SqlRegistry` subclass instance. 

224 """ 

225 config = cls.forceRegistryConfig(config) 

226 config.replaceRoot(butlerRoot) 

227 DatabaseClass = config.getDatabaseClass() 

228 database = DatabaseClass.fromUri( 

229 config.connectionString, 

230 origin=config.get("origin", 0), 

231 namespace=config.get("namespace"), 

232 writeable=writeable, 

233 ) 

234 managerTypes = RegistryManagerTypes.fromConfig(config) 

235 with database.session(): 

236 managers = managerTypes.loadRepo(database) 

237 if defaults is None: 

238 defaults = RegistryDefaults() 

239 return cls(database, defaults, managers) 

240 
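# Illustrative sketch: connecting to an already-initialized registry database
# read-only, with a default collection search path.  The URI and collection
# name are hypothetical placeholders.
from lsst.daf.butler.registry import RegistryConfig, RegistryDefaults
from lsst.daf.butler.registry.sql_registry import SqlRegistry

existing_config = RegistryConfig({"db": "sqlite:///some/path/gen3.sqlite3"})
registry = SqlRegistry.fromConfig(
    existing_config,
    writeable=False,
    defaults=RegistryDefaults(collections=["HSC/defaults"]),
)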

241 def __init__( 

242 self, 

243 database: Database, 

244 defaults: RegistryDefaults, 

245 managers: RegistryManagerInstances, 

246 ): 

247 self._db = database 

248 self._managers = managers 

249 self.storageClasses = StorageClassFactory() 

250 # This is public to SqlRegistry's internal-to-daf_butler callers, but 

251 # it is intentionally not part of RegistryShim. 

252 self.dimension_record_cache = DimensionRecordCache( 

253 self._managers.dimensions.universe, 

254 fetch=self._managers.dimensions.fetch_cache_dict, 

255 ) 

256 # Intentionally invoke property setter to initialize defaults. This 

257 # can only be done after most of the rest of Registry has already been 

258 # initialized, and must be done before the property getter is used. 

259 self.defaults = defaults 

260 # TODO: This is currently initialized by `make_datastore_tables`, 

261 # eventually we'll need to do it during construction. 

262 # The mapping is indexed by the opaque table name. 

263 self._datastore_record_classes: Mapping[str, type[StoredDatastoreItemInfo]] = {} 

264 

265 def __str__(self) -> str: 

266 return str(self._db) 

267 

268 def __repr__(self) -> str: 

269 return f"SqlRegistry({self._db!r}, {self.dimensions!r})" 

270 

271 def isWriteable(self) -> bool: 

272 """Return `True` if this registry allows write operations, and `False` 

273 otherwise. 

274 """ 

275 return self._db.isWriteable() 

276 

277 def copy(self, defaults: RegistryDefaults | None = None) -> SqlRegistry: 

278 """Create a new `SqlRegistry` backed by the same data repository 

279 as this one and sharing a database connection pool with it, but with 

280 independent defaults and database sessions. 

281 

282 Parameters 

283 ---------- 

284 defaults : `~lsst.daf.butler.registry.RegistryDefaults`, optional 

285 Default collections and data ID values for the new registry. If 

286 not provided, ``self.defaults`` will be used (but future changes 

287 to either registry's defaults will not affect the other). 

288 

289 Returns 

290 ------- 

291 copy : `SqlRegistry` 

292 A new `SqlRegistry` instance with its own defaults. 

293 """ 

294 if defaults is None: 

295 # No need to copy, because `RegistryDefaults` is immutable; we 

296 # effectively copy on write. 

297 defaults = self.defaults 

298 db = self._db.clone() 

299 result = SqlRegistry(db, defaults, self._managers.clone(db)) 

300 result._datastore_record_classes = dict(self._datastore_record_classes) 

301 result.dimension_record_cache.load_from(self.dimension_record_cache) 

302 return result 

303 
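# Illustrative sketch: making an independent copy of an existing registry with
# its own defaults, e.g. for use in another thread.  ``registry`` and the
# collection/run names are hypothetical.
from lsst.daf.butler.registry import RegistryDefaults

worker_registry = registry.copy(
    RegistryDefaults(collections=["HSC/runs/example"], run="u/someone/run")
)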

304 @property 

305 def dimensions(self) -> DimensionUniverse: 

306 """Definitions of all dimensions recognized by this `Registry` 

307 (`DimensionUniverse`). 

308 """ 

309 return self._managers.dimensions.universe 

310 

311 @property 

312 def defaults(self) -> RegistryDefaults: 

313 """Default collection search path and/or output `~CollectionType.RUN` 

314 collection (`~lsst.daf.butler.registry.RegistryDefaults`). 

315 

316 This is an immutable struct whose components may not be set 

317 individually, but the entire struct can be set by assigning to this 

318 property. 

319 """ 

320 return self._defaults 

321 

322 @defaults.setter 

323 def defaults(self, value: RegistryDefaults) -> None: 

324 if value.run is not None: 

325 self.registerRun(value.run) 

326 value.finish(self) 

327 self._defaults = value 

328 

329 def refresh(self) -> None: 

330 """Refresh all in-memory state by querying the database. 

331 

332 This may be necessary to enable querying for entities added by other 

333 registry instances after this one was constructed. 

334 """ 

335 self.dimension_record_cache.reset() 

336 with self._db.transaction(): 

337 self._managers.refresh() 

338 

339 def caching_context(self) -> contextlib.AbstractContextManager[None]: 

340 """Return context manager that enables caching. 

341 

342 Returns 

343 ------- 

344 manager 

345 A context manager that enables client-side caching. Entering 

346 the context returns `None`. 

347 """ 

348 return self._managers.caching_context_manager() 

349 

350 @contextlib.contextmanager 

351 def transaction(self, *, savepoint: bool = False) -> Iterator[None]: 

352 """Return a context manager that represents a transaction. 

353 

354 Parameters 

355 ---------- 

356 savepoint : `bool` 

357 Whether to issue a SAVEPOINT in the database. 

358 

359 Yields 

360 ------ 

361 `None` 

362 """ 

363 with self._db.transaction(savepoint=savepoint): 

364 yield 

365 
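# Illustrative sketch: grouping several registry operations so that they commit
# or roll back together.  ``registry``, ``refs``, and the collection names are
# hypothetical, and the collections are assumed to be registered already.
with registry.transaction(savepoint=True):
    registry.associate("u/someone/tagged-selection", refs)
    registry.setCollectionChain(
        "u/someone/chain", ["u/someone/tagged-selection", "HSC/runs/example"]
    )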

366 def resetConnectionPool(self) -> None: 

367 """Reset SQLAlchemy connection pool for `SqlRegistry` database. 

368 

369 This operation is useful when using the registry with fork-based 

370 multiprocessing. To use the registry across a fork boundary, make 

371 sure that there are no currently active connections (no session or 

372 transaction in progress) and that the connection pool has been reset 

373 using this method. The child process should call this method 

374 immediately after the fork. 

375 """ 

376 self._db._engine.dispose() 

377 
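# Illustrative sketch of the fork-safety pattern described above: each forked
# worker resets the inherited connection pool before issuing any queries.
# ``registry`` is a hypothetical, already-constructed SqlRegistry.
import multiprocessing


def _init_worker() -> None:
    registry.resetConnectionPool()


pool = multiprocessing.Pool(processes=4, initializer=_init_worker)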

378 def registerOpaqueTable(self, tableName: str, spec: ddl.TableSpec) -> None: 

379 """Add an opaque (to the `Registry`) table for use by a `Datastore` or 

380 other data repository client. 

381 

382 Opaque table records can be added via `insertOpaqueData`, retrieved via 

383 `fetchOpaqueData`, and removed via `deleteOpaqueData`. 

384 

385 Parameters 

386 ---------- 

387 tableName : `str` 

388 Logical name of the opaque table. This may differ from the 

389 actual name used in the database by a prefix and/or suffix. 

390 spec : `ddl.TableSpec` 

391 Specification for the table to be added. 

392 """ 

393 self._managers.opaque.register(tableName, spec) 

394 

395 @transactional 

396 def insertOpaqueData(self, tableName: str, *data: dict) -> None: 

397 """Insert records into an opaque table. 

398 

399 Parameters 

400 ---------- 

401 tableName : `str` 

402 Logical name of the opaque table. Must match the name used in a 

403 previous call to `registerOpaqueTable`. 

404 *data 

405 Each additional positional argument is a dictionary that represents 

406 a single row to be added. 

407 """ 

408 self._managers.opaque[tableName].insert(*data) 

409 

410 def fetchOpaqueData(self, tableName: str, **where: Any) -> Iterator[Mapping[str, Any]]: 

411 """Retrieve records from an opaque table. 

412 

413 Parameters 

414 ---------- 

415 tableName : `str` 

416 Logical name of the opaque table. Must match the name used in a 

417 previous call to `registerOpaqueTable`. 

418 **where 

419 Additional keyword arguments are interpreted as equality 

420 constraints that restrict the returned rows (combined with AND); 

421 keyword arguments are column names and values are the values they 

422 must have. 

423 

424 Yields 

425 ------ 

426 row : `dict` 

427 A dictionary representing a single result row. 

428 """ 

429 yield from self._managers.opaque[tableName].fetch(**where) 

430 

431 @transactional 

432 def deleteOpaqueData(self, tableName: str, **where: Any) -> None: 

433 """Remove records from an opaque table. 

434 

435 Parameters 

436 ---------- 

437 tableName : `str` 

438 Logical name of the opaque table. Must match the name used in a 

439 previous call to `registerOpaqueTable`. 

440 **where 

441 Additional keyword arguments are interpreted as equality 

442 constraints that restrict the deleted rows (combined with AND); 

443 keyword arguments are column names and values are the values they 

444 must have. 

445 """ 

446 self._managers.opaque[tableName].delete(where.keys(), where) 

447 
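# Illustrative sketch: the opaque-table round trip used by datastore clients.
# The table name, column layout, and the exact ``ddl.FieldSpec`` arguments are
# assumptions for illustration only; ``registry`` is hypothetical.
import uuid

import sqlalchemy
from lsst.daf.butler import ddl

spec = ddl.TableSpec(
    fields=[
        ddl.FieldSpec(name="dataset_id", dtype=ddl.GUID, primaryKey=True),
        ddl.FieldSpec(name="path", dtype=sqlalchemy.String, length=256),
    ]
)
registry.registerOpaqueTable("example_datastore_records", spec)
record_id = uuid.uuid4()
registry.insertOpaqueData(
    "example_datastore_records", {"dataset_id": record_id, "path": "a/b.fits"}
)
rows = list(registry.fetchOpaqueData("example_datastore_records", dataset_id=record_id))
registry.deleteOpaqueData("example_datastore_records", dataset_id=record_id)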

448 def registerCollection( 

449 self, name: str, type: CollectionType = CollectionType.TAGGED, doc: str | None = None 

450 ) -> bool: 

451 """Add a new collection if one with the given name does not exist. 

452 

453 Parameters 

454 ---------- 

455 name : `str` 

456 The name of the collection to create. 

457 type : `CollectionType` 

458 Enum value indicating the type of collection to create. 

459 doc : `str`, optional 

460 Documentation string for the collection. 

461 

462 Returns 

463 ------- 

464 registered : `bool` 

465 `True` if the collection was created by this call, `False` if it 

466 already existed. 

467 

468 Notes 

469 ----- 

470 This method cannot be called within transactions, as it needs to be 

471 able to perform its own transaction to be concurrent. 

472 """ 

473 _, registered = self._managers.collections.register(name, type, doc=doc) 

474 return registered 

475 
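# Illustrative sketch: registering collections of the different types defined
# by `CollectionType`.  The collection names are hypothetical; ``registry`` is
# a writeable SqlRegistry.
from lsst.daf.butler.registry import CollectionType

registry.registerRun("u/someone/processing-run")
registry.registerCollection("u/someone/tagged-selection", CollectionType.TAGGED)
registry.registerCollection(
    "u/someone/calibs", CollectionType.CALIBRATION, doc="Example calibration collection."
)
created = registry.registerCollection("u/someone/chain", CollectionType.CHAINED)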

476 def getCollectionType(self, name: str) -> CollectionType: 

477 """Return an enumeration value indicating the type of the given 

478 collection. 

479 

480 Parameters 

481 ---------- 

482 name : `str` 

483 The name of the collection. 

484 

485 Returns 

486 ------- 

487 type : `CollectionType` 

488 Enum value indicating the type of this collection. 

489 

490 Raises 

491 ------ 

492 lsst.daf.butler.registry.MissingCollectionError 

493 Raised if no collection with the given name exists. 

494 """ 

495 return self._managers.collections.find(name).type 

496 

497 def get_collection_record(self, name: str) -> CollectionRecord: 

498 """Return the record for this collection. 

499 

500 Parameters 

501 ---------- 

502 name : `str` 

503 Name of the collection for which the record is to be retrieved. 

504 

505 Returns 

506 ------- 

507 record : `CollectionRecord` 

508 The record for this collection. 

509 """ 

510 return self._managers.collections.find(name) 

511 

512 def registerRun(self, name: str, doc: str | None = None) -> bool: 

513 """Add a new run if one with the given name does not exist. 

514 

515 Parameters 

516 ---------- 

517 name : `str` 

518 The name of the run to create. 

519 doc : `str`, optional 

520 Documentation string for the collection. 

521 

522 Returns 

523 ------- 

524 registered : `bool` 

525 Boolean indicating whether a new run was registered. `False` 

526 if it already existed. 

527 

528 Notes 

529 ----- 

530 This method cannot be called within transactions, as it needs to be 

531 able to perform its own transaction to be concurrent. 

532 """ 

533 _, registered = self._managers.collections.register(name, CollectionType.RUN, doc=doc) 

534 return registered 

535 

536 @transactional 

537 def removeCollection(self, name: str) -> None: 

538 """Remove the given collection from the registry. 

539 

540 Parameters 

541 ---------- 

542 name : `str` 

543 The name of the collection to remove. 

544 

545 Raises 

546 ------ 

547 lsst.daf.butler.registry.MissingCollectionError 

548 Raised if no collection with the given name exists. 

549 sqlalchemy.exc.IntegrityError 

550 Raised if the database rows associated with the collection are 

551 still referenced by some other table, such as a dataset in a 

552 datastore (for `~CollectionType.RUN` collections only) or a 

553 `~CollectionType.CHAINED` collection of which this collection is 

554 a child. 

555 

556 Notes 

557 ----- 

558 If this is a `~CollectionType.RUN` collection, all datasets and quanta 

559 in it will be removed from the `Registry` database. This requires that 

560 those datasets be removed (or at least trashed) from any datastores 

561 that hold them first. 

562 

563 A collection may not be deleted as long as it is referenced by a 

564 `~CollectionType.CHAINED` collection; the ``CHAINED`` collection must 

565 be deleted or redefined first. 

566 """ 

567 self._managers.collections.remove(name) 

568 

569 def getCollectionChain(self, parent: str) -> tuple[str, ...]: 

570 """Return the child collections in a `~CollectionType.CHAINED` 

571 collection. 

572 

573 Parameters 

574 ---------- 

575 parent : `str` 

576 Name of the chained collection. Must have already been added via 

577 a call to `Registry.registerCollection`. 

578 

579 Returns 

580 ------- 

581 children : `~collections.abc.Sequence` [ `str` ] 

582 An ordered sequence of collection names that are searched when the 

583 given chained collection is searched. 

584 

585 Raises 

586 ------ 

587 lsst.daf.butler.registry.MissingCollectionError 

588 Raised if ``parent`` does not exist in the `Registry`. 

589 lsst.daf.butler.registry.CollectionTypeError 

590 Raised if ``parent`` does not correspond to a 

591 `~CollectionType.CHAINED` collection. 

592 """ 

593 record = self._managers.collections.find(parent) 

594 if record.type is not CollectionType.CHAINED: 

595 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.") 

596 assert isinstance(record, ChainedCollectionRecord) 

597 return record.children 

598 

599 @transactional 

600 def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None: 

601 """Define or redefine a `~CollectionType.CHAINED` collection. 

602 

603 Parameters 

604 ---------- 

605 parent : `str` 

606 Name of the chained collection. Must have already been added via 

607 a call to `Registry.registerCollection`. 

608 children : collection expression 

609 An expression defining an ordered search of child collections, 

610 generally an iterable of `str`; see 

611 :ref:`daf_butler_collection_expressions` for more information. 

612 flatten : `bool`, optional 

613 If `True` (`False` is default), recursively flatten out any nested 

614 `~CollectionType.CHAINED` collections in ``children`` first. 

615 

616 Raises 

617 ------ 

618 lsst.daf.butler.registry.MissingCollectionError 

619 Raised when any of the given collections do not exist in the 

620 `Registry`. 

621 lsst.daf.butler.registry.CollectionTypeError 

622 Raised if ``parent`` does not correspond to a 

623 `~CollectionType.CHAINED` collection. 

624 CollectionCycleError 

625 Raised if the given collections contain a cycle. 

626 

627 Notes 

628 ----- 

629 If this function is called within a call to ``Butler.transaction``, it 

630 will hold a lock that prevents other processes from modifying the 

631 parent collection until the end of the transaction. Keep these 

632 transactions short. 

633 """ 

634 children = CollectionWildcard.from_expression(children).require_ordered() 

635 if flatten: 

636 children = self.queryCollections(children, flattenChains=True) 

637 

638 self._managers.collections.update_chain(parent, list(children), allow_use_in_caching_context=True) 

639 
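# Illustrative sketch: defining the search order of a CHAINED collection and
# reading it back.  Names are hypothetical and all child collections are
# assumed to exist already.
registry.setCollectionChain(
    "u/someone/chain", ["u/someone/processing-run", "HSC/calib"], flatten=False
)
children = registry.getCollectionChain("u/someone/chain")
# children == ("u/someone/processing-run", "HSC/calib")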

640 def getCollectionParentChains(self, collection: str) -> set[str]: 

641 """Return the CHAINED collections that directly contain the given one. 

642 

643 Parameters 

644 ---------- 

645 collection : `str` 

646 Name of the collection. 

647 

648 Returns 

649 ------- 

650 chains : `set` of `str` 

651 Set of `~CollectionType.CHAINED` collection names. 

652 """ 

653 return self._managers.collections.getParentChains(self._managers.collections.find(collection).key) 

654 

655 def getCollectionDocumentation(self, collection: str) -> str | None: 

656 """Retrieve the documentation string for a collection. 

657 

658 Parameters 

659 ---------- 

660 collection : `str` 

661 Name of the collection. 

662 

663 Returns 

664 ------- 

665 docs : `str` or `None` 

666 Docstring for the collection with the given name. 

667 """ 

668 return self._managers.collections.getDocumentation(self._managers.collections.find(collection).key) 

669 

670 def setCollectionDocumentation(self, collection: str, doc: str | None) -> None: 

671 """Set the documentation string for a collection. 

672 

673 Parameters 

674 ---------- 

675 collection : `str` 

676 Name of the collection. 

677 doc : `str` or `None` 

678 Docstring for the collection with the given name; will replace any 

679 existing docstring. Passing `None` will remove any existing 

680 docstring. 

681 """ 

682 self._managers.collections.setDocumentation(self._managers.collections.find(collection).key, doc) 

683 

684 def getCollectionSummary(self, collection: str) -> CollectionSummary: 

685 """Return a summary for the given collection. 

686 

687 Parameters 

688 ---------- 

689 collection : `str` 

690 Name of the collection for which a summary is to be retrieved. 

691 

692 Returns 

693 ------- 

694 summary : `~lsst.daf.butler.registry.CollectionSummary` 

695 Summary of the dataset types and governor dimension values in 

696 this collection. 

697 """ 

698 record = self._managers.collections.find(collection) 

699 return self._managers.datasets.getCollectionSummary(record) 

700 

701 def registerDatasetType(self, datasetType: DatasetType) -> bool: 

702 """Add a new `DatasetType` to the Registry. 

703 

704 It is not an error to register the same `DatasetType` twice. 

705 

706 Parameters 

707 ---------- 

708 datasetType : `DatasetType` 

709 The `DatasetType` to be added. 

710 

711 Returns 

712 ------- 

713 inserted : `bool` 

714 `True` if ``datasetType`` was inserted, `False` if an identical 

715 existing `DatasetType` was found. Note that in either case the 

716 DatasetType is guaranteed to be defined in the Registry 

717 consistently with the given definition. 

718 

719 Raises 

720 ------ 

721 ValueError 

722 Raised if the dimensions or storage class are invalid. 

723 lsst.daf.butler.registry.ConflictingDefinitionError 

724 Raised if this `DatasetType` is already registered with a different 

725 definition. 

726 

727 Notes 

728 ----- 

729 This method cannot be called within transactions, as it needs to be 

730 able to perform its own transaction to be concurrent. 

731 """ 

732 return self._managers.datasets.register(datasetType) 

733 

734 def removeDatasetType(self, name: str | tuple[str, ...]) -> None: 

735 """Remove the named `DatasetType` from the registry. 

736 

737 .. warning:: 

738 

739 Registry implementations can cache the dataset type definitions. 

740 This means that deleting the dataset type definition may result in 

741 unexpected behavior from other active butler processes that have 

742 not yet seen the deletion. 

743 

744 Parameters 

745 ---------- 

746 name : `str` or `tuple` [`str`] 

747 Name of the type to be removed, or a tuple of type names to be 

748 removed. Wildcards are allowed. 

749 

750 Raises 

751 ------ 

752 lsst.daf.butler.registry.OrphanedRecordError 

753 Raised if an attempt is made to remove the dataset type definition 

754 when there are already datasets associated with it. 

755 

756 Notes 

757 ----- 

758 If the dataset type is not registered, the method will return without 

759 action. 

760 """ 

761 for datasetTypeExpression in ensure_iterable(name): 

762 # Catch any warnings from the caller specifying a component 

763 # dataset type. This will result in an error later but the 

764 # warning could be confusing when the caller is not querying 

765 # anything. 

766 with warnings.catch_warnings(): 

767 warnings.simplefilter("ignore", category=FutureWarning) 

768 datasetTypes = list(self.queryDatasetTypes(datasetTypeExpression)) 

769 if not datasetTypes: 

770 _LOG.info("Dataset type %r not defined", datasetTypeExpression) 

771 else: 

772 for datasetType in datasetTypes: 

773 self._managers.datasets.remove(datasetType.name) 

774 _LOG.info("Removed dataset type %r", datasetType.name) 

775 

776 def getDatasetType(self, name: str) -> DatasetType: 

777 """Get the `DatasetType`. 

778 

779 Parameters 

780 ---------- 

781 name : `str` 

782 Name of the type. 

783 

784 Returns 

785 ------- 

786 type : `DatasetType` 

787 The `DatasetType` associated with the given name. 

788 

789 Raises 

790 ------ 

791 lsst.daf.butler.registry.MissingDatasetTypeError 

792 Raised if the requested dataset type has not been registered. 

793 

794 Notes 

795 ----- 

796 This method handles component dataset types automatically, though most 

797 other registry operations do not. 

798 """ 

799 parent_name, component = DatasetType.splitDatasetTypeName(name) 

800 storage = self._managers.datasets[parent_name] 

801 if component is None: 

802 return storage.datasetType 

803 else: 

804 return storage.datasetType.makeComponentDatasetType(component) 

805 
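# Illustrative sketch: registering a dataset type and looking it up again,
# including the component form handled by getDatasetType.  The dataset type
# name, dimensions, and storage class are hypothetical examples.
from lsst.daf.butler import DatasetType

calexp_type = DatasetType(
    "calexp",
    dimensions=("instrument", "visit", "detector"),
    storageClass="ExposureF",
    universe=registry.dimensions,
)
registry.registerDatasetType(calexp_type)
parent_type = registry.getDatasetType("calexp")
component_type = registry.getDatasetType("calexp.wcs")  # component of the parent type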

806 def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool: 

807 """Test whether the given dataset ID generation mode is supported by 

808 `insertDatasets`. 

809 

810 Parameters 

811 ---------- 

812 mode : `DatasetIdGenEnum` 

813 Enum value for the mode to test. 

814 

815 Returns 

816 ------- 

817 supported : `bool` 

818 Whether the given mode is supported. 

819 """ 

820 return self._managers.datasets.supportsIdGenerationMode(mode) 

821 

822 def findDataset( 

823 self, 

824 datasetType: DatasetType | str, 

825 dataId: DataId | None = None, 

826 *, 

827 collections: CollectionArgType | None = None, 

828 timespan: Timespan | None = None, 

829 datastore_records: bool = False, 

830 **kwargs: Any, 

831 ) -> DatasetRef | None: 

832 """Find a dataset given its `DatasetType` and data ID. 

833 

834 This can be used to obtain a `DatasetRef` that permits the dataset to 

835 be read from a `Datastore`. If the dataset is a component and cannot 

836 be found using the provided dataset type, a dataset ref for the parent 

837 will be returned instead but with the correct dataset type. 

838 

839 Parameters 

840 ---------- 

841 datasetType : `DatasetType` or `str` 

842 A `DatasetType` or the name of one. If this is a `DatasetType` 

843 instance, its storage class will be respected and propagated to 

844 the output, even if it differs from the dataset type definition 

845 in the registry, as long as the storage classes are convertible. 

846 dataId : `dict` or `DataCoordinate`, optional 

847 A `dict`-like object containing the `Dimension` links that identify 

848 the dataset within a collection. 

849 collections : collection expression, optional 

850 An expression that fully or partially identifies the collections to 

851 search for the dataset; see 

852 :ref:`daf_butler_collection_expressions` for more information. 

853 Defaults to ``self.defaults.collections``. 

854 timespan : `Timespan`, optional 

855 A timespan that the validity range of the dataset must overlap. 

856 If not provided, any `~CollectionType.CALIBRATION` collections 

857 matched by the ``collections`` argument will not be searched. 

858 datastore_records : `bool`, optional 

859 Whether to attach datastore records to the `DatasetRef`. 

860 **kwargs 

861 Additional keyword arguments passed to 

862 `DataCoordinate.standardize` to convert ``dataId`` to a true 

863 `DataCoordinate` or augment an existing one. 

864 

865 Returns 

866 ------- 

867 ref : `DatasetRef` 

868 A reference to the dataset, or `None` if no matching Dataset 

869 was found. 

870 

871 Raises 

872 ------ 

873 lsst.daf.butler.registry.NoDefaultCollectionError 

874 Raised if ``collections`` is `None` and 

875 ``self.defaults.collections`` is `None`. 

876 LookupError 

877 Raised if one or more data ID keys are missing. 

878 lsst.daf.butler.registry.MissingDatasetTypeError 

879 Raised if the dataset type does not exist. 

880 lsst.daf.butler.registry.MissingCollectionError 

881 Raised if any of ``collections`` does not exist in the registry. 

882 

883 Notes 

884 ----- 

885 This method simply returns `None` and does not raise an exception even 

886 when the set of collections searched is intrinsically incompatible with 

887 the dataset type, e.g. if ``datasetType.isCalibration() is False``, but 

888 only `~CollectionType.CALIBRATION` collections are being searched. 

889 This may make it harder to debug some lookup failures, but the behavior 

890 is intentional; we consider it more important that failed searches are 

891 reported consistently, regardless of the reason, and that adding 

892 additional collections that do not contain a match to the search path 

893 never changes the behavior. 

894 

895 This method handles component dataset types automatically, though most 

896 other registry operations do not. 

897 """ 

898 if collections is None: 

899 if not self.defaults.collections: 

900 raise NoDefaultCollectionError( 

901 "No collections provided to findDataset, and no defaults from registry construction." 

902 ) 

903 collections = self.defaults.collections 

904 backend = queries.SqlQueryBackend(self._db, self._managers, self.dimension_record_cache) 

905 with backend.caching_context(): 

906 collection_wildcard = CollectionWildcard.from_expression(collections, require_ordered=True) 

907 if collection_wildcard.empty(): 

908 return None 

909 matched_collections = backend.resolve_collection_wildcard(collection_wildcard) 

910 resolved_dataset_type = backend.resolve_single_dataset_type_wildcard(datasetType) 

911 dataId = DataCoordinate.standardize( 

912 dataId, 

913 dimensions=resolved_dataset_type.dimensions, 

914 universe=self.dimensions, 

915 defaults=self.defaults.dataId, 

916 **kwargs, 

917 ) 

918 governor_constraints = {name: {cast(str, dataId[name])} for name in dataId.dimensions.governors} 

919 (filtered_collections,) = backend.filter_dataset_collections( 

920 [resolved_dataset_type], 

921 matched_collections, 

922 governor_constraints=governor_constraints, 

923 ).values() 

924 if not filtered_collections: 

925 return None 

926 if timespan is None: 

927 filtered_collections = [ 

928 collection_record 

929 for collection_record in filtered_collections 

930 if collection_record.type is not CollectionType.CALIBRATION 

931 ] 

932 if filtered_collections: 

933 requested_columns = {"dataset_id", "run", "collection"} 

934 with backend.context() as context: 

935 predicate = context.make_data_coordinate_predicate( 

936 dataId.subset(resolved_dataset_type.dimensions), full=False 

937 ) 

938 if timespan is not None: 

939 requested_columns.add("timespan") 

940 predicate = predicate.logical_and( 

941 context.make_timespan_overlap_predicate( 

942 DatasetColumnTag(resolved_dataset_type.name, "timespan"), timespan 

943 ) 

944 ) 

945 relation = backend.make_dataset_query_relation( 

946 resolved_dataset_type, filtered_collections, requested_columns, context 

947 ).with_rows_satisfying(predicate) 

948 rows = list(context.fetch_iterable(relation)) 

949 else: 

950 rows = [] 

951 if not rows: 

952 return None 

953 elif len(rows) == 1: 

954 best_row = rows[0] 

955 else: 

956 rank_by_collection_key = {record.key: n for n, record in enumerate(filtered_collections)} 

957 collection_tag = DatasetColumnTag(resolved_dataset_type.name, "collection") 

958 row_iter = iter(rows) 

959 best_row = next(row_iter) 

960 best_rank = rank_by_collection_key[best_row[collection_tag]] 

961 have_tie = False 

962 for row in row_iter: 

963 if (rank := rank_by_collection_key[row[collection_tag]]) < best_rank: 

964 best_row = row 

965 best_rank = rank 

966 have_tie = False 

967 elif rank == best_rank: 

968 have_tie = True 

969 assert timespan is not None, "Rank ties should be impossible given DB constraints." 

970 if have_tie: 

971 raise CalibrationLookupError( 

972 f"Ambiguous calibration lookup for {resolved_dataset_type.name} in collections " 

973 f"{collection_wildcard.strings} with timespan {timespan}." 

974 ) 

975 reader = queries.DatasetRefReader( 

976 resolved_dataset_type, 

977 translate_collection=lambda k: self._managers.collections[k].name, 

978 ) 

979 ref = reader.read(best_row, data_id=dataId) 

980 if datastore_records: 

981 ref = self.get_datastore_records(ref) 

982 

983 return ref 

984 
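# Illustrative sketch: looking up a single dataset by dataset type and data ID.
# The collection name and data ID values are hypothetical; a ``timespan`` would
# additionally be needed to search CALIBRATION collections.
ref = registry.findDataset(
    "calexp",
    instrument="HSC",
    visit=903334,
    detector=16,
    collections=["HSC/runs/example"],
)
if ref is None:
    print("No matching dataset in the searched collections.")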

985 @transactional 

986 def insertDatasets( 

987 self, 

988 datasetType: DatasetType | str, 

989 dataIds: Iterable[DataId], 

990 run: str | None = None, 

991 expand: bool = True, 

992 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE, 

993 ) -> list[DatasetRef]: 

994 """Insert one or more datasets into the `Registry`. 

995 

996 This always adds new datasets; to associate existing datasets with 

997 a new collection, use ``associate``. 

998 

999 Parameters 

1000 ---------- 

1001 datasetType : `DatasetType` or `str` 

1002 A `DatasetType` or the name of one. 

1003 dataIds : `~collections.abc.Iterable` of `dict` or `DataCoordinate` 

1004 Dimension-based identifiers for the new datasets. 

1005 run : `str`, optional 

1006 The name of the run that produced the datasets. Defaults to 

1007 ``self.defaults.run``. 

1008 expand : `bool`, optional 

1009 If `True` (default), expand data IDs as they are inserted. This is 

1010 necessary in general to allow the datastore to generate file templates, 

1011 but it may be disabled if the caller can guarantee this is 

1012 unnecessary. 

1013 idGenerationMode : `DatasetIdGenEnum`, optional 

1014 Specifies the option for generating dataset IDs. By default, unique IDs 

1015 are generated for each inserted dataset. 

1016 

1017 Returns 

1018 ------- 

1019 refs : `list` of `DatasetRef` 

1020 Resolved `DatasetRef` instances for all given data IDs (in the same 

1021 order). 

1022 

1023 Raises 

1024 ------ 

1025 lsst.daf.butler.registry.DatasetTypeError 

1026 Raised if ``datasetType`` is not known to the registry. 

1027 lsst.daf.butler.registry.CollectionTypeError 

1028 Raised if ``run`` collection type is not `~CollectionType.RUN`. 

1029 lsst.daf.butler.registry.NoDefaultCollectionError 

1030 Raised if ``run`` is `None` and ``self.defaults.run`` is `None`. 

1031 lsst.daf.butler.registry.ConflictingDefinitionError 

1032 Raised if a dataset with the same dataset type and data ID as one 

1033 of those given already exists in ``run``. 

1034 lsst.daf.butler.registry.MissingCollectionError 

1035 Raised if ``run`` does not exist in the registry. 

1036 """ 

1037 if isinstance(datasetType, DatasetType): 

1038 storage = self._managers.datasets.find(datasetType.name) 

1039 if storage is None: 

1040 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.") 

1041 else: 

1042 storage = self._managers.datasets.find(datasetType) 

1043 if storage is None: 

1044 raise DatasetTypeError(f"DatasetType with name '{datasetType}' has not been registered.") 

1045 if run is None: 

1046 if self.defaults.run is None: 

1047 raise NoDefaultCollectionError( 

1048 "No run provided to insertDatasets, and no default from registry construction." 

1049 ) 

1050 run = self.defaults.run 

1051 runRecord = self._managers.collections.find(run) 

1052 if runRecord.type is not CollectionType.RUN: 

1053 raise CollectionTypeError( 

1054 f"Given collection is of type {runRecord.type.name}; RUN collection required." 

1055 ) 

1056 assert isinstance(runRecord, RunRecord) 

1057 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG) 

1058 if expand: 

1059 expandedDataIds = [ 

1060 self.expandDataId(dataId, dimensions=storage.datasetType.dimensions) 

1061 for dataId in progress.wrap(dataIds, f"Expanding {storage.datasetType.name} data IDs") 

1062 ] 

1063 else: 

1064 expandedDataIds = [ 

1065 DataCoordinate.standardize(dataId, graph=storage.datasetType.dimensions) for dataId in dataIds 

1066 ] 

1067 try: 

1068 refs = list(storage.insert(runRecord, expandedDataIds, idGenerationMode)) 

1069 if self._managers.obscore: 

1070 context = queries.SqlQueryContext(self._db, self._managers.column_types) 

1071 self._managers.obscore.add_datasets(refs, context) 

1072 except sqlalchemy.exc.IntegrityError as err: 

1073 raise ConflictingDefinitionError( 

1074 "A database constraint failure was triggered by inserting " 

1075 f"one or more datasets of type {storage.datasetType} into " 

1076 f"collection '{run}'. " 

1077 "This probably means a dataset with the same data ID " 

1078 "and dataset type already exists, but it may also mean a " 

1079 "dimension row is missing." 

1080 ) from err 

1081 return refs 

1082 
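# Illustrative sketch: inserting new datasets into a RUN collection.  The run
# name and data ID values are hypothetical; the dataset type is assumed to be
# registered already.
new_refs = registry.insertDatasets(
    "calexp",
    dataIds=[
        {"instrument": "HSC", "visit": 903334, "detector": 16},
        {"instrument": "HSC", "visit": 903334, "detector": 17},
    ],
    run="u/someone/processing-run",
)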

1083 @transactional 

1084 def _importDatasets( 

1085 self, 

1086 datasets: Iterable[DatasetRef], 

1087 expand: bool = True, 

1088 ) -> list[DatasetRef]: 

1089 """Import one or more datasets into the `Registry`. 

1090 

1091 The difference from the `insertDatasets` method is that this method 

1092 accepts `DatasetRef` instances which should already be resolved and 

1093 have a dataset ID. If the registry supports globally-unique dataset 

1094 IDs (e.g. `uuid.UUID`), then datasets which already exist in the 

1095 registry will be ignored if imported again. 

1096 

1097 Parameters 

1098 ---------- 

1099 datasets : `~collections.abc.Iterable` of `DatasetRef` 

1100 Datasets to be inserted. All `DatasetRef` instances must have 

1101 identical ``datasetType`` and ``run`` attributes. ``run`` 

1102 attribute can be `None` and defaults to ``self.defaults.run``. 

1103 Datasets can specify an ``id`` attribute which will be used for the 

1104 inserted datasets. All dataset IDs must have the same type 

1105 (`int` or `uuid.UUID`); if the type of the dataset IDs does not 

1106 match the configured backend, the IDs will be ignored and new IDs 

1107 will be generated by the backend. 

1108 expand : `bool`, optional 

1109 If `True` (default), expand data IDs as they are inserted. This is 

1110 necessary in general, but it may be disabled if the caller can 

1111 guarantee this is unnecessary. 

1112 

1113 Returns 

1114 ------- 

1115 refs : `list` of `DatasetRef` 

1116 Resolved `DatasetRef` instances for all given data IDs (in the same 

1117 order). If any of ``datasets`` has an ID which already exists in 

1118 the database then it will not be inserted or updated, but a 

1119 resolved `DatasetRef` will be returned for it in any case. 

1120 

1121 Raises 

1122 ------ 

1123 lsst.daf.butler.registry.NoDefaultCollectionError 

1124 Raised if ``run`` is `None` and ``self.defaults.run`` is `None`. 

1125 lsst.daf.butler.registry.DatasetTypeError 

1126 Raised if the datasets correspond to more than one dataset type or 

1127 the dataset type is not known to the registry. 

1128 lsst.daf.butler.registry.ConflictingDefinitionError 

1129 Raised if a dataset with the same dataset type and data ID as one 

1130 of those given already exists in ``run``. 

1131 lsst.daf.butler.registry.MissingCollectionError 

1132 Raised if ``run`` does not exist in the registry. 

1133 

1134 Notes 

1135 ----- 

1136 This method is considered package-private and internal to the Butler 

1137 implementation. Clients outside the daf_butler package should not 

1138 use this method. 

1139 """ 

1140 datasets = list(datasets) 

1141 if not datasets: 

1142 # nothing to do 

1143 return [] 

1144 

1145 # find dataset type 

1146 datasetTypes = {dataset.datasetType for dataset in datasets} 

1147 if len(datasetTypes) != 1: 

1148 raise DatasetTypeError(f"Multiple dataset types in input datasets: {datasetTypes}") 

1149 datasetType = datasetTypes.pop() 

1150 

1151 # get storage handler for this dataset type 

1152 storage = self._managers.datasets.find(datasetType.name) 

1153 if storage is None: 

1154 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.") 

1155 

1156 # find run name 

1157 runs = {dataset.run for dataset in datasets} 

1158 if len(runs) != 1: 

1159 raise ValueError(f"Multiple run names in input datasets: {runs}") 

1160 run = runs.pop() 

1161 

1162 runRecord = self._managers.collections.find(run) 

1163 if runRecord.type is not CollectionType.RUN: 

1164 raise CollectionTypeError( 

1165 f"Given collection '{runRecord.name}' is of type {runRecord.type.name};" 

1166 " RUN collection required." 

1167 ) 

1168 assert isinstance(runRecord, RunRecord) 

1169 

1170 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG) 

1171 if expand: 

1172 expandedDatasets = [ 

1173 dataset.expanded(self.expandDataId(dataset.dataId, dimensions=storage.datasetType.dimensions)) 

1174 for dataset in progress.wrap(datasets, f"Expanding {storage.datasetType.name} data IDs") 

1175 ] 

1176 else: 

1177 expandedDatasets = [ 

1178 DatasetRef(datasetType, dataset.dataId, id=dataset.id, run=dataset.run, conform=True) 

1179 for dataset in datasets 

1180 ] 

1181 

1182 try: 

1183 refs = list(storage.import_(runRecord, expandedDatasets)) 

1184 if self._managers.obscore: 

1185 context = queries.SqlQueryContext(self._db, self._managers.column_types) 

1186 self._managers.obscore.add_datasets(refs, context) 

1187 except sqlalchemy.exc.IntegrityError as err: 

1188 raise ConflictingDefinitionError( 

1189 "A database constraint failure was triggered by inserting " 

1190 f"one or more datasets of type {storage.datasetType} into " 

1191 f"collection '{run}'. " 

1192 "This probably means a dataset with the same data ID " 

1193 "and dataset type already exists, but it may also mean a " 

1194 "dimension row is missing." 

1195 ) from err 

1196 # Check that imported dataset IDs match the input 

1197 for imported_ref, input_ref in zip(refs, datasets, strict=True): 

1198 if imported_ref.id != input_ref.id: 

1199 raise RegistryConsistencyError( 

1200 "Imported dataset ID differs from input dataset ID, " 

1201 f"input ref: {input_ref}, imported ref: {imported_ref}" 

1202 ) 

1203 return refs 

1204 

1205 def getDataset(self, id: DatasetId) -> DatasetRef | None: 

1206 """Retrieve a Dataset entry. 

1207 

1208 Parameters 

1209 ---------- 

1210 id : `DatasetId` 

1211 The unique identifier for the dataset. 

1212 

1213 Returns 

1214 ------- 

1215 ref : `DatasetRef` or `None` 

1216 A ref to the Dataset, or `None` if no matching Dataset 

1217 was found. 

1218 """ 

1219 return self._managers.datasets.getDatasetRef(id) 

1220 

1221 @transactional 

1222 def removeDatasets(self, refs: Iterable[DatasetRef]) -> None: 

1223 """Remove datasets from the Registry. 

1224 

1225 The datasets will be removed unconditionally from all collections, and 

1226 any `Quantum` that consumed these datasets will instead be marked as 

1227 having a NULL input. `Datastore` records will *not* be deleted; the 

1228 caller is responsible for ensuring that the dataset has already been 

1229 removed from all Datastores. 

1230 

1231 Parameters 

1232 ---------- 

1233 refs : `~collections.abc.Iterable` [`DatasetRef`] 

1234 References to the datasets to be removed. Must include a valid 

1235 ``id`` attribute, and should be considered invalidated upon return. 

1236 

1237 Raises 

1238 ------ 

1239 lsst.daf.butler.AmbiguousDatasetError 

1240 Raised if any ``ref.id`` is `None`. 

1241 lsst.daf.butler.registry.OrphanedRecordError 

1242 Raised if any dataset is still present in any `Datastore`. 

1243 """ 

1244 progress = Progress("lsst.daf.butler.Registry.removeDatasets", level=logging.DEBUG) 

1245 for datasetType, refsForType in progress.iter_item_chunks( 

1246 DatasetRef.iter_by_type(refs), desc="Removing datasets by type" 

1247 ): 

1248 storage = self._managers.datasets[datasetType.name] 

1249 try: 

1250 storage.delete(refsForType) 

1251 except sqlalchemy.exc.IntegrityError as err: 

1252 raise OrphanedRecordError( 

1253 "One or more datasets is still present in one or more Datastores." 

1254 ) from err 

1255 

1256 @transactional 

1257 def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

1258 """Add existing datasets to a `~CollectionType.TAGGED` collection. 

1259 

1260 If a `DatasetRef` with the same exact ID is already in the collection, 

1261 nothing is changed. If a `DatasetRef` with the same `DatasetType` and 

1262 data ID but with different ID exists in the collection, 

1263 `~lsst.daf.butler.registry.ConflictingDefinitionError` is raised. 

1264 

1265 Parameters 

1266 ---------- 

1267 collection : `str` 

1268 Indicates the collection the datasets should be associated with. 

1269 refs : `~collections.abc.Iterable` [ `DatasetRef` ] 

1270 An iterable of resolved `DatasetRef` instances that already exist 

1271 in this `Registry`. 

1272 

1273 Raises 

1274 ------ 

1275 lsst.daf.butler.registry.ConflictingDefinitionError 

1276 If a Dataset with the given `DatasetRef` already exists in the 

1277 given collection. 

1278 lsst.daf.butler.registry.MissingCollectionError 

1279 Raised if ``collection`` does not exist in the registry. 

1280 lsst.daf.butler.registry.CollectionTypeError 

1281 Raised if adding new datasets to the given ``collection`` is not 

1282 allowed (i.e. it is not a `~CollectionType.TAGGED` collection). 

1283 """ 

1284 progress = Progress("lsst.daf.butler.Registry.associate", level=logging.DEBUG) 

1285 collectionRecord = self._managers.collections.find(collection) 

1286 if collectionRecord.type is not CollectionType.TAGGED: 

1287 raise CollectionTypeError( 

1288 f"Collection '{collection}' has type {collectionRecord.type.name}, not TAGGED." 

1289 ) 

1290 for datasetType, refsForType in progress.iter_item_chunks( 

1291 DatasetRef.iter_by_type(refs), desc="Associating datasets by type" 

1292 ): 

1293 storage = self._managers.datasets[datasetType.name] 

1294 try: 

1295 storage.associate(collectionRecord, refsForType) 

1296 if self._managers.obscore: 

1297 # If a TAGGED collection is being monitored by ObsCore 

1298 # manager then we may need to save the dataset. 

1299 context = queries.SqlQueryContext(self._db, self._managers.column_types) 

1300 self._managers.obscore.associate(refsForType, collectionRecord, context) 

1301 except sqlalchemy.exc.IntegrityError as err: 

1302 raise ConflictingDefinitionError( 

1303 f"Constraint violation while associating dataset of type {datasetType.name} with " 

1304 f"collection {collection}. This probably means that one or more datasets with the same " 

1305 "dataset type and data ID already exist in the collection, but it may also indicate " 

1306 "that the datasets do not exist." 

1307 ) from err 

1308 

1309 @transactional 

1310 def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

1311 """Remove existing datasets from a `~CollectionType.TAGGED` collection. 

1312 

1313 ``collection`` and ``ref`` combinations that are not currently 

1314 associated are silently ignored. 

1315 

1316 Parameters 

1317 ---------- 

1318 collection : `str` 

1319 The collection the datasets should no longer be associated with. 

1320 refs : `~collections.abc.Iterable` [ `DatasetRef` ] 

1321 An iterable of resolved `DatasetRef` instances that already exist 

1322 in this `Registry`. 

1323 

1324 Raises 

1325 ------ 

1326 lsst.daf.butler.AmbiguousDatasetError 

1327 Raised if any of the given dataset references is unresolved. 

1328 lsst.daf.butler.registry.MissingCollectionError 

1329 Raised if ``collection`` does not exist in the registry. 

1330 lsst.daf.butler.registry.CollectionTypeError 

1331 Raised if removing datasets from the given ``collection`` is not 

1332 allowed (i.e. it is not a `~CollectionType.TAGGED` collection). 

1333 """ 

1334 progress = Progress("lsst.daf.butler.Registry.disassociate", level=logging.DEBUG) 

1335 collectionRecord = self._managers.collections.find(collection) 

1336 if collectionRecord.type is not CollectionType.TAGGED: 

1337 raise CollectionTypeError( 

1338 f"Collection '{collection}' has type {collectionRecord.type.name}; expected TAGGED." 

1339 ) 

1340 for datasetType, refsForType in progress.iter_item_chunks( 

1341 DatasetRef.iter_by_type(refs), desc="Disassociating datasets by type" 

1342 ): 

1343 storage = self._managers.datasets[datasetType.name] 

1344 storage.disassociate(collectionRecord, refsForType) 

1345 if self._managers.obscore: 

1346 self._managers.obscore.disassociate(refsForType, collectionRecord) 

1347 
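# Illustrative sketch: tagging existing datasets into a TAGGED collection and
# later removing them from it.  ``refs`` is a hypothetical iterable of resolved
# DatasetRef instances already known to this registry.
from lsst.daf.butler.registry import CollectionType

registry.registerCollection("u/someone/tagged-selection", CollectionType.TAGGED)
registry.associate("u/someone/tagged-selection", refs)
registry.disassociate("u/someone/tagged-selection", refs)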

1348 @transactional 

1349 def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None: 

1350 """Associate one or more datasets with a calibration collection and a 

1351 validity range within it. 

1352 

1353 Parameters 

1354 ---------- 

1355 collection : `str` 

1356 The name of an already-registered `~CollectionType.CALIBRATION` 

1357 collection. 

1358 refs : `~collections.abc.Iterable` [ `DatasetRef` ] 

1359 Datasets to be associated. 

1360 timespan : `Timespan` 

1361 The validity range for these datasets within the collection. 

1362 

1363 Raises 

1364 ------ 

1365 lsst.daf.butler.AmbiguousDatasetError 

1366 Raised if any of the given `DatasetRef` instances is unresolved. 

1367 lsst.daf.butler.registry.ConflictingDefinitionError 

1368 Raised if the collection already contains a different dataset with 

1369 the same `DatasetType` and data ID and an overlapping validity 

1370 range. 

1371 lsst.daf.butler.registry.CollectionTypeError 

1372 Raised if ``collection`` is not a `~CollectionType.CALIBRATION` 

1373 collection or if one or more datasets are of a dataset type for 

1374 which `DatasetType.isCalibration` returns `False`. 

1375 """ 

1376 progress = Progress("lsst.daf.butler.Registry.certify", level=logging.DEBUG) 

1377 collectionRecord = self._managers.collections.find(collection) 

1378 for datasetType, refsForType in progress.iter_item_chunks( 

1379 DatasetRef.iter_by_type(refs), desc="Certifying datasets by type" 

1380 ): 

1381 storage = self._managers.datasets[datasetType.name] 

1382 storage.certify( 

1383 collectionRecord, 

1384 refsForType, 

1385 timespan, 

1386 context=queries.SqlQueryContext(self._db, self._managers.column_types), 

1387 ) 

1388 

1389 @transactional 

1390 def decertify( 

1391 self, 

1392 collection: str, 

1393 datasetType: str | DatasetType, 

1394 timespan: Timespan, 

1395 *, 

1396 dataIds: Iterable[DataId] | None = None, 

1397 ) -> None: 

1398 """Remove or adjust datasets to clear a validity range within a 

1399 calibration collection. 

1400 

1401 Parameters 

1402 ---------- 

1403 collection : `str` 

1404 The name of an already-registered `~CollectionType.CALIBRATION` 

1405 collection. 

1406 datasetType : `str` or `DatasetType` 

1407 Name or `DatasetType` instance for the datasets to be decertified. 

1408 timespan : `Timespan` 

1409 The validity range to remove datasets from within the collection. 

1410 Datasets that overlap this range but are not contained by it will 

1411 have their validity ranges adjusted to not overlap it, which may 

1412 split a single dataset validity range into two. 

1413 dataIds : iterable [`dict` or `DataCoordinate`], optional 

1414 Data IDs that should be decertified within the given validity range. 

1415 If `None`, all data IDs for ``datasetType`` will be 

1416 decertified. 

1417 

1418 Raises 

1419 ------ 

1420 lsst.daf.butler.registry.CollectionTypeError 

1421 Raised if ``collection`` is not a `~CollectionType.CALIBRATION` 

1422 collection or if ``datasetType.isCalibration() is False``. 

1423 """ 

1424 collectionRecord = self._managers.collections.find(collection) 

1425 if isinstance(datasetType, str): 

1426 storage = self._managers.datasets[datasetType] 

1427 else: 

1428 storage = self._managers.datasets[datasetType.name] 

1429 standardizedDataIds = None 

1430 if dataIds is not None: 

1431 standardizedDataIds = [ 

1432 DataCoordinate.standardize(d, dimensions=storage.datasetType.dimensions) for d in dataIds 

1433 ] 

1434 storage.decertify( 

1435 collectionRecord, 

1436 timespan, 

1437 dataIds=standardizedDataIds, 

1438 context=queries.SqlQueryContext(self._db, self._managers.column_types), 

1439 ) 

1440 
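# Illustrative sketch: certifying calibration datasets for a validity range and
# later clearing part of that range.  ``refs``, the collection name, the
# dataset type name, and the timestamps are all hypothetical.
import astropy.time

from lsst.daf.butler import Timespan

validity = Timespan(
    astropy.time.Time("2024-01-01T00:00:00", scale="tai"),
    astropy.time.Time("2024-06-01T00:00:00", scale="tai"),
)
registry.certify("u/someone/calibs", refs, validity)
registry.decertify(
    "u/someone/calibs",
    "bias",
    Timespan(astropy.time.Time("2024-03-01T00:00:00", scale="tai"), None),
)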

1441 def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager: 

1442 """Return an object that allows a new `Datastore` instance to 

1443 communicate with this `Registry`. 

1444 

1445 Returns 

1446 ------- 

1447 manager : `~.interfaces.DatastoreRegistryBridgeManager` 

1448 Object that mediates communication between this `Registry` and its 

1449 associated datastores. 

1450 """ 

1451 return self._managers.datastores 

1452 

1453 def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]: 

1454 """Retrieve datastore locations for a given dataset. 

1455 

1456 Parameters 

1457 ---------- 

1458 ref : `DatasetRef` 

1459 A reference to the dataset for which to retrieve storage 

1460 information. 

1461 

1462 Returns 

1463 ------- 

1464 datastores : `~collections.abc.Iterable` [ `str` ] 

1465 All the matching datastores holding this dataset. 

1466 

1467 Raises 

1468 ------ 

1469 lsst.daf.butler.AmbiguousDatasetError 

1470 Raised if ``ref.id`` is `None`. 
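
Examples
--------
An illustrative sketch; ``registry`` and the resolved ``ref`` are
assumptions:

>>> for datastore_name in registry.getDatasetLocations(ref):
...     print(datastore_name)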

1471 """ 

1472 return self._managers.datastores.findDatastores(ref) 

1473 

1474 def expandDataId( 

1475 self, 

1476 dataId: DataId | None = None, 

1477 *, 

1478 dimensions: Iterable[str] | DimensionGroup | DimensionGraph | None = None, 

1479 graph: DimensionGraph | None = None, 

1480 records: NameLookupMapping[DimensionElement, DimensionRecord | None] | None = None, 

1481 withDefaults: bool = True, 

1482 **kwargs: Any, 

1483 ) -> DataCoordinate: 

1484 """Expand a dimension-based data ID to include additional information. 

1485 

1486 Parameters 

1487 ---------- 

1488 dataId : `DataCoordinate` or `dict`, optional 

1489 Data ID to be expanded; augmented and overridden by ``kwargs``. 

1490 dimensions : `~collections.abc.Iterable` [ `str` ], \ 

1491 `DimensionGroup`, or `DimensionGraph`, optional 

1492 The dimensions to be identified by the new `DataCoordinate`. 

1493 If not provided, will be inferred from the keys of ``dataId`` and

1494 ``**kwargs``; the registry's own dimension universe is used to

1495 standardize the result.

1496 graph : `DimensionGraph`, optional 

1497 Like ``dimensions``, but as a ``DimensionGraph`` instance. Ignored 

1498 if ``dimensions`` is provided. Deprecated and will be removed 

1499 after v27. 

1500 records : `~collections.abc.Mapping` [`str`, `DimensionRecord`], \ 

1501 optional 

1502 Dimension record data to use before querying the database for that 

1503 data, keyed by element name. 

1504 withDefaults : `bool`, optional 

1505 Utilize ``self.defaults.dataId`` to fill in missing governor 

1506 dimension key-value pairs. Defaults to `True` (i.e. defaults are 

1507 used). 

1508 **kwargs 

1509 Additional keywords are treated like additional key-value pairs for 

1510 ``dataId``, extending and overriding. 

1511 

1512 Returns 

1513 ------- 

1514 expanded : `DataCoordinate` 

1515 A data ID that includes full metadata for all of the dimensions it 

1516 identifies, i.e. guarantees that ``expanded.hasRecords()`` and 

1517 ``expanded.hasFull()`` both return `True`. 

1518 

1519 Raises 

1520 ------ 

1521 lsst.daf.butler.registry.DataIdError 

1522 Raised when ``dataId`` or keyword arguments specify unknown 

1523 dimensions or values, or when a resulting data ID contains 

1524 contradictory key-value pairs, according to dimension 

1525 relationships. 

1526 

1527 Notes 

1528 ----- 

1529 This method cannot be relied upon to reject invalid data ID values 

1530 for dimensions that do not actually have any record columns. For

1531 efficiency reasons the records for these dimensions (which have only 

1532 dimension key values that are given by the caller) may be constructed 

1533 directly rather than obtained from the registry database. 
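
Examples
--------
An illustrative sketch; the registry instance and the dimension values
are assumptions:

>>> expanded = registry.expandDataId(instrument="HSC", visit=1228)
>>> expanded.hasRecords() and expanded.hasFull()
True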

1534 """ 

1535 if not withDefaults: 

1536 defaults = None 

1537 else: 

1538 defaults = self.defaults.dataId 

1539 standardized = DataCoordinate.standardize( 

1540 dataId, 

1541 graph=graph, 

1542 dimensions=dimensions, 

1543 universe=self.dimensions, 

1544 defaults=defaults, 

1545 **kwargs, 

1546 ) 

1547 if standardized.hasRecords(): 

1548 return standardized 

1549 if records is None: 

1550 records = {} 

1551 elif isinstance(records, NamedKeyMapping): 

1552 records = records.byName() 

1553 else: 

1554 records = dict(records) 

1555 if isinstance(dataId, DataCoordinate) and dataId.hasRecords(): 

1556 for element_name in dataId.dimensions.elements: 

1557 records[element_name] = dataId.records[element_name] 

1558 keys = dict(standardized.mapping) 

1559 for element_name in standardized.dimensions.lookup_order: 

1560 element = self.dimensions[element_name] 

1561 record = records.get(element_name, ...) # Use ... to mean not found; None might mean NULL 

1562 if record is ...: 

1563 if element_name in self.dimensions.dimensions.names and keys.get(element_name) is None: 

1564 if element_name in standardized.dimensions.required: 

1565 raise DimensionNameError( 

1566 f"No value or null value for required dimension {element_name}." 

1567 ) 

1568 keys[element_name] = None 

1569 record = None 

1570 else: 

1571 record = self._managers.dimensions.fetch_one( 

1572 element_name, 

1573 DataCoordinate.standardize(keys, dimensions=element.minimal_group), 

1574 self.dimension_record_cache, 

1575 ) 

1576 records[element_name] = record 

1577 if record is not None: 

1578 for d in element.implied: 

1579 value = getattr(record, d.name) 

1580 if keys.setdefault(d.name, value) != value: 

1581 raise InconsistentDataIdError( 

1582 f"Data ID {standardized} has {d.name}={keys[d.name]!r}, " 

1583 f"but {element_name} implies {d.name}={value!r}." 

1584 ) 

1585 else: 

1586 if element_name in standardized.dimensions.required: 

1587 raise DataIdValueError( 

1588 f"Could not fetch record for required dimension {element.name} via keys {keys}." 

1589 ) 

1590 if element.defines_relationships: 

1591 raise InconsistentDataIdError( 

1592 f"Could not fetch record for element {element_name} via keys {keys}, ", 

1593 "but it is marked as defining relationships; this means one or more dimensions are " 

1594 "have inconsistent values.", 

1595 ) 

1596 for d in element.implied: 

1597 keys.setdefault(d.name, None) 

1598 records.setdefault(d.name, None) 

1599 return DataCoordinate.standardize(keys, dimensions=standardized.dimensions).expanded(records=records) 

1600 

1601 def insertDimensionData( 

1602 self, 

1603 element: DimensionElement | str, 

1604 *data: Mapping[str, Any] | DimensionRecord, 

1605 conform: bool = True, 

1606 replace: bool = False, 

1607 skip_existing: bool = False, 

1608 ) -> None: 

1609 """Insert one or more dimension records into the database. 

1610 

1611 Parameters 

1612 ---------- 

1613 element : `DimensionElement` or `str` 

1614 The `DimensionElement` or name thereof that identifies the table 

1615 records will be inserted into. 

1616 *data : `dict` or `DimensionRecord` 

1617 One or more records to insert. 

1618 conform : `bool`, optional 

1619 If `False` (`True` is default) perform no checking or conversions, 

1620 and assume that ``element`` is a `DimensionElement` instance and 

1621 ``data`` is one or more `DimensionRecord` instances of the

1622 appropriate subclass. 

1623 replace : `bool`, optional 

1624 If `True` (`False` is default), replace existing records in the 

1625 database if there is a conflict. 

1626 skip_existing : `bool`, optional 

1627 If `True` (`False` is default), skip insertion if a record with 

1628 the same primary key values already exists. Unlike 

1629 `syncDimensionData`, this will not detect when the given record 

1630 differs from what is in the database, and should not be used when 

1631 this is a concern. 
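
Examples
--------
An illustrative sketch; the record keys shown are assumptions about a
typical ``instrument`` record in the default dimension universe:

>>> registry.insertDimensionData(
...     "instrument",
...     {"name": "MyCam", "class_name": "my.instruments.MyCam"},
...     skip_existing=True,
... )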

1632 """ 

1633 if isinstance(element, str): 

1634 element = self.dimensions[element] 

1635 if conform: 

1636 records = [ 

1637 row if isinstance(row, DimensionRecord) else element.RecordClass(**row) for row in data 

1638 ] 

1639 else: 

1640 # Ignore typing since caller said to trust them with conform=False. 

1641 records = data # type: ignore 

1642 if element.name in self.dimension_record_cache: 

1643 self.dimension_record_cache.reset() 

1644 self._managers.dimensions.insert( 

1645 element, 

1646 *records, 

1647 replace=replace, 

1648 skip_existing=skip_existing, 

1649 ) 

1650 

1651 def syncDimensionData( 

1652 self, 

1653 element: DimensionElement | str, 

1654 row: Mapping[str, Any] | DimensionRecord, 

1655 conform: bool = True, 

1656 update: bool = False, 

1657 ) -> bool | dict[str, Any]: 

1658 """Synchronize the given dimension record with the database, inserting 

1659 if it does not already exist and comparing values if it does. 

1660 

1661 Parameters 

1662 ---------- 

1663 element : `DimensionElement` or `str` 

1664 The `DimensionElement` or name thereof that identifies the table 

1665 records will be inserted into. 

1666 row : `dict` or `DimensionRecord` 

1667 The record to insert. 

1668 conform : `bool`, optional 

1669 If `False` (`True` is default) perform no checking or conversions, 

1670 and assume that ``element`` is a `DimensionElement` instance and 

1671 ``row`` is a `DimensionRecord` instance of the appropriate

1672 subclass. 

1673 update : `bool`, optional 

1674 If `True` (`False` is default), update the existing record in the 

1675 database if there is a conflict. 

1676 

1677 Returns 

1678 ------- 

1679 inserted_or_updated : `bool` or `dict` 

1680 `True` if a new row was inserted, `False` if no changes were 

1681 needed, or a `dict` mapping updated column names to their old 

1682 values if an update was performed (only possible if 

1683 ``update=True``). 

1684 

1685 Raises 

1686 ------ 

1687 lsst.daf.butler.registry.ConflictingDefinitionError 

1688 Raised if the record exists in the database (according to primary 

1689 key lookup) but is inconsistent with the given one. 
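
Examples
--------
An illustrative sketch; the element name and record contents are
assumptions:

>>> inserted = registry.syncDimensionData(
...     "instrument", {"name": "MyCam", "class_name": "my.instruments.MyCam"}
... )
>>> inserted
True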

1690 """ 

1691 if conform: 

1692 if isinstance(element, str): 

1693 element = self.dimensions[element] 

1694 record = row if isinstance(row, DimensionRecord) else element.RecordClass(**row) 

1695 else: 

1696 # Ignore typing since caller said to trust them with conform=False. 

1697 record = row # type: ignore 

1698 if record.definition.name in self.dimension_record_cache: 

1699 self.dimension_record_cache.reset() 

1700 return self._managers.dimensions.sync(record, update=update) 

1701 

1702 def queryDatasetTypes( 

1703 self, 

1704 expression: Any = ..., 

1705 *, 

1706 components: bool | _Marker = _DefaultMarker, 

1707 missing: list[str] | None = None, 

1708 ) -> Iterable[DatasetType]: 

1709 """Iterate over the dataset types whose names match an expression. 

1710 

1711 Parameters 

1712 ---------- 

1713 expression : dataset type expression, optional 

1714 An expression that fully or partially identifies the dataset types 

1715 to return, such as a `str`, `re.Pattern`, or iterable thereof. 

1716 ``...`` can be used to return all dataset types, and is the 

1717 default. See :ref:`daf_butler_dataset_type_expressions` for more 

1718 information. 

1719 components : `bool`, optional 

1720 Must be `False`. Provided only for backwards compatibility. After 

1721 v27 this argument will be removed entirely. 

1722 missing : `list` of `str`, optional 

1723 String dataset type names that were explicitly given (i.e. not 

1724 regular expression patterns) but not found will be appended to this 

1725 list, if it is provided. 

1726 

1727 Returns 

1728 ------- 

1729 dataset_types : `~collections.abc.Iterable` [ `DatasetType`] 

1730 An `~collections.abc.Iterable` of `DatasetType` instances whose 

1731 names match ``expression``. 

1732 

1733 Raises 

1734 ------ 

1735 lsst.daf.butler.registry.DatasetTypeExpressionError 

1736 Raised when ``expression`` is invalid. 
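
Examples
--------
An illustrative sketch; the pattern is an assumption:

>>> import re
>>> missing = []
>>> pattern = re.compile(r"^calexp")
>>> for dataset_type in registry.queryDatasetTypes(pattern, missing=missing):
...     print(dataset_type.name)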

1737 """ 

1738 if components is not _DefaultMarker: 

1739 if components is not False: 

1740 raise DatasetTypeError( 

1741 "Dataset component queries are no longer supported by Registry. Use " 

1742 "DatasetType methods to obtain components from parent dataset types instead." 

1743 ) 

1744 else: 

1745 warnings.warn( 

1746 "The components parameter is ignored. It will be removed after v27.", 

1747 category=FutureWarning, 

1748 stacklevel=find_outside_stacklevel("lsst.daf.butler"), 

1749 ) 

1750 wildcard = DatasetTypeWildcard.from_expression(expression) 

1751 return self._managers.datasets.resolve_wildcard(wildcard, missing=missing) 

1752 

1753 def queryCollections( 

1754 self, 

1755 expression: Any = ..., 

1756 datasetType: DatasetType | None = None, 

1757 collectionTypes: Iterable[CollectionType] | CollectionType = CollectionType.all(), 

1758 flattenChains: bool = False, 

1759 includeChains: bool | None = None, 

1760 ) -> Sequence[str]: 

1761 """Iterate over the collections whose names match an expression. 

1762 

1763 Parameters 

1764 ---------- 

1765 expression : collection expression, optional 

1766 An expression that identifies the collections to return, such as 

1767 a `str` (for full matches or partial matches via globs), 

1768 `re.Pattern` (for partial matches), or iterable thereof. ``...`` 

1769 can be used to return all collections, and is the default. 

1770 See :ref:`daf_butler_collection_expressions` for more information. 

1771 datasetType : `DatasetType`, optional 

1772 If provided, only yield collections that may contain datasets of 

1773 this type. This is a conservative approximation in general; it may 

1774 yield collections that do not have any such datasets. 

1775 collectionTypes : `~collections.abc.Set` [`CollectionType`] or \ 

1776 `CollectionType`, optional 

1777 If provided, only yield collections of these types. 

1778 flattenChains : `bool`, optional 

1779 If `True` (`False` is default), recursively yield the child 

1780 collections of matching `~CollectionType.CHAINED` collections. 

1781 includeChains : `bool`, optional 

1782 If `True`, yield records for matching `~CollectionType.CHAINED` 

1783 collections. Default is the opposite of ``flattenChains``: include 

1784 either CHAINED collections or their children, but not both. 

1785 

1786 Returns 

1787 ------- 

1788 collections : `~collections.abc.Sequence` [ `str` ] 

1789 The names of collections that match ``expression``. 

1790 

1791 Raises 

1792 ------ 

1793 lsst.daf.butler.registry.CollectionExpressionError 

1794 Raised when ``expression`` is invalid. 

1795 

1796 Notes 

1797 ----- 

1798 The order in which collections are returned is unspecified, except that 

1799 the children of a `~CollectionType.CHAINED` collection are guaranteed 

1800 to be in the order in which they are searched. When multiple parent 

1801 `~CollectionType.CHAINED` collections match the same criteria, the 

1802 order in which their child lists appear is unspecified, and the lists of

1803 children may be incomplete if a child has multiple parents. 
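
Examples
--------
An illustrative sketch; the glob pattern and collection type are
assumptions:

>>> from lsst.daf.butler import CollectionType
>>> names = registry.queryCollections(
...     "HSC/runs/*",
...     collectionTypes=CollectionType.RUN,
...     flattenChains=True,
... )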

1804 """ 

1805 # Right now the datasetType argument is completely ignored, but that

1806 # is consistent with its [lack of] guarantees. DM-24939 or a follow-up 

1807 # ticket will take care of that. 

1808 try: 

1809 wildcard = CollectionWildcard.from_expression(expression) 

1810 except TypeError as exc: 

1811 raise CollectionExpressionError(f"Invalid collection expression '{expression}'") from exc 

1812 collectionTypes = ensure_iterable(collectionTypes) 

1813 return [ 

1814 record.name 

1815 for record in self._managers.collections.resolve_wildcard( 

1816 wildcard, 

1817 collection_types=frozenset(collectionTypes), 

1818 flatten_chains=flattenChains, 

1819 include_chains=includeChains, 

1820 ) 

1821 ] 

1822 

1823 def _makeQueryBuilder( 

1824 self, 

1825 summary: queries.QuerySummary, 

1826 doomed_by: Iterable[str] = (), 

1827 ) -> queries.QueryBuilder: 

1828 """Return a `QueryBuilder` instance capable of constructing and 

1829 managing more complex queries than those obtainable via `Registry` 

1830 interfaces. 

1831 

1832 This is an advanced interface; downstream code should prefer 

1833 `Registry.queryDataIds` and `Registry.queryDatasets` whenever those 

1834 are sufficient. 

1835 

1836 Parameters 

1837 ---------- 

1838 summary : `queries.QuerySummary` 

1839 Object describing and categorizing the full set of dimensions that 

1840 will be included in the query. 

1841 doomed_by : `~collections.abc.Iterable` of `str`, optional 

1842 A list of diagnostic messages that indicate why the query is going 

1843 to yield no results and should not even be executed. If an empty 

1844 container (default), the query will be executed unless other code

1845 determines that it is doomed. 

1846 

1847 Returns 

1848 ------- 

1849 builder : `queries.QueryBuilder` 

1850 Object that can be used to construct and perform advanced queries. 

1851 """ 

1852 doomed_by = list(doomed_by) 

1853 backend = queries.SqlQueryBackend(self._db, self._managers, self.dimension_record_cache) 

1854 context = backend.context() 

1855 relation: Relation | None = None 

1856 if doomed_by: 

1857 relation = LeafRelation.make_doomed(context.sql_engine, set(), doomed_by) 

1858 return queries.QueryBuilder( 

1859 summary, 

1860 backend=backend, 

1861 context=context, 

1862 relation=relation, 

1863 ) 

1864 

1865 def _standardize_query_data_id_args( 

1866 self, data_id: DataId | None, *, doomed_by: list[str], **kwargs: Any 

1867 ) -> DataCoordinate: 

1868 """Preprocess the data ID arguments passed to query* methods. 

1869 

1870 Parameters 

1871 ---------- 

1872 data_id : `DataId` or `None` 

1873 Data ID that constrains the query results. 

1874 doomed_by : `list` [ `str` ] 

1875 List to append messages indicating why the query is doomed to 

1876 yield no results. 

1877 **kwargs 

1878 Additional data ID key-value pairs, extending and overriding 

1879 ``data_id``. 

1880 

1881 Returns 

1882 ------- 

1883 data_id : `DataCoordinate` 

1884 Standardized data ID. Will be fully expanded unless expansion 

1885 fails, in which case a message will be appended to ``doomed_by`` 

1886 on return. 

1887 """ 

1888 try: 

1889 return self.expandDataId(data_id, **kwargs) 

1890 except DataIdValueError as err: 

1891 doomed_by.append(str(err)) 

1892 return DataCoordinate.standardize( 

1893 data_id, **kwargs, universe=self.dimensions, defaults=self.defaults.dataId 

1894 ) 

1895 

1896 def _standardize_query_dataset_args( 

1897 self, 

1898 datasets: Any, 

1899 collections: CollectionArgType | None, 

1900 mode: Literal["find_first"] | Literal["find_all"] | Literal["constrain"] = "constrain", 

1901 *, 

1902 doomed_by: list[str], 

1903 ) -> tuple[list[DatasetType], CollectionWildcard | None]: 

1904 """Preprocess dataset arguments passed to query* methods. 

1905 

1906 Parameters 

1907 ---------- 

1908 datasets : `DatasetType`, `str`, `re.Pattern`, or iterable of these 

1909 Expression identifying dataset types. See `queryDatasetTypes` for 

1910 details. 

1911 collections : `str`, `re.Pattern`, or iterable of these 

1912 Expression identifying collections to be searched. See 

1913 `queryCollections` for details. 

1914 mode : `str`, optional 

1915 The way in which datasets are being used in this query; one of: 

1916 

1917 - "find_first": this is a query for the first dataset in an 

1918 ordered list of collections. Prohibits collection wildcards, 

1919 but permits dataset type wildcards. 

1920 

1921 - "find_all": this is a query for all datasets in all matched 

1922 collections. Permits collection and dataset type wildcards. 

1923 

1924 - "constrain": this is a query for something other than datasets, 

1925 with results constrained by dataset existence. Permits 

1926 collection wildcards and prohibits ``...`` as a dataset type 

1927 wildcard. 

1928 doomed_by : `list` [ `str` ] 

1929 List to append messages indicating why the query is doomed to 

1930 yield no results. 

1931 

1932 Returns 

1933 ------- 

1934 dataset_types : `list` [ `DatasetType` ] 

1935 List of matched dataset types. 

1936 collections : `CollectionWildcard` 

1937 Processed collection expression. 

1938 """ 

1939 dataset_types: list[DatasetType] = [] 

1940 collection_wildcard: CollectionWildcard | None = None 

1941 if datasets is not None: 

1942 if collections is None: 

1943 if not self.defaults.collections: 

1944 raise NoDefaultCollectionError("No collections, and no registry default collections.") 

1945 collection_wildcard = CollectionWildcard.from_expression(self.defaults.collections) 

1946 else: 

1947 collection_wildcard = CollectionWildcard.from_expression(collections) 

1948 if mode == "find_first" and collection_wildcard.patterns: 

1949 raise TypeError( 

1950 f"Collection pattern(s) {collection_wildcard.patterns} not allowed in this context." 

1951 ) 

1952 missing: list[str] = [] 

1953 dataset_types = self._managers.datasets.resolve_wildcard( 

1954 datasets, missing=missing, explicit_only=(mode == "constrain") 

1955 ) 

1956 if missing and mode == "constrain": 

1957 raise MissingDatasetTypeError( 

1958 f"Dataset type(s) {missing} are not registered.", 

1959 ) 

1960 doomed_by.extend(f"Dataset type {name} is not registered." for name in missing) 

1961 elif collections: 

1962 # I think this check should actually be `collections is not None`, 

1963 # but it looks like some CLI scripts use empty tuple as default. 

1964 raise ArgumentError(f"Cannot pass 'collections' (='{collections}') without 'datasets'.") 

1965 return dataset_types, collection_wildcard 

1966 

1967 def queryDatasets( 

1968 self, 

1969 datasetType: Any, 

1970 *, 

1971 collections: CollectionArgType | None = None, 

1972 dimensions: Iterable[Dimension | str] | None = None, 

1973 dataId: DataId | None = None, 

1974 where: str = "", 

1975 findFirst: bool = False, 

1976 components: bool | _Marker = _DefaultMarker, 

1977 bind: Mapping[str, Any] | None = None, 

1978 check: bool = True, 

1979 **kwargs: Any, 

1980 ) -> queries.DatasetQueryResults: 

1981 """Query for and iterate over dataset references matching user-provided 

1982 criteria. 

1983 

1984 Parameters 

1985 ---------- 

1986 datasetType : dataset type expression 

1987 An expression that fully or partially identifies the dataset types 

1988 to be queried. Allowed types include `DatasetType`, `str`, 

1989 `re.Pattern`, and iterables thereof. The special value ``...`` can 

1990 be used to query all dataset types. See 

1991 :ref:`daf_butler_dataset_type_expressions` for more information. 

1992 collections : collection expression, optional 

1993 An expression that identifies the collections to search, such as a 

1994 `str` (for full matches or partial matches via globs), `re.Pattern` 

1995 (for partial matches), or iterable thereof. ``...`` can be used to 

1996 search all collections (actually just all `~CollectionType.RUN` 

1997 collections, because this will still find all datasets). 

1998 If not provided, ``self.defaults.collections`` is used. See

1999 :ref:`daf_butler_collection_expressions` for more information. 

2000 dimensions : `~collections.abc.Iterable` of `Dimension` or `str` 

2001 Dimensions to include in the query (in addition to those used 

2002 to identify the queried dataset type(s)), either to constrain 

2003 the resulting datasets to those for which a matching dimension 

2004 exists, or to relate the dataset type's dimensions to dimensions 

2005 referenced by the ``dataId`` or ``where`` arguments. 

2006 dataId : `dict` or `DataCoordinate`, optional 

2007 A data ID whose key-value pairs are used as equality constraints 

2008 in the query. 

2009 where : `str`, optional 

2010 A string expression similar to a SQL WHERE clause. May involve 

2011 any column of a dimension table or (as a shortcut for the primary 

2012 key column of a dimension table) dimension name. See 

2013 :ref:`daf_butler_dimension_expressions` for more information. 

2014 findFirst : `bool`, optional 

2015 If `True` (`False` is default), for each result data ID, only 

2016 yield one `DatasetRef` of each `DatasetType`, from the first 

2017 collection in which a dataset of that dataset type appears 

2018 (according to the order of ``collections`` passed in). If `True`, 

2019 ``collections`` must not contain regular expressions and may not 

2020 be ``...``. 

2021 components : `bool`, optional 

2022 Must be `False`. Provided only for backwards compatibility. After 

2023 v27 this argument will be removed entirely. 

2024 bind : `~collections.abc.Mapping`, optional 

2025 Mapping containing literal values that should be injected into the 

2026 ``where`` expression, keyed by the identifiers they replace. 

2027 Values of collection type can be expanded in some cases; see 

2028 :ref:`daf_butler_dimension_expressions_identifiers` for more 

2029 information. 

2030 check : `bool`, optional 

2031 If `True` (default) check the query for consistency before 

2032 executing it. This may reject some valid queries that resemble 

2033 common mistakes (e.g. queries for visits without specifying an 

2034 instrument). 

2035 **kwargs 

2036 Additional keyword arguments are forwarded to 

2037 `DataCoordinate.standardize` when processing the ``dataId`` 

2038 argument (and may be used to provide a constraining data ID even 

2039 when the ``dataId`` argument is `None`). 

2040 

2041 Returns 

2042 ------- 

2043 refs : `.queries.DatasetQueryResults` 

2044 Dataset references matching the given query criteria. Nested data 

2045 IDs are guaranteed to include values for all implied dimensions 

2046 (i.e. `DataCoordinate.hasFull` will return `True`), but will not 

2047 include dimension records (`DataCoordinate.hasRecords` will be 

2048 `False`) unless `~.queries.DatasetQueryResults.expanded` is 

2049 called on the result object (which returns a new one). 

2050 

2051 Raises 

2052 ------ 

2053 lsst.daf.butler.registry.DatasetTypeExpressionError 

2054 Raised when ``datasetType`` expression is invalid. 

2055 TypeError 

2056 Raised when the arguments are incompatible, such as when a 

2057 collection wildcard is passed when ``findFirst`` is `True`, or 

2058 when ``collections`` is `None` and ``self.defaults.collections`` is 

2059 also `None`. 

2060 lsst.daf.butler.registry.DataIdError 

2061 Raised when ``dataId`` or keyword arguments specify unknown 

2062 dimensions or values, or when they contain inconsistent values. 

2063 lsst.daf.butler.registry.UserExpressionError 

2064 Raised when ``where`` expression is invalid. 

2065 

2066 Notes 

2067 ----- 

2068 When multiple dataset types are queried in a single call, the 

2069 results of this operation are equivalent to querying for each dataset 

2070 type separately in turn, and no information about the relationships 

2071 between datasets of different types is included. In contexts where 

2072 that kind of information is important, the recommended pattern is to 

2073 use `queryDataIds` to first obtain data IDs (possibly with the 

2074 desired dataset types and collections passed as constraints to the 

2075 query), and then use multiple (generally much simpler) calls to 

2076 `queryDatasets` with the returned data IDs passed as constraints. 
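
Examples
--------
An illustrative sketch; the dataset type, collection, and ``where``
constraint are assumptions:

>>> refs = registry.queryDatasets(
...     "calexp",
...     collections=["HSC/runs/RC2"],
...     where="instrument = 'HSC' AND visit = 1228",
...     findFirst=True,
... )
>>> for ref in refs:
...     print(ref.dataId)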

2077 """ 

2078 if components is not _DefaultMarker: 

2079 if components is not False: 

2080 raise DatasetTypeError( 

2081 "Dataset component queries are no longer supported by Registry. Use " 

2082 "DatasetType methods to obtain components from parent dataset types instead." 

2083 ) 

2084 else: 

2085 warnings.warn( 

2086 "The components parameter is ignored. It will be removed after v27.", 

2087 category=FutureWarning, 

2088 stacklevel=find_outside_stacklevel("lsst.daf.butler"), 

2089 ) 

2090 doomed_by: list[str] = [] 

2091 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs) 

2092 resolved_dataset_types, collection_wildcard = self._standardize_query_dataset_args( 

2093 datasetType, 

2094 collections, 

2095 mode="find_first" if findFirst else "find_all", 

2096 doomed_by=doomed_by, 

2097 ) 

2098 if collection_wildcard is not None and collection_wildcard.empty(): 

2099 doomed_by.append("No datasets can be found because collection list is empty.") 

2100 return queries.ChainedDatasetQueryResults([], doomed_by=doomed_by) 

2101 parent_results: list[queries.ParentDatasetQueryResults] = [] 

2102 for resolved_dataset_type in resolved_dataset_types: 

2103 # The full set of dimensions in the query is the combination of 

2104 # those needed for the DatasetType and those explicitly requested, 

2105 # if any. 

2106 dimension_names = set(resolved_dataset_type.dimensions.names) 

2107 if dimensions is not None: 

2108 dimension_names.update(self.dimensions.conform(dimensions).names) 

2109 # Construct the summary structure needed to construct a 

2110 # QueryBuilder. 

2111 summary = queries.QuerySummary( 

2112 requested=self.dimensions.conform(dimension_names), 

2113 column_types=self._managers.column_types, 

2114 data_id=data_id, 

2115 expression=where, 

2116 bind=bind, 

2117 defaults=self.defaults.dataId, 

2118 check=check, 

2119 datasets=[resolved_dataset_type], 

2120 ) 

2121 builder = self._makeQueryBuilder(summary) 

2122 # Add the dataset subquery to the query, telling the QueryBuilder 

2123 # to include the rank of the selected collection in the results 

2124 # only if we need to findFirst. Note that if any of the 

2125 # collections are actually wildcard expressions, and 

2126 # findFirst=True, this will raise TypeError for us. 

2127 builder.joinDataset( 

2128 resolved_dataset_type, collection_wildcard, isResult=True, findFirst=findFirst 

2129 ) 

2130 query = builder.finish() 

2131 parent_results.append( 

2132 queries.ParentDatasetQueryResults(query, resolved_dataset_type, components=[None]) 

2133 ) 

2134 if not parent_results: 

2135 doomed_by.extend( 

2136 f"No registered dataset type matching {t!r} found, so no matching datasets can " 

2137 "exist in any collection." 

2138 for t in ensure_iterable(datasetType) 

2139 ) 

2140 return queries.ChainedDatasetQueryResults([], doomed_by=doomed_by) 

2141 elif len(parent_results) == 1: 

2142 return parent_results[0] 

2143 else: 

2144 return queries.ChainedDatasetQueryResults(parent_results) 

2145 

2146 def queryDataIds( 

2147 self, 

2148 # TODO: Drop Dimension support on DM-41326. 

2149 dimensions: DimensionGroup | Iterable[Dimension | str] | Dimension | str, 

2150 *, 

2151 dataId: DataId | None = None, 

2152 datasets: Any = None, 

2153 collections: CollectionArgType | None = None, 

2154 where: str = "", 

2155 components: bool | _Marker = _DefaultMarker, 

2156 bind: Mapping[str, Any] | None = None, 

2157 check: bool = True, 

2158 **kwargs: Any, 

2159 ) -> queries.DataCoordinateQueryResults: 

2160 """Query for data IDs matching user-provided criteria. 

2161 

2162 Parameters 

2163 ---------- 

2164 dimensions : `DimensionGroup`, `Dimension`, or `str`, or \ 

2165 `~collections.abc.Iterable` [ `Dimension` or `str` ] 

2166 The dimensions of the data IDs to yield, as either `Dimension` 

2167 instances or `str`. Will be automatically expanded to a complete 

2168 `DimensionGroup`. Support for `Dimension` instances is deprecated 

2169 and will not be supported after v27. 

2170 dataId : `dict` or `DataCoordinate`, optional 

2171 A data ID whose key-value pairs are used as equality constraints 

2172 in the query. 

2173 datasets : dataset type expression, optional 

2174 An expression that fully or partially identifies dataset types 

2175 that should constrain the yielded data IDs. For example, including 

2176 "raw" here would constrain the yielded ``instrument``, 

2177 ``exposure``, ``detector``, and ``physical_filter`` values to only 

2178 those for which at least one "raw" dataset exists in 

2179 ``collections``. Allowed types include `DatasetType`, `str`, 

2180 and iterables thereof. Regular expression objects (i.e. 

2181 `re.Pattern`) are deprecated and will be removed after the v26 

2182 release. See :ref:`daf_butler_dataset_type_expressions` for more 

2183 information. 

2184 collections : collection expression, optional 

2185 An expression that identifies the collections to search for 

2186 datasets, such as a `str` (for full matches or partial matches 

2187 via globs), `re.Pattern` (for partial matches), or iterable 

2188 thereof. ``...`` can be used to search all collections (actually 

2189 just all `~CollectionType.RUN` collections, because this will 

2190 still find all datasets). If not provided, 

2191 ``self.defaults.collections`` is used. Ignored unless ``datasets``

2192 is also passed. See :ref:`daf_butler_collection_expressions` for 

2193 more information. 

2194 where : `str`, optional 

2195 A string expression similar to a SQL WHERE clause. May involve 

2196 any column of a dimension table or (as a shortcut for the primary 

2197 key column of a dimension table) dimension name. See 

2198 :ref:`daf_butler_dimension_expressions` for more information. 

2199 components : `bool`, optional 

2200 Must be `False`. Provided only for backwards compatibility. After 

2201 v27 this argument will be removed entirely. 

2202 bind : `~collections.abc.Mapping`, optional 

2203 Mapping containing literal values that should be injected into the 

2204 ``where`` expression, keyed by the identifiers they replace. 

2205 Values of collection type can be expanded in some cases; see 

2206 :ref:`daf_butler_dimension_expressions_identifiers` for more 

2207 information. 

2208 check : `bool`, optional 

2209 If `True` (default) check the query for consistency before 

2210 executing it. This may reject some valid queries that resemble 

2211 common mistakes (e.g. queries for visits without specifying an 

2212 instrument). 

2213 **kwargs 

2214 Additional keyword arguments are forwarded to 

2215 `DataCoordinate.standardize` when processing the ``dataId`` 

2216 argument (and may be used to provide a constraining data ID even 

2217 when the ``dataId`` argument is `None`). 

2218 

2219 Returns 

2220 ------- 

2221 dataIds : `.queries.DataCoordinateQueryResults` 

2222 Data IDs matching the given query parameters. These are guaranteed 

2223 to identify all dimensions (`DataCoordinate.hasFull` returns 

2224 `True`), but will not contain `DimensionRecord` objects 

2225 (`DataCoordinate.hasRecords` returns `False`). Call 

2226 `~.queries.DataCoordinateQueryResults.expanded` on the 

2227 returned object to fetch those (and consider using 

2228 `~.queries.DataCoordinateQueryResults.materialize` on the 

2229 returned object first if the expected number of rows is very 

2230 large). See documentation for those methods for additional 

2231 information. 

2232 

2233 Raises 

2234 ------ 

2235 lsst.daf.butler.registry.NoDefaultCollectionError 

2236 Raised if ``collections`` is `None` and 

2237 ``self.defaults.collections`` is `None`. 

2238 lsst.daf.butler.registry.CollectionExpressionError 

2239 Raised when ``collections`` expression is invalid. 

2240 lsst.daf.butler.registry.DataIdError 

2241 Raised when ``dataId`` or keyword arguments specify unknown 

2242 dimensions or values, or when they contain inconsistent values. 

2243 lsst.daf.butler.registry.DatasetTypeExpressionError 

2244 Raised when ``datasetType`` expression is invalid. 

2245 lsst.daf.butler.registry.UserExpressionError 

2246 Raised when ``where`` expression is invalid. 
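
Examples
--------
An illustrative sketch; the dimension names, dataset type, collection,
and data ID value are assumptions:

>>> data_ids = registry.queryDataIds(
...     ["visit", "detector"],
...     datasets="raw",
...     collections="HSC/raw/all",
...     instrument="HSC",
... )
>>> for data_id in data_ids:
...     print(data_id["visit"], data_id["detector"])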

2247 """ 

2248 if components is not _DefaultMarker: 

2249 if components is not False: 

2250 raise DatasetTypeError( 

2251 "Dataset component queries are no longer supported by Registry. Use " 

2252 "DatasetType methods to obtain components from parent dataset types instead." 

2253 ) 

2254 else: 

2255 warnings.warn( 

2256 "The components parameter is ignored. It will be removed after v27.", 

2257 category=FutureWarning, 

2258 stacklevel=find_outside_stacklevel("lsst.daf.butler"), 

2259 ) 

2260 requested_dimensions = self.dimensions.conform(dimensions) 

2261 doomed_by: list[str] = [] 

2262 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs) 

2263 resolved_dataset_types, collection_wildcard = self._standardize_query_dataset_args( 

2264 datasets, collections, doomed_by=doomed_by 

2265 ) 

2266 if collection_wildcard is not None and collection_wildcard.empty(): 

2267 doomed_by.append("No data coordinates can be found because collection list is empty.") 

2268 summary = queries.QuerySummary( 

2269 requested=requested_dimensions, 

2270 column_types=self._managers.column_types, 

2271 data_id=data_id, 

2272 expression=where, 

2273 bind=bind, 

2274 defaults=self.defaults.dataId, 

2275 check=check, 

2276 datasets=resolved_dataset_types, 

2277 ) 

2278 builder = self._makeQueryBuilder(summary, doomed_by=doomed_by) 

2279 for datasetType in resolved_dataset_types: 

2280 builder.joinDataset(datasetType, collection_wildcard, isResult=False) 

2281 query = builder.finish() 

2282 

2283 return queries.DataCoordinateQueryResults(query) 

2284 

2285 def queryDimensionRecords( 

2286 self, 

2287 element: DimensionElement | str, 

2288 *, 

2289 dataId: DataId | None = None, 

2290 datasets: Any = None, 

2291 collections: CollectionArgType | None = None, 

2292 where: str = "", 

2293 components: bool | _Marker = _DefaultMarker, 

2294 bind: Mapping[str, Any] | None = None, 

2295 check: bool = True, 

2296 **kwargs: Any, 

2297 ) -> queries.DimensionRecordQueryResults: 

2298 """Query for dimension information matching user-provided criteria. 

2299 

2300 Parameters 

2301 ---------- 

2302 element : `DimensionElement` or `str` 

2303 The dimension element to obtain records for. 

2304 dataId : `dict` or `DataCoordinate`, optional 

2305 A data ID whose key-value pairs are used as equality constraints 

2306 in the query. 

2307 datasets : dataset type expression, optional 

2308 An expression that fully or partially identifies dataset types 

2309 that should constrain the yielded records. See `queryDataIds` and 

2310 :ref:`daf_butler_dataset_type_expressions` for more information. 

2311 collections : collection expression, optional 

2312 An expression that identifies the collections to search for 

2313 datasets, such as a `str` (for full matches or partial matches 

2314 via globs), `re.Pattern` (for partial matches), or iterable 

2315 thereof. ``...`` can be used to search all collections (actually 

2316 just all `~CollectionType.RUN` collections, because this will 

2317 still find all datasets). If not provided, 

2318 ``self.defaults.collections`` is used. Ignored unless ``datasets``

2319 is also passed. See :ref:`daf_butler_collection_expressions` for 

2320 more information. 

2321 where : `str`, optional 

2322 A string expression similar to a SQL WHERE clause. See 

2323 `queryDataIds` and :ref:`daf_butler_dimension_expressions` for more 

2324 information. 

2325 components : `bool`, optional 

2326 Whether to apply dataset expressions to components as well. 

2327 See `queryDataIds` for more information. 

2328 

2329 Must be `False`. Provided only for backwards compatibility. After 

2330 v27 this argument will be removed entirely. 

2331 bind : `~collections.abc.Mapping`, optional 

2332 Mapping containing literal values that should be injected into the 

2333 ``where`` expression, keyed by the identifiers they replace. 

2334 Values of collection type can be expanded in some cases; see 

2335 :ref:`daf_butler_dimension_expressions_identifiers` for more 

2336 information. 

2337 check : `bool`, optional 

2338 If `True` (default) check the query for consistency before 

2339 executing it. This may reject some valid queries that resemble 

2340 common mistakes (e.g. queries for visits without specifying an 

2341 instrument). 

2342 **kwargs 

2343 Additional keyword arguments are forwarded to 

2344 `DataCoordinate.standardize` when processing the ``dataId`` 

2345 argument (and may be used to provide a constraining data ID even 

2346 when the ``dataId`` argument is `None`). 

2347 

2348 Returns 

2349 ------- 

2350 records : `.queries.DimensionRecordQueryResults`

2351 Dimension records matching the given query parameters.

2352 

2353 Raises 

2354 ------ 

2355 lsst.daf.butler.registry.NoDefaultCollectionError 

2356 Raised if ``collections`` is `None` and 

2357 ``self.defaults.collections`` is `None`. 

2358 lsst.daf.butler.registry.CollectionExpressionError 

2359 Raised when ``collections`` expression is invalid. 

2360 lsst.daf.butler.registry.DataIdError 

2361 Raised when ``dataId`` or keyword arguments specify unknown 

2362 dimensions or values, or when they contain inconsistent values. 

2363 lsst.daf.butler.registry.DatasetTypeExpressionError 

2364 Raised when ``datasetType`` expression is invalid. 

2365 lsst.daf.butler.registry.UserExpressionError 

2366 Raised when ``where`` expression is invalid. 
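
Examples
--------
An illustrative sketch; the element name and data ID value are
assumptions:

>>> records = registry.queryDimensionRecords(
...     "detector", dataId={"instrument": "HSC"}
... )
>>> for record in records:
...     print(record)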

2367 """ 

2368 if components is not _DefaultMarker: 

2369 if components is not False: 

2370 raise DatasetTypeError( 

2371 "Dataset component queries are no longer supported by Registry. Use " 

2372 "DatasetType methods to obtain components from parent dataset types instead." 

2373 ) 

2374 else: 

2375 warnings.warn( 

2376 "The components parameter is ignored. It will be removed after v27.", 

2377 category=FutureWarning, 

2378 stacklevel=find_outside_stacklevel("lsst.daf.butler"), 

2379 ) 

2380 if not isinstance(element, DimensionElement): 

2381 try: 

2382 element = self.dimensions[element] 

2383 except KeyError as e: 

2384 raise DimensionNameError( 

2385 f"No such dimension '{element}', available dimensions: " + str(self.dimensions.elements) 

2386 ) from e 

2387 doomed_by: list[str] = [] 

2388 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs) 

2389 resolved_dataset_types, collection_wildcard = self._standardize_query_dataset_args( 

2390 datasets, collections, doomed_by=doomed_by 

2391 ) 

2392 if collection_wildcard is not None and collection_wildcard.empty(): 

2393 doomed_by.append("No dimension records can be found because collection list is empty.") 

2394 summary = queries.QuerySummary( 

2395 requested=element.minimal_group, 

2396 column_types=self._managers.column_types, 

2397 data_id=data_id, 

2398 expression=where, 

2399 bind=bind, 

2400 defaults=self.defaults.dataId, 

2401 check=check, 

2402 datasets=resolved_dataset_types, 

2403 ) 

2404 builder = self._makeQueryBuilder(summary, doomed_by=doomed_by) 

2405 for datasetType in resolved_dataset_types: 

2406 builder.joinDataset(datasetType, collection_wildcard, isResult=False) 

2407 query = builder.finish().with_record_columns(element.name) 

2408 return queries.DatabaseDimensionRecordQueryResults(query, element) 

2409 

2410 def queryDatasetAssociations( 

2411 self, 

2412 datasetType: str | DatasetType, 

2413 collections: CollectionArgType | None = ..., 

2414 *, 

2415 collectionTypes: Iterable[CollectionType] = CollectionType.all(), 

2416 flattenChains: bool = False, 

2417 ) -> Iterator[DatasetAssociation]: 

2418 """Iterate over dataset-collection combinations where the dataset is in 

2419 the collection. 

2420 

2421 This method is a temporary placeholder for better support for 

2422 association results in `queryDatasets`. It will probably be 

2423 removed in the future, and should be avoided in production code 

2424 whenever possible. 

2425 

2426 Parameters 

2427 ---------- 

2428 datasetType : `DatasetType` or `str` 

2429 A dataset type object or the name of one. 

2430 collections : collection expression, optional 

2431 An expression that identifies the collections to search for 

2432 datasets, such as a `str` (for full matches or partial matches 

2433 via globs), `re.Pattern` (for partial matches), or iterable 

2434 thereof. ``...`` can be used to search all collections (actually 

2435 just all `~CollectionType.RUN` collections, because this will still 

2436 find all datasets). If not provided, ``self.defaults.collections``

2437 is used. See :ref:`daf_butler_collection_expressions` for more 

2438 information. 

2439 collectionTypes : `~collections.abc.Set` [ `CollectionType` ], optional 

2440 If provided, only yield associations from collections of these 

2441 types. 

2442 flattenChains : `bool`, optional 

2443 If `True`, search in the children of `~CollectionType.CHAINED` 

2444 collections. If `False`, ``CHAINED`` collections are ignored. 

2445 

2446 Yields 

2447 ------ 

2448 association : `.DatasetAssociation` 

2449 Object representing the relationship between a single dataset and 

2450 a single collection. 

2451 

2452 Raises 

2453 ------ 

2454 lsst.daf.butler.registry.NoDefaultCollectionError 

2455 Raised if ``collections`` is `None` and 

2456 ``self.defaults.collections`` is `None`. 

2457 lsst.daf.butler.registry.CollectionExpressionError 

2458 Raised when ``collections`` expression is invalid. 
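
Examples
--------
An illustrative sketch; the dataset type and collection expression are
assumptions:

>>> from lsst.daf.butler import CollectionType
>>> for assoc in registry.queryDatasetAssociations(
...     "bias",
...     collections="calib/*",
...     collectionTypes={CollectionType.CALIBRATION},
... ):
...     print(assoc.collection, assoc.timespan)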

2459 """ 

2460 if collections is None: 

2461 if not self.defaults.collections: 

2462 raise NoDefaultCollectionError( 

2463 "No collections provided to queryDatasetAssociations, " 

2464 "and no defaults from registry construction." 

2465 ) 

2466 collections = self.defaults.collections 

2467 collection_wildcard = CollectionWildcard.from_expression(collections) 

2468 backend = queries.SqlQueryBackend(self._db, self._managers, self.dimension_record_cache) 

2469 parent_dataset_type = backend.resolve_single_dataset_type_wildcard(datasetType) 

2470 timespan_tag = DatasetColumnTag(parent_dataset_type.name, "timespan") 

2471 collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection") 

2472 for parent_collection_record in backend.resolve_collection_wildcard( 

2473 collection_wildcard, 

2474 collection_types=frozenset(collectionTypes), 

2475 flatten_chains=flattenChains, 

2476 ): 

2477 # Resolve this possibly-chained collection into a list of 

2478 # non-CHAINED collections that actually hold datasets of this 

2479 # type. 

2480 candidate_collection_records = backend.resolve_dataset_collections( 

2481 parent_dataset_type, 

2482 CollectionWildcard.from_names([parent_collection_record.name]), 

2483 allow_calibration_collections=True, 

2484 governor_constraints={}, 

2485 ) 

2486 if not candidate_collection_records: 

2487 continue 

2488 with backend.context() as context: 

2489 relation = backend.make_dataset_query_relation( 

2490 parent_dataset_type, 

2491 candidate_collection_records, 

2492 columns={"dataset_id", "run", "timespan", "collection"}, 

2493 context=context, 

2494 ) 

2495 reader = queries.DatasetRefReader( 

2496 parent_dataset_type, 

2497 translate_collection=lambda k: self._managers.collections[k].name, 

2498 full=False, 

2499 ) 

2500 for row in context.fetch_iterable(relation): 

2501 ref = reader.read(row) 

2502 collection_record = self._managers.collections[row[collection_tag]] 

2503 if collection_record.type is CollectionType.CALIBRATION: 

2504 timespan = row[timespan_tag] 

2505 else: 

2506 # For backwards compatibility and (possibly?) user 

2507 # convenience we continue to define the timespan of a 

2508 # DatasetAssociation row for a non-CALIBRATION 

2509 # collection to be None rather than a fully unbounded 

2510 # timespan. 

2511 timespan = None 

2512 yield DatasetAssociation(ref=ref, collection=collection_record.name, timespan=timespan) 

2513 

2514 def get_datastore_records(self, ref: DatasetRef) -> DatasetRef: 

2515 """Retrieve datastore records for given ref. 

2516 

2517 Parameters 

2518 ---------- 

2519 ref : `DatasetRef` 

2520 Dataset reference for which to retrieve the corresponding datastore

2521 records. 

2522 

2523 Returns 

2524 ------- 

2525 updated_ref : `DatasetRef` 

2526 Dataset reference with filled datastore records. 

2527 

2528 Notes 

2529 ----- 

2530 If this method is called with a dataset ref that is not known to the

2531 registry, then a reference with an empty set of records is returned.

2532 """ 

2533 datastore_records: dict[str, list[StoredDatastoreItemInfo]] = {} 

2534 for opaque, record_class in self._datastore_record_classes.items(): 

2535 records = self.fetchOpaqueData(opaque, dataset_id=ref.id) 

2536 datastore_records[opaque] = [record_class.from_record(record) for record in records] 

2537 return ref.replace(datastore_records=datastore_records) 

2538 

2539 def store_datastore_records(self, refs: Mapping[str, DatasetRef]) -> None: 

2540 """Store datastore records for given refs. 

2541 

2542 Parameters 

2543 ---------- 

2544 refs : `~collections.abc.Mapping` [`str`, `DatasetRef`] 

2545 Mapping of a datastore name to dataset reference stored in that 

2546 datastore; each reference must include datastore records.

2547 """ 

2548 for datastore_name, ref in refs.items(): 

2549 # Store ref IDs in the bridge table. 

2550 bridge = self._managers.datastores.register(datastore_name) 

2551 bridge.insert([ref]) 

2552 

2553 # store records in opaque tables 

2554 assert ref._datastore_records is not None, "Dataset ref must have datastore records" 

2555 for table_name, records in ref._datastore_records.items(): 

2556 opaque_table = self._managers.opaque.get(table_name) 

2557 assert opaque_table is not None, f"Unexpected opaque table name {table_name}" 

2558 opaque_table.insert(*(record.to_record(dataset_id=ref.id) for record in records)) 

2559 

2560 def make_datastore_tables(self, tables: Mapping[str, DatastoreOpaqueTable]) -> None: 

2561 """Create opaque tables used by datastores. 

2562 

2563 Parameters 

2564 ---------- 

2565 tables : `~collections.abc.Mapping` 

2566 Maps opaque table name to its definition. 

2567 

2568 Notes 

2569 ----- 

2570 This method should disappear in the future when opaque table 

2571 definitions are provided during `Registry` construction.

2572 """ 

2573 datastore_record_classes = {} 

2574 for table_name, table_def in tables.items(): 

2575 datastore_record_classes[table_name] = table_def.record_class 

2576 try: 

2577 self._managers.opaque.register(table_name, table_def.table_spec) 

2578 except ReadOnlyDatabaseError: 

2579 # If the database is read only and we just tried and failed to 

2580 # create a table, it means someone is trying to create a 

2581 # read-only butler client for an empty repo. That should be 

2582 # okay, as long as they then try to get any datasets before 

2583 # some other client creates the table. Chances are they're 

2584 # just validating configuration. 

2585 pass 

2586 self._datastore_record_classes = datastore_record_classes 

2587 

2588 def preload_cache(self) -> None: 

2589 """Immediately load caches that are used for common operations.""" 

2590 self.dimension_record_cache.preload_cache() 

2591 

2592 @property 

2593 def obsCoreTableManager(self) -> ObsCoreTableManager | None: 

2594 """The ObsCore manager instance for this registry 

2595 (`~.interfaces.ObsCoreTableManager` 

2596 or `None`). 

2597 

2598 The ObsCore manager may not be implemented by every registry backend,

2599 and may not be enabled for a given repository.

2600 """ 

2601 return self._managers.obscore 

2602 

2603 storageClasses: StorageClassFactory 

2604 """All storage classes known to the registry (`StorageClassFactory`). 

2605 """ 

2606 

2607 _defaults: RegistryDefaults 

2608 """Default collections used for registry queries (`RegistryDefaults`)."""