Coverage for python/lsst/daf/butler/registry/sql_registry.py: 18%

583 statements  

coverage.py v7.4.4, created at 2024-04-04 02:55 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30from .. import ddl 

31 

32__all__ = ("SqlRegistry",) 

33 

34import contextlib 

35import logging 

36import warnings 

37from collections.abc import Iterable, Iterator, Mapping, Sequence 

38from typing import TYPE_CHECKING, Any, Literal, cast 

39 

40import sqlalchemy 

41from lsst.daf.relation import LeafRelation, Relation 

42from lsst.resources import ResourcePathExpression 

43from lsst.utils.introspection import find_outside_stacklevel 

44from lsst.utils.iteration import ensure_iterable 

45 

46from .._column_tags import DatasetColumnTag 

47from .._config import Config 

48from .._dataset_association import DatasetAssociation 

49from .._dataset_ref import DatasetId, DatasetIdGenEnum, DatasetRef 

50from .._dataset_type import DatasetType 

51from .._exceptions import CalibrationLookupError, DimensionNameError 

52from .._named import NamedKeyMapping, NameLookupMapping 

53from .._storage_class import StorageClassFactory 

54from .._timespan import Timespan 

55from ..dimensions import ( 

56 DataCoordinate, 

57 DataId, 

58 Dimension, 

59 DimensionConfig, 

60 DimensionElement, 

61 DimensionGraph, 

62 DimensionGroup, 

63 DimensionRecord, 

64 DimensionUniverse, 

65) 

66from ..dimensions.record_cache import DimensionRecordCache 

67from ..progress import Progress 

68from ..registry import ( 

69 ArgumentError, 

70 CollectionExpressionError, 

71 CollectionSummary, 

72 CollectionType, 

73 CollectionTypeError, 

74 ConflictingDefinitionError, 

75 DataIdValueError, 

76 DatasetTypeError, 

77 InconsistentDataIdError, 

78 MissingDatasetTypeError, 

79 NoDefaultCollectionError, 

80 OrphanedRecordError, 

81 RegistryConfig, 

82 RegistryConsistencyError, 

83 RegistryDefaults, 

84 queries, 

85) 

86from ..registry.interfaces import ChainedCollectionRecord, ReadOnlyDatabaseError, RunRecord 

87from ..registry.managers import RegistryManagerInstances, RegistryManagerTypes 

88from ..registry.wildcards import CollectionWildcard, DatasetTypeWildcard 

89from ..utils import _DefaultMarker, _Marker, transactional 

90 

91if TYPE_CHECKING: 

92 from .._butler_config import ButlerConfig 

93 from ..datastore._datastore import DatastoreOpaqueTable 

94 from ..datastore.stored_file_info import StoredDatastoreItemInfo 

95 from ..registry._registry import CollectionArgType 

96 from ..registry.interfaces import ( 

97 CollectionRecord, 

98 Database, 

99 DatastoreRegistryBridgeManager, 

100 ObsCoreTableManager, 

101 ) 

102 

103 

104_LOG = logging.getLogger(__name__) 

105 

106 

107class SqlRegistry: 

108 """Butler Registry implementation that uses SQL database as backend. 

109 

110 Parameters 

111 ---------- 

112 database : `Database` 

113 Database instance to store Registry. 

114 defaults : `RegistryDefaults` 

115 Default collection search path and/or output `~CollectionType.RUN` 

116 collection. 

117 managers : `RegistryManagerInstances` 

118 All the managers required for this registry. 

119 """ 

120 

121 defaultConfigFile: str | None = None 

122 """Path to configuration defaults. Accessed within the ``configs`` resource 

123 or relative to a search path. Can be None if no defaults specified. 

124 """ 

125 

126 @classmethod 

127 def forceRegistryConfig( 

128 cls, config: ButlerConfig | RegistryConfig | Config | str | None 

129 ) -> RegistryConfig: 

130 """Force the supplied config to a `RegistryConfig`. 

131 

132 Parameters 

133 ---------- 

134 config : `RegistryConfig`, `Config` or `str` or `None` 

135 Registry configuration; if missing, the default configuration will

136 be loaded from ``registry.yaml``.

137 

138 Returns 

139 ------- 

140 registry_config : `RegistryConfig` 

141 A registry config. 

142 """ 

143 if not isinstance(config, RegistryConfig): 

144 if isinstance(config, str | Config) or config is None: 

145 config = RegistryConfig(config) 

146 else: 

147 raise ValueError(f"Incompatible Registry configuration: {config}") 

148 return config 

149 

150 @classmethod 

151 def createFromConfig( 

152 cls, 

153 config: RegistryConfig | str | None = None, 

154 dimensionConfig: DimensionConfig | str | None = None, 

155 butlerRoot: ResourcePathExpression | None = None, 

156 ) -> SqlRegistry: 

157 """Create registry database and return `SqlRegistry` instance. 

158 

159 This method initializes database contents; the database must be empty

160 prior to calling this method. 

161 

162 Parameters 

163 ---------- 

164 config : `RegistryConfig` or `str`, optional 

165 Registry configuration; if missing, the default configuration will

166 be loaded from ``registry.yaml``.

167 dimensionConfig : `DimensionConfig` or `str`, optional 

168 Dimensions configuration; if missing, the default configuration

169 will be loaded from dimensions.yaml. 

170 butlerRoot : convertible to `lsst.resources.ResourcePath`, optional 

171 Path to the repository root this `SqlRegistry` will manage. 

172 

173 Returns 

174 ------- 

175 registry : `SqlRegistry` 

176 A new `SqlRegistry` instance. 

177 """ 

178 config = cls.forceRegistryConfig(config) 

179 config.replaceRoot(butlerRoot) 

180 

181 if isinstance(dimensionConfig, str): 

182 dimensionConfig = DimensionConfig(dimensionConfig) 

183 elif dimensionConfig is None: 

184 dimensionConfig = DimensionConfig() 

185 elif not isinstance(dimensionConfig, DimensionConfig): 

186 raise TypeError(f"Incompatible Dimension configuration type: {type(dimensionConfig)}") 

187 

188 DatabaseClass = config.getDatabaseClass() 

189 database = DatabaseClass.fromUri( 

190 config.connectionString, origin=config.get("origin", 0), namespace=config.get("namespace") 

191 ) 

192 managerTypes = RegistryManagerTypes.fromConfig(config) 

193 managers = managerTypes.makeRepo(database, dimensionConfig) 

194 return cls(database, RegistryDefaults(), managers) 
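# Example (not part of the original source): a minimal sketch of initializing a
# brand-new, empty repository database with ``createFromConfig``. The SQLite
# URI and repository root are illustrative assumptions.
from lsst.daf.butler.registry import RegistryConfig

example_config = RegistryConfig()
example_config["db"] = "sqlite:///example_registry.sqlite3"  # hypothetical database URI
registry = SqlRegistry.createFromConfig(example_config, butlerRoot="/tmp/example_repo")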

195 

196 @classmethod 

197 def fromConfig( 

198 cls, 

199 config: ButlerConfig | RegistryConfig | Config | str, 

200 butlerRoot: ResourcePathExpression | None = None, 

201 writeable: bool = True, 

202 defaults: RegistryDefaults | None = None, 

203 ) -> SqlRegistry: 

204 """Create `Registry` subclass instance from `config`. 

205 

206 Registry database must be initialized prior to calling this method. 

207 

208 Parameters 

209 ---------- 

210 config : `ButlerConfig`, `RegistryConfig`, `Config` or `str` 

211 Registry configuration. 

212 butlerRoot : `lsst.resources.ResourcePathExpression`, optional 

213 Path to the repository root this `Registry` will manage. 

214 writeable : `bool`, optional 

215 If `True` (default) create a read-write connection to the database. 

216 defaults : `RegistryDefaults`, optional 

217 Default collection search path and/or output `~CollectionType.RUN` 

218 collection. 

219 

220 Returns 

221 ------- 

222 registry : `SqlRegistry` 

223 A new `SqlRegistry` subclass instance. 

224 """ 

225 config = cls.forceRegistryConfig(config) 

226 config.replaceRoot(butlerRoot) 

227 DatabaseClass = config.getDatabaseClass() 

228 database = DatabaseClass.fromUri( 

229 config.connectionString, 

230 origin=config.get("origin", 0), 

231 namespace=config.get("namespace"), 

232 writeable=writeable, 

233 ) 

234 managerTypes = RegistryManagerTypes.fromConfig(config) 

235 with database.session(): 

236 managers = managerTypes.loadRepo(database) 

237 if defaults is None: 

238 defaults = RegistryDefaults() 

239 return cls(database, defaults, managers) 
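# Example (not part of the original source): opening an existing repository's
# registry read-only with default collections. The config path and collection
# name are assumptions.
from lsst.daf.butler.registry import RegistryDefaults

registry = SqlRegistry.fromConfig(
    "/repo/example/butler.yaml",  # hypothetical repository config
    writeable=False,
    defaults=RegistryDefaults(collections=["HSC/defaults"]),
)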

240 

241 def __init__( 

242 self, 

243 database: Database, 

244 defaults: RegistryDefaults, 

245 managers: RegistryManagerInstances, 

246 ): 

247 self._db = database 

248 self._managers = managers 

249 self.storageClasses = StorageClassFactory() 

250 # This is public to SqlRegistry's internal-to-daf_butler callers, but 

251 # it is intentionally not part of RegistryShim. 

252 self.dimension_record_cache = DimensionRecordCache( 

253 self._managers.dimensions.universe, 

254 fetch=self._managers.dimensions.fetch_cache_dict, 

255 ) 

256 # Intentionally invoke property setter to initialize defaults. This 

257 # can only be done after most of the rest of Registry has already been 

258 # initialized, and must be done before the property getter is used. 

259 self.defaults = defaults 

260 # TODO: This is currently initialized by `make_datastore_tables`, 

261 # eventually we'll need to do it during construction. 

262 # The mapping is indexed by the opaque table name. 

263 self._datastore_record_classes: Mapping[str, type[StoredDatastoreItemInfo]] = {} 

264 

265 def __str__(self) -> str: 

266 return str(self._db) 

267 

268 def __repr__(self) -> str: 

269 return f"SqlRegistry({self._db!r}, {self.dimensions!r})" 

270 

271 def isWriteable(self) -> bool: 

272 """Return `True` if this registry allows write operations, and `False` 

273 otherwise. 

274 """ 

275 return self._db.isWriteable() 

276 

277 def copy(self, defaults: RegistryDefaults | None = None) -> SqlRegistry: 

278 """Create a new `SqlRegistry` backed by the same data repository 

279 as this one and sharing a database connection pool with it, but with 

280 independent defaults and database sessions. 

281 

282 Parameters 

283 ---------- 

284 defaults : `~lsst.daf.butler.registry.RegistryDefaults`, optional 

285 Default collections and data ID values for the new registry. If 

286 not provided, ``self.defaults`` will be used (but future changes 

287 to either registry's defaults will not affect the other). 

288 

289 Returns 

290 ------- 

291 copy : `SqlRegistry` 

292 A new `SqlRegistry` instance with its own defaults. 

293 """ 

294 if defaults is None: 

295 # No need to copy, because `RegistryDefaults` is immutable; we 

296 # effectively copy on write. 

297 defaults = self.defaults 

298 db = self._db.clone() 

299 result = SqlRegistry(db, defaults, self._managers.clone(db)) 

300 result._datastore_record_classes = dict(self._datastore_record_classes) 

301 result.dimension_record_cache.load_from(self.dimension_record_cache) 

302 return result 
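# Example (not part of the original source): giving a worker its own
# SqlRegistry with independent defaults while sharing the connection pool.
# Assumes ``registry`` is an existing `SqlRegistry`; the run name is an
# assumption.
from lsst.daf.butler.registry import RegistryDefaults

worker_registry = registry.copy(RegistryDefaults(run="u/example/run"))
assert worker_registry is not registry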

303 

304 @property 

305 def dimensions(self) -> DimensionUniverse: 

306 """Definitions of all dimensions recognized by this `Registry` 

307 (`DimensionUniverse`). 

308 """ 

309 return self._managers.dimensions.universe 

310 

311 @property 

312 def defaults(self) -> RegistryDefaults: 

313 """Default collection search path and/or output `~CollectionType.RUN` 

314 collection (`~lsst.daf.butler.registry.RegistryDefaults`). 

315 

316 This is an immutable struct whose components may not be set 

317 individually, but the entire struct can be set by assigning to this 

318 property. 

319 """ 

320 return self._defaults 

321 

322 @defaults.setter 

323 def defaults(self, value: RegistryDefaults) -> None: 

324 if value.run is not None: 

325 self.registerRun(value.run) 

326 value.finish(self) 

327 self._defaults = value 

328 

329 def refresh(self) -> None: 

330 """Refresh all in-memory state by querying the database. 

331 

332 This may be necessary to enable querying for entities added by other 

333 registry instances after this one was constructed. 

334 """ 

335 self.dimension_record_cache.reset() 

336 with self._db.transaction(): 

337 self._managers.refresh() 

338 

339 def caching_context(self) -> contextlib.AbstractContextManager[None]: 

340 """Return context manager that enables caching. 

341 

342 Returns 

343 ------- 

344 manager 

345 A context manager that enables client-side caching. Entering 

346 the context returns `None`. 

347 """ 

348 return self._managers.caching_context_manager() 

349 

350 @contextlib.contextmanager 

351 def transaction(self, *, savepoint: bool = False) -> Iterator[None]: 

352 """Return a context manager that represents a transaction. 

353 

354 Parameters 

355 ---------- 

356 savepoint : `bool` 

357 Whether to issue a SAVEPOINT in the database. 

358 

359 Yields 

360 ------ 

361 `None` 

362 """ 

363 with self._db.transaction(savepoint=savepoint): 

364 yield 
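# Example (not part of the original source): grouping related writes so they
# commit or roll back together. Assumes ``registry`` is an existing
# `SqlRegistry`; the collection names are assumptions.
with registry.transaction(savepoint=True):
    registry.setCollectionChain("u/example/chain", ["u/example/run-a", "u/example/run-b"])
    registry.setCollectionDocumentation("u/example/chain", "Example chained collection")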

365 

366 def resetConnectionPool(self) -> None: 

367 """Reset SQLAlchemy connection pool for `SqlRegistry` database. 

368 

369 This operation is useful when using the registry with fork-based

370 multiprocessing. To use the registry across a fork boundary, make sure

371 that there are no currently active connections (no session or

372 transaction in progress) and that the connection pool has been reset

373 using this method. This method should be called by the child process

374 immediately after the fork.

375 """ 

376 self._db._engine.dispose() 
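# Example (not part of the original source): resetting the pool in each forked
# worker before it issues any queries, assuming ``registry`` is an existing
# `SqlRegistry` and no session or transaction was open across the fork.
import multiprocessing

def _init_worker() -> None:
    registry.resetConnectionPool()

pool = multiprocessing.Pool(processes=4, initializer=_init_worker)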

377 

378 def registerOpaqueTable(self, tableName: str, spec: ddl.TableSpec) -> None: 

379 """Add an opaque (to the `Registry`) table for use by a `Datastore` or 

380 other data repository client. 

381 

382 Opaque table records can be added via `insertOpaqueData`, retrieved via 

383 `fetchOpaqueData`, and removed via `deleteOpaqueData`. 

384 

385 Parameters 

386 ---------- 

387 tableName : `str` 

388 Logical name of the opaque table. This may differ from the 

389 actual name used in the database by a prefix and/or suffix. 

390 spec : `ddl.TableSpec` 

391 Specification for the table to be added. 

392 """ 

393 self._managers.opaque.register(tableName, spec) 

394 

395 @transactional 

396 def insertOpaqueData(self, tableName: str, *data: dict) -> None: 

397 """Insert records into an opaque table. 

398 

399 Parameters 

400 ---------- 

401 tableName : `str` 

402 Logical name of the opaque table. Must match the name used in a 

403 previous call to `registerOpaqueTable`. 

404 *data 

405 Each additional positional argument is a dictionary that represents 

406 a single row to be added. 

407 """ 

408 self._managers.opaque[tableName].insert(*data) 

409 

410 def fetchOpaqueData(self, tableName: str, **where: Any) -> Iterator[Mapping[str, Any]]: 

411 """Retrieve records from an opaque table. 

412 

413 Parameters 

414 ---------- 

415 tableName : `str` 

416 Logical name of the opaque table. Must match the name used in a 

417 previous call to `registerOpaqueTable`. 

418 **where 

419 Additional keyword arguments are interpreted as equality 

420 constraints that restrict the returned rows (combined with AND); 

421 keyword arguments are column names and values are the values they 

422 must have. 

423 

424 Yields 

425 ------ 

426 row : `dict` 

427 A dictionary representing a single result row. 

428 """ 

429 yield from self._managers.opaque[tableName].fetch(**where) 

430 

431 @transactional 

432 def deleteOpaqueData(self, tableName: str, **where: Any) -> None: 

433 """Remove records from an opaque table. 

434 

435 Parameters 

436 ---------- 

437 tableName : `str` 

438 Logical name of the opaque table. Must match the name used in a 

439 previous call to `registerOpaqueTable`. 

440 **where 

441 Additional keyword arguments are interpreted as equality 

442 constraints that restrict the deleted rows (combined with AND); 

443 keyword arguments are column names and values are the values they 

444 must have. 

445 """ 

446 self._managers.opaque[tableName].delete(where.keys(), where) 
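# Example (not part of the original source): the full opaque-table round trip
# described above. The table name, column layout, values, and this particular
# TableSpec/FieldSpec usage are assumptions; ``registry`` is an existing
# `SqlRegistry`.
import uuid
import sqlalchemy
from lsst.daf.butler import ddl

record_id = uuid.uuid4()
spec = ddl.TableSpec(
    fields=[
        ddl.FieldSpec("dataset_id", dtype=ddl.GUID, primaryKey=True),
        ddl.FieldSpec("path", dtype=sqlalchemy.String, length=256),
    ]
)
registry.registerOpaqueTable("example_datastore_records", spec)
registry.insertOpaqueData("example_datastore_records", {"dataset_id": record_id, "path": "a/b.fits"})
rows = list(registry.fetchOpaqueData("example_datastore_records", dataset_id=record_id))
registry.deleteOpaqueData("example_datastore_records", dataset_id=record_id)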

447 

448 def registerCollection( 

449 self, name: str, type: CollectionType = CollectionType.TAGGED, doc: str | None = None 

450 ) -> bool: 

451 """Add a new collection if one with the given name does not exist. 

452 

453 Parameters 

454 ---------- 

455 name : `str` 

456 The name of the collection to create. 

457 type : `CollectionType` 

458 Enum value indicating the type of collection to create. 

459 doc : `str`, optional 

460 Documentation string for the collection. 

461 

462 Returns 

463 ------- 

464 registered : `bool` 

465 Boolean indicating whether the collection was created by this call

466 (`True`) or already existed (`False`).

467 

468 Notes 

469 ----- 

470 This method cannot be called within transactions, as it needs to be 

471 able to perform its own transaction to be concurrent. 

472 """ 

473 _, registered = self._managers.collections.register(name, type, doc=doc) 

474 return registered 

475 

476 def getCollectionType(self, name: str) -> CollectionType: 

477 """Return an enumeration value indicating the type of the given 

478 collection. 

479 

480 Parameters 

481 ---------- 

482 name : `str` 

483 The name of the collection. 

484 

485 Returns 

486 ------- 

487 type : `CollectionType` 

488 Enum value indicating the type of this collection. 

489 

490 Raises 

491 ------ 

492 lsst.daf.butler.registry.MissingCollectionError 

493 Raised if no collection with the given name exists. 

494 """ 

495 return self._managers.collections.find(name).type 

496 

497 def get_collection_record(self, name: str) -> CollectionRecord: 

498 """Return the record for this collection. 

499 

500 Parameters 

501 ---------- 

502 name : `str` 

503 Name of the collection for which the record is to be retrieved. 

504 

505 Returns 

506 ------- 

507 record : `CollectionRecord` 

508 The record for this collection. 

509 """ 

510 return self._managers.collections.find(name) 

511 

512 def registerRun(self, name: str, doc: str | None = None) -> bool: 

513 """Add a new run if one with the given name does not exist. 

514 

515 Parameters 

516 ---------- 

517 name : `str` 

518 The name of the run to create. 

519 doc : `str`, optional 

520 Documentation string for the collection. 

521 

522 Returns 

523 ------- 

524 registered : `bool` 

525 Boolean indicating whether a new run was registered. `False` 

526 if it already existed. 

527 

528 Notes 

529 ----- 

530 This method cannot be called within transactions, as it needs to be 

531 able to perform its own transaction to be concurrent. 

532 """ 

533 _, registered = self._managers.collections.register(name, CollectionType.RUN, doc=doc) 

534 return registered 

535 

536 @transactional 

537 def removeCollection(self, name: str) -> None: 

538 """Remove the given collection from the registry. 

539 

540 Parameters 

541 ---------- 

542 name : `str` 

543 The name of the collection to remove. 

544 

545 Raises 

546 ------ 

547 lsst.daf.butler.registry.MissingCollectionError 

548 Raised if no collection with the given name exists. 

549 sqlalchemy.exc.IntegrityError 

550 Raised if the database rows associated with the collection are 

551 still referenced by some other table, such as a dataset in a 

552 datastore (for `~CollectionType.RUN` collections only) or a 

553 `~CollectionType.CHAINED` collection of which this collection is 

554 a child. 

555 

556 Notes 

557 ----- 

558 If this is a `~CollectionType.RUN` collection, all datasets and quanta 

559 in it will be removed from the `Registry` database. This requires that

560 those datasets be removed (or at least trashed) from any datastores 

561 that hold them first. 

562 

563 A collection may not be deleted as long as it is referenced by a 

564 `~CollectionType.CHAINED` collection; the ``CHAINED`` collection must 

565 be deleted or redefined first. 

566 """ 

567 self._managers.collections.remove(name) 

568 

569 def getCollectionChain(self, parent: str) -> tuple[str, ...]: 

570 """Return the child collections in a `~CollectionType.CHAINED` 

571 collection. 

572 

573 Parameters 

574 ---------- 

575 parent : `str` 

576 Name of the chained collection. Must have already been added via 

577 a call to `Registry.registerCollection`. 

578 

579 Returns 

580 ------- 

581 children : `~collections.abc.Sequence` [ `str` ] 

582 An ordered sequence of collection names that are searched when the 

583 given chained collection is searched. 

584 

585 Raises 

586 ------ 

587 lsst.daf.butler.registry.MissingCollectionError 

588 Raised if ``parent`` does not exist in the `Registry`. 

589 lsst.daf.butler.registry.CollectionTypeError 

590 Raised if ``parent`` does not correspond to a 

591 `~CollectionType.CHAINED` collection. 

592 """ 

593 record = self._managers.collections.find(parent) 

594 if record.type is not CollectionType.CHAINED: 

595 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.") 

596 assert isinstance(record, ChainedCollectionRecord) 

597 return record.children 

598 

599 @transactional 

600 def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None: 

601 """Define or redefine a `~CollectionType.CHAINED` collection. 

602 

603 Parameters 

604 ---------- 

605 parent : `str` 

606 Name of the chained collection. Must have already been added via 

607 a call to `Registry.registerCollection`. 

608 children : collection expression 

609 An expression defining an ordered search of child collections, 

610 generally an iterable of `str`; see 

611 :ref:`daf_butler_collection_expressions` for more information. 

612 flatten : `bool`, optional 

613 If `True` (`False` is default), recursively flatten out any nested 

614 `~CollectionType.CHAINED` collections in ``children`` first. 

615 

616 Raises 

617 ------ 

618 lsst.daf.butler.registry.MissingCollectionError 

619 Raised when any of the given collections do not exist in the 

620 `Registry`. 

621 lsst.daf.butler.registry.CollectionTypeError 

622 Raised if ``parent`` does not correspond to a 

623 `~CollectionType.CHAINED` collection. 

624 ValueError 

625 Raised if the given collections contain a cycle.

626 """ 

627 record = self._managers.collections.find(parent) 

628 if record.type is not CollectionType.CHAINED: 

629 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.") 

630 assert isinstance(record, ChainedCollectionRecord) 

631 children = CollectionWildcard.from_expression(children).require_ordered() 

632 if children != record.children or flatten: 

633 self._managers.collections.update_chain(record, children, flatten=flatten) 
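# Example (not part of the original source): building a CHAINED collection that
# searches a user run before a shared calibration collection. Assumes
# ``registry`` is an existing `SqlRegistry`; all collection names are
# assumptions.
from lsst.daf.butler import CollectionType

registry.registerRun("u/example/run")
registry.registerCollection("u/example/chain", CollectionType.CHAINED)
registry.setCollectionChain("u/example/chain", ["u/example/run", "HSC/calib"])
assert registry.getCollectionChain("u/example/chain") == ("u/example/run", "HSC/calib")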

634 

635 def getCollectionParentChains(self, collection: str) -> set[str]: 

636 """Return the CHAINED collections that directly contain the given one. 

637 

638 Parameters 

639 ---------- 

640 collection : `str` 

641 Name of the collection. 

642 

643 Returns 

644 ------- 

645 chains : `set` of `str` 

646 Set of `~CollectionType.CHAINED` collection names. 

647 """ 

648 return self._managers.collections.getParentChains(self._managers.collections.find(collection).key) 

649 

650 def getCollectionDocumentation(self, collection: str) -> str | None: 

651 """Retrieve the documentation string for a collection. 

652 

653 Parameters 

654 ---------- 

655 collection : `str` 

656 Name of the collection. 

657 

658 Returns 

659 ------- 

660 docs : `str` or `None` 

661 Docstring for the collection with the given name. 

662 """ 

663 return self._managers.collections.getDocumentation(self._managers.collections.find(collection).key) 

664 

665 def setCollectionDocumentation(self, collection: str, doc: str | None) -> None: 

666 """Set the documentation string for a collection. 

667 

668 Parameters 

669 ---------- 

670 collection : `str` 

671 Name of the collection. 

672 doc : `str` or `None` 

673 Docstring for the collection with the given name; will replace any 

674 existing docstring. Passing `None` will remove any existing 

675 docstring. 

676 """ 

677 self._managers.collections.setDocumentation(self._managers.collections.find(collection).key, doc) 

678 

679 def getCollectionSummary(self, collection: str) -> CollectionSummary: 

680 """Return a summary for the given collection. 

681 

682 Parameters 

683 ---------- 

684 collection : `str` 

685 Name of the collection for which a summary is to be retrieved. 

686 

687 Returns 

688 ------- 

689 summary : `~lsst.daf.butler.registry.CollectionSummary` 

690 Summary of the dataset types and governor dimension values in 

691 this collection. 

692 """ 

693 record = self._managers.collections.find(collection) 

694 return self._managers.datasets.getCollectionSummary(record) 

695 

696 def registerDatasetType(self, datasetType: DatasetType) -> bool: 

697 """Add a new `DatasetType` to the Registry. 

698 

699 It is not an error to register the same `DatasetType` twice. 

700 

701 Parameters 

702 ---------- 

703 datasetType : `DatasetType` 

704 The `DatasetType` to be added. 

705 

706 Returns 

707 ------- 

708 inserted : `bool` 

709 `True` if ``datasetType`` was inserted, `False` if an identical 

710 existing `DatasetType` was found. Note that in either case the 

711 DatasetType is guaranteed to be defined in the Registry 

712 consistently with the given definition. 

713 

714 Raises 

715 ------ 

716 ValueError 

717 Raised if the dimensions or storage class are invalid. 

718 lsst.daf.butler.registry.ConflictingDefinitionError 

719 Raised if this `DatasetType` is already registered with a different 

720 definition. 

721 

722 Notes 

723 ----- 

724 This method cannot be called within transactions, as it needs to be 

725 able to perform its own transaction to be concurrent. 

726 """ 

727 return self._managers.datasets.register(datasetType) 
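# Example (not part of the original source): defining and registering a dataset
# type. The name, dimensions, and storage class are assumptions; ``registry``
# is an existing `SqlRegistry`.
from lsst.daf.butler import DatasetType

flat_type = DatasetType(
    "flat",
    dimensions=["instrument", "detector", "physical_filter"],
    storageClass="ExposureF",
    isCalibration=True,
    universe=registry.dimensions,
)
inserted = registry.registerDatasetType(flat_type)  # False if an identical type already exists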

728 

729 def removeDatasetType(self, name: str | tuple[str, ...]) -> None: 

730 """Remove the named `DatasetType` from the registry. 

731 

732 .. warning:: 

733 

734 Registry implementations can cache the dataset type definitions. 

735 This means that deleting the dataset type definition may result in 

736 unexpected behavior from other butler processes that are active 

737 that have not seen the deletion. 

738 

739 Parameters 

740 ---------- 

741 name : `str` or `tuple` [`str`] 

742 Name of the type to be removed or a tuple of type names to be

743 removed. Wildcards are allowed.

744 

745 Raises 

746 ------ 

747 lsst.daf.butler.registry.OrphanedRecordError 

748 Raised if an attempt is made to remove the dataset type definition 

749 when there are already datasets associated with it. 

750 

751 Notes 

752 ----- 

753 If the dataset type is not registered the method will return without 

754 action. 

755 """ 

756 for datasetTypeExpression in ensure_iterable(name): 

757 # Catch any warnings from the caller specifying a component 

758 # dataset type. This will result in an error later but the 

759 # warning could be confusing when the caller is not querying 

760 # anything. 

761 with warnings.catch_warnings(): 

762 warnings.simplefilter("ignore", category=FutureWarning) 

763 datasetTypes = list(self.queryDatasetTypes(datasetTypeExpression)) 

764 if not datasetTypes: 

765 _LOG.info("Dataset type %r not defined", datasetTypeExpression) 

766 else: 

767 for datasetType in datasetTypes: 

768 self._managers.datasets.remove(datasetType.name) 

769 _LOG.info("Removed dataset type %r", datasetType.name) 

770 

771 def getDatasetType(self, name: str) -> DatasetType: 

772 """Get the `DatasetType`. 

773 

774 Parameters 

775 ---------- 

776 name : `str` 

777 Name of the type. 

778 

779 Returns 

780 ------- 

781 type : `DatasetType` 

782 The `DatasetType` associated with the given name. 

783 

784 Raises 

785 ------ 

786 lsst.daf.butler.registry.MissingDatasetTypeError 

787 Raised if the requested dataset type has not been registered. 

788 

789 Notes 

790 ----- 

791 This method handles component dataset types automatically, though most 

792 other registry operations do not. 

793 """ 

794 parent_name, component = DatasetType.splitDatasetTypeName(name) 

795 storage = self._managers.datasets[parent_name] 

796 if component is None: 

797 return storage.datasetType 

798 else: 

799 return storage.datasetType.makeComponentDatasetType(component) 

800 

801 def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool: 

802 """Test whether the given dataset ID generation mode is supported by 

803 `insertDatasets`. 

804 

805 Parameters 

806 ---------- 

807 mode : `DatasetIdGenEnum` 

808 Enum value for the mode to test. 

809 

810 Returns 

811 ------- 

812 supported : `bool` 

813 Whether the given mode is supported. 

814 """ 

815 return self._managers.datasets.supportsIdGenerationMode(mode) 

816 

817 def findDataset( 

818 self, 

819 datasetType: DatasetType | str, 

820 dataId: DataId | None = None, 

821 *, 

822 collections: CollectionArgType | None = None, 

823 timespan: Timespan | None = None, 

824 datastore_records: bool = False, 

825 **kwargs: Any, 

826 ) -> DatasetRef | None: 

827 """Find a dataset given its `DatasetType` and data ID. 

828 

829 This can be used to obtain a `DatasetRef` that permits the dataset to 

830 be read from a `Datastore`. If the dataset is a component and can not 

831 be found using the provided dataset type, a dataset ref for the parent 

832 will be returned instead but with the correct dataset type. 

833 

834 Parameters 

835 ---------- 

836 datasetType : `DatasetType` or `str` 

837 A `DatasetType` or the name of one. If this is a `DatasetType` 

838 instance, its storage class will be respected and propagated to 

839 the output, even if it differs from the dataset type definition 

840 in the registry, as long as the storage classes are convertible. 

841 dataId : `dict` or `DataCoordinate`, optional 

842 A `dict`-like object containing the `Dimension` links that identify 

843 the dataset within a collection. 

844 collections : collection expression, optional 

845 An expression that fully or partially identifies the collections to 

846 search for the dataset; see 

847 :ref:`daf_butler_collection_expressions` for more information. 

848 Defaults to ``self.defaults.collections``. 

849 timespan : `Timespan`, optional 

850 A timespan that the validity range of the dataset must overlap. 

851 If not provided, any `~CollectionType.CALIBRATION` collections 

852 matched by the ``collections`` argument will not be searched. 

853 datastore_records : `bool`, optional 

854 Whether to attach datastore records to the `DatasetRef`. 

855 **kwargs 

856 Additional keyword arguments passed to 

857 `DataCoordinate.standardize` to convert ``dataId`` to a true 

858 `DataCoordinate` or augment an existing one. 

859 

860 Returns 

861 ------- 

862 ref : `DatasetRef` 

863 A reference to the dataset, or `None` if no matching Dataset 

864 was found. 

865 

866 Raises 

867 ------ 

868 lsst.daf.butler.registry.NoDefaultCollectionError 

869 Raised if ``collections`` is `None` and 

870 ``self.defaults.collections`` is `None`. 

871 LookupError 

872 Raised if one or more data ID keys are missing. 

873 lsst.daf.butler.registry.MissingDatasetTypeError 

874 Raised if the dataset type does not exist. 

875 lsst.daf.butler.registry.MissingCollectionError 

876 Raised if any of ``collections`` does not exist in the registry. 

877 

878 Notes 

879 ----- 

880 This method simply returns `None` and does not raise an exception even 

881 when the set of collections searched is intrinsically incompatible with 

882 the dataset type, e.g. if ``datasetType.isCalibration() is False``, but 

883 only `~CollectionType.CALIBRATION` collections are being searched. 

884 This may make it harder to debug some lookup failures, but the behavior 

885 is intentional; we consider it more important that failed searches are 

886 reported consistently, regardless of the reason, and that adding 

887 additional collections that do not contain a match to the search path 

888 never changes the behavior. 

889 

890 This method handles component dataset types automatically, though most 

891 other registry operations do not. 

892 """ 

893 if collections is None: 

894 if not self.defaults.collections: 

895 raise NoDefaultCollectionError( 

896 "No collections provided to findDataset, and no defaults from registry construction." 

897 ) 

898 collections = self.defaults.collections 

899 backend = queries.SqlQueryBackend(self._db, self._managers, self.dimension_record_cache) 

900 with backend.caching_context(): 

901 collection_wildcard = CollectionWildcard.from_expression(collections, require_ordered=True) 

902 if collection_wildcard.empty(): 

903 return None 

904 matched_collections = backend.resolve_collection_wildcard(collection_wildcard) 

905 resolved_dataset_type = backend.resolve_single_dataset_type_wildcard(datasetType) 

906 dataId = DataCoordinate.standardize( 

907 dataId, 

908 dimensions=resolved_dataset_type.dimensions, 

909 universe=self.dimensions, 

910 defaults=self.defaults.dataId, 

911 **kwargs, 

912 ) 

913 governor_constraints = {name: {cast(str, dataId[name])} for name in dataId.dimensions.governors} 

914 (filtered_collections,) = backend.filter_dataset_collections( 

915 [resolved_dataset_type], 

916 matched_collections, 

917 governor_constraints=governor_constraints, 

918 ).values() 

919 if not filtered_collections: 

920 return None 

921 if timespan is None: 

922 filtered_collections = [ 

923 collection_record 

924 for collection_record in filtered_collections 

925 if collection_record.type is not CollectionType.CALIBRATION 

926 ] 

927 if filtered_collections: 

928 requested_columns = {"dataset_id", "run", "collection"} 

929 with backend.context() as context: 

930 predicate = context.make_data_coordinate_predicate( 

931 dataId.subset(resolved_dataset_type.dimensions), full=False 

932 ) 

933 if timespan is not None: 

934 requested_columns.add("timespan") 

935 predicate = predicate.logical_and( 

936 context.make_timespan_overlap_predicate( 

937 DatasetColumnTag(resolved_dataset_type.name, "timespan"), timespan 

938 ) 

939 ) 

940 relation = backend.make_dataset_query_relation( 

941 resolved_dataset_type, filtered_collections, requested_columns, context 

942 ).with_rows_satisfying(predicate) 

943 rows = list(context.fetch_iterable(relation)) 

944 else: 

945 rows = [] 

946 if not rows: 

947 return None 

948 elif len(rows) == 1: 

949 best_row = rows[0] 

950 else: 

951 rank_by_collection_key = {record.key: n for n, record in enumerate(filtered_collections)} 

952 collection_tag = DatasetColumnTag(resolved_dataset_type.name, "collection") 

953 row_iter = iter(rows) 

954 best_row = next(row_iter) 

955 best_rank = rank_by_collection_key[best_row[collection_tag]] 

956 have_tie = False 

957 for row in row_iter: 

958 if (rank := rank_by_collection_key[row[collection_tag]]) < best_rank: 

959 best_row = row 

960 best_rank = rank 

961 have_tie = False 

962 elif rank == best_rank: 

963 have_tie = True 

964 assert timespan is not None, "Rank ties should be impossible given DB constraints." 

965 if have_tie: 

966 raise CalibrationLookupError( 

967 f"Ambiguous calibration lookup for {resolved_dataset_type.name} in collections " 

968 f"{collection_wildcard.strings} with timespan {timespan}." 

969 ) 

970 reader = queries.DatasetRefReader( 

971 resolved_dataset_type, 

972 translate_collection=lambda k: self._managers.collections[k].name, 

973 ) 

974 ref = reader.read(best_row, data_id=dataId) 

975 if datastore_records: 

976 ref = self.get_datastore_records(ref) 

977 

978 return ref 
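# Example (not part of the original source): looking up a certified flat for
# one detector within a validity window. Assumes ``registry`` is an existing
# `SqlRegistry`; the data ID values, collection, and times are assumptions.
import astropy.time
from lsst.daf.butler import Timespan

begin = astropy.time.Time("2024-01-01T00:00:00", scale="tai")
end = astropy.time.Time("2024-01-02T00:00:00", scale="tai")
ref = registry.findDataset(
    "flat",
    instrument="HSC",
    detector=42,
    physical_filter="HSC-G",
    collections=["HSC/calib"],
    timespan=Timespan(begin, end),
)
if ref is not None:
    print(ref.id, ref.run)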

979 

980 @transactional 

981 def insertDatasets( 

982 self, 

983 datasetType: DatasetType | str, 

984 dataIds: Iterable[DataId], 

985 run: str | None = None, 

986 expand: bool = True, 

987 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE, 

988 ) -> list[DatasetRef]: 

989 """Insert one or more datasets into the `Registry`. 

990 

991 This always adds new datasets; to associate existing datasets with 

992 a new collection, use ``associate``. 

993 

994 Parameters 

995 ---------- 

996 datasetType : `DatasetType` or `str` 

997 A `DatasetType` or the name of one. 

998 dataIds : `~collections.abc.Iterable` of `dict` or `DataCoordinate` 

999 Dimension-based identifiers for the new datasets. 

1000 run : `str`, optional 

1001 The name of the run that produced the datasets. Defaults to 

1002 ``self.defaults.run``. 

1003 expand : `bool`, optional 

1004 If `True` (default), expand data IDs as they are inserted. This is 

1005 necessary in general to allow datastore to generate file templates, 

1006 but it may be disabled if the caller can guarantee this is 

1007 unnecessary. 

1008 idGenerationMode : `DatasetIdGenEnum`, optional 

1009 Specifies option for generating dataset IDs. By default unique IDs 

1010 are generated for each inserted dataset. 

1011 

1012 Returns 

1013 ------- 

1014 refs : `list` of `DatasetRef` 

1015 Resolved `DatasetRef` instances for all given data IDs (in the same 

1016 order). 

1017 

1018 Raises 

1019 ------ 

1020 lsst.daf.butler.registry.DatasetTypeError 

1021 Raised if ``datasetType`` is not known to registry. 

1022 lsst.daf.butler.registry.CollectionTypeError 

1023 Raised if ``run`` collection type is not `~CollectionType.RUN`. 

1024 lsst.daf.butler.registry.NoDefaultCollectionError 

1025 Raised if ``run`` is `None` and ``self.defaults.run`` is `None`. 

1026 lsst.daf.butler.registry.ConflictingDefinitionError 

1027 If a dataset with the same dataset type and data ID as one of those 

1028 given already exists in ``run``. 

1029 lsst.daf.butler.registry.MissingCollectionError 

1030 Raised if ``run`` does not exist in the registry. 

1031 """ 

1032 if isinstance(datasetType, DatasetType): 

1033 storage = self._managers.datasets.find(datasetType.name) 

1034 if storage is None: 

1035 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.") 

1036 else: 

1037 storage = self._managers.datasets.find(datasetType) 

1038 if storage is None: 

1039 raise DatasetTypeError(f"DatasetType with name '{datasetType}' has not been registered.") 

1040 if run is None: 

1041 if self.defaults.run is None: 

1042 raise NoDefaultCollectionError( 

1043 "No run provided to insertDatasets, and no default from registry construction." 

1044 ) 

1045 run = self.defaults.run 

1046 runRecord = self._managers.collections.find(run) 

1047 if runRecord.type is not CollectionType.RUN: 

1048 raise CollectionTypeError( 

1049 f"Given collection is of type {runRecord.type.name}; RUN collection required." 

1050 ) 

1051 assert isinstance(runRecord, RunRecord) 

1052 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG) 

1053 if expand: 

1054 expandedDataIds = [ 

1055 self.expandDataId(dataId, dimensions=storage.datasetType.dimensions) 

1056 for dataId in progress.wrap(dataIds, f"Expanding {storage.datasetType.name} data IDs") 

1057 ] 

1058 else: 

1059 expandedDataIds = [ 

1060 DataCoordinate.standardize(dataId, graph=storage.datasetType.dimensions) for dataId in dataIds 

1061 ] 

1062 try: 

1063 refs = list(storage.insert(runRecord, expandedDataIds, idGenerationMode)) 

1064 if self._managers.obscore: 

1065 context = queries.SqlQueryContext(self._db, self._managers.column_types) 

1066 self._managers.obscore.add_datasets(refs, context) 

1067 except sqlalchemy.exc.IntegrityError as err: 

1068 raise ConflictingDefinitionError( 

1069 "A database constraint failure was triggered by inserting " 

1070 f"one or more datasets of type {storage.datasetType} into " 

1071 f"collection '{run}'. " 

1072 "This probably means a dataset with the same data ID " 

1073 "and dataset type already exists, but it may also mean a " 

1074 "dimension row is missing." 

1075 ) from err 

1076 return refs 
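# Example (not part of the original source): registering new datasets in a RUN
# collection. Assumes ``registry`` is an existing `SqlRegistry`; the dataset
# type, run name, and data ID are assumptions, and the corresponding dimension
# records must already exist.
registry.registerRun("u/example/ingest")
refs = registry.insertDatasets(
    "raw",
    dataIds=[{"instrument": "HSC", "exposure": 903334, "detector": 10}],
    run="u/example/ingest",
)
print(refs[0].id)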

1077 

1078 @transactional 

1079 def _importDatasets( 

1080 self, 

1081 datasets: Iterable[DatasetRef], 

1082 expand: bool = True, 

1083 ) -> list[DatasetRef]: 

1084 """Import one or more datasets into the `Registry`. 

1085 

1086 The difference from the `insertDatasets` method is that this method

1087 accepts `DatasetRef` instances, which should already be resolved and

1088 have a dataset ID. If the registry supports globally-unique dataset

1089 IDs (e.g. `uuid.UUID`), datasets that already exist in the registry

1090 will be ignored if imported again.

1091 

1092 Parameters 

1093 ---------- 

1094 datasets : `~collections.abc.Iterable` of `DatasetRef` 

1095 Datasets to be inserted. All `DatasetRef` instances must have 

1096 identical ``datasetType`` and ``run`` attributes. ``run`` 

1097 attribute can be `None` and defaults to ``self.defaults.run``. 

1098 Datasets can specify ``id`` attribute which will be used for 

1099 inserted datasets. All dataset IDs must have the same type 

1100 (`int` or `uuid.UUID`), if type of dataset IDs does not match 

1101 configured backend then IDs will be ignored and new IDs will be 

1102 generated by backend. 

1103 expand : `bool`, optional 

1104 If `True` (default), expand data IDs as they are inserted. This is 

1105 necessary in general, but it may be disabled if the caller can 

1106 guarantee this is unnecessary. 

1107 

1108 Returns 

1109 ------- 

1110 refs : `list` of `DatasetRef` 

1111 Resolved `DatasetRef` instances for all given data IDs (in the same 

1112 order). If any of ``datasets`` has an ID which already exists in 

1113 the database then it will not be inserted or updated, but a 

1114 resolved `DatasetRef` will be returned for it in any case. 

1115 

1116 Raises 

1117 ------ 

1118 lsst.daf.butler.registry.NoDefaultCollectionError 

1119 Raised if ``run`` is `None` and ``self.defaults.run`` is `None`. 

1120 lsst.daf.butler.registry.DatasetTypeError 

1121 Raised if datasets correspond to more than one dataset type or 

1122 dataset type is not known to registry. 

1123 lsst.daf.butler.registry.ConflictingDefinitionError 

1124 If a dataset with the same dataset type and data ID as one of those 

1125 given already exists in ``run``. 

1126 lsst.daf.butler.registry.MissingCollectionError 

1127 Raised if ``run`` does not exist in the registry. 

1128 

1129 Notes 

1130 ----- 

1131 This method is considered package-private and internal to Butler 

1132 implementation. Clients outside daf_butler package should not use this 

1133 method. 

1134 """ 

1135 datasets = list(datasets) 

1136 if not datasets: 

1137 # nothing to do 

1138 return [] 

1139 

1140 # find dataset type 

1141 datasetTypes = {dataset.datasetType for dataset in datasets} 

1142 if len(datasetTypes) != 1: 

1143 raise DatasetTypeError(f"Multiple dataset types in input datasets: {datasetTypes}") 

1144 datasetType = datasetTypes.pop() 

1145 

1146 # get storage handler for this dataset type 

1147 storage = self._managers.datasets.find(datasetType.name) 

1148 if storage is None: 

1149 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.") 

1150 

1151 # find run name 

1152 runs = {dataset.run for dataset in datasets} 

1153 if len(runs) != 1: 

1154 raise ValueError(f"Multiple run names in input datasets: {runs}") 

1155 run = runs.pop() 

1156 

1157 runRecord = self._managers.collections.find(run) 

1158 if runRecord.type is not CollectionType.RUN: 

1159 raise CollectionTypeError( 

1160 f"Given collection '{runRecord.name}' is of type {runRecord.type.name};" 

1161 " RUN collection required." 

1162 ) 

1163 assert isinstance(runRecord, RunRecord) 

1164 

1165 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG) 

1166 if expand: 

1167 expandedDatasets = [ 

1168 dataset.expanded(self.expandDataId(dataset.dataId, dimensions=storage.datasetType.dimensions)) 

1169 for dataset in progress.wrap(datasets, f"Expanding {storage.datasetType.name} data IDs") 

1170 ] 

1171 else: 

1172 expandedDatasets = [ 

1173 DatasetRef(datasetType, dataset.dataId, id=dataset.id, run=dataset.run, conform=True) 

1174 for dataset in datasets 

1175 ] 

1176 

1177 try: 

1178 refs = list(storage.import_(runRecord, expandedDatasets)) 

1179 if self._managers.obscore: 

1180 context = queries.SqlQueryContext(self._db, self._managers.column_types) 

1181 self._managers.obscore.add_datasets(refs, context) 

1182 except sqlalchemy.exc.IntegrityError as err: 

1183 raise ConflictingDefinitionError( 

1184 "A database constraint failure was triggered by inserting " 

1185 f"one or more datasets of type {storage.datasetType} into " 

1186 f"collection '{run}'. " 

1187 "This probably means a dataset with the same data ID " 

1188 "and dataset type already exists, but it may also mean a " 

1189 "dimension row is missing." 

1190 ) from err 

1191 # Check that imported dataset IDs match the input 

1192 for imported_ref, input_ref in zip(refs, datasets, strict=True): 

1193 if imported_ref.id != input_ref.id: 

1194 raise RegistryConsistencyError( 

1195 "Imported dataset ID differs from input dataset ID, " 

1196 f"input ref: {input_ref}, imported ref: {imported_ref}" 

1197 ) 

1198 return refs 

1199 

1200 def getDataset(self, id: DatasetId) -> DatasetRef | None: 

1201 """Retrieve a Dataset entry. 

1202 

1203 Parameters 

1204 ---------- 

1205 id : `DatasetId` 

1206 The unique identifier for the dataset. 

1207 

1208 Returns 

1209 ------- 

1210 ref : `DatasetRef` or `None` 

1211 A ref to the Dataset, or `None` if no matching Dataset 

1212 was found. 

1213 """ 

1214 return self._managers.datasets.getDatasetRef(id) 

1215 

1216 @transactional 

1217 def removeDatasets(self, refs: Iterable[DatasetRef]) -> None: 

1218 """Remove datasets from the Registry. 

1219 

1220 The datasets will be removed unconditionally from all collections, and 

1221 any `Quantum` that consumed this dataset will instead be marked as

1222 having a NULL input. `Datastore` records will *not* be deleted; the 

1223 caller is responsible for ensuring that the dataset has already been 

1224 removed from all Datastores. 

1225 

1226 Parameters 

1227 ---------- 

1228 refs : `~collections.abc.Iterable` [`DatasetRef`] 

1229 References to the datasets to be removed. Must include a valid 

1230 ``id`` attribute, and should be considered invalidated upon return. 

1231 

1232 Raises 

1233 ------ 

1234 lsst.daf.butler.AmbiguousDatasetError 

1235 Raised if any ``ref.id`` is `None`. 

1236 lsst.daf.butler.registry.OrphanedRecordError 

1237 Raised if any dataset is still present in any `Datastore`. 

1238 """ 

1239 progress = Progress("lsst.daf.butler.Registry.removeDatasets", level=logging.DEBUG) 

1240 for datasetType, refsForType in progress.iter_item_chunks( 

1241 DatasetRef.iter_by_type(refs), desc="Removing datasets by type" 

1242 ): 

1243 storage = self._managers.datasets[datasetType.name] 

1244 try: 

1245 storage.delete(refsForType) 

1246 except sqlalchemy.exc.IntegrityError as err: 

1247 raise OrphanedRecordError( 

1248 "One or more datasets is still present in one or more Datastores." 

1249 ) from err 

1250 

1251 @transactional 

1252 def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

1253 """Add existing datasets to a `~CollectionType.TAGGED` collection. 

1254 

1255 If a `DatasetRef` with exactly the same ID is already in the collection,

1256 nothing is changed. If a `DatasetRef` with the same `DatasetType` and

1257 data ID but with different ID exists in the collection, 

1258 `~lsst.daf.butler.registry.ConflictingDefinitionError` is raised. 

1259 

1260 Parameters 

1261 ---------- 

1262 collection : `str` 

1263 Indicates the collection the datasets should be associated with. 

1264 refs : `~collections.abc.Iterable` [ `DatasetRef` ] 

1265 An iterable of resolved `DatasetRef` instances that already exist 

1266 in this `Registry`. 

1267 

1268 Raises 

1269 ------ 

1270 lsst.daf.butler.registry.ConflictingDefinitionError 

1271 If a Dataset with the given `DatasetRef` already exists in the 

1272 given collection. 

1273 lsst.daf.butler.registry.MissingCollectionError 

1274 Raised if ``collection`` does not exist in the registry. 

1275 lsst.daf.butler.registry.CollectionTypeError 

1276 Raised if adding new datasets to the given ``collection`` is not

1277 allowed. 

1278 """ 

1279 progress = Progress("lsst.daf.butler.Registry.associate", level=logging.DEBUG) 

1280 collectionRecord = self._managers.collections.find(collection) 

1281 if collectionRecord.type is not CollectionType.TAGGED: 

1282 raise CollectionTypeError( 

1283 f"Collection '{collection}' has type {collectionRecord.type.name}, not TAGGED." 

1284 ) 

1285 for datasetType, refsForType in progress.iter_item_chunks( 

1286 DatasetRef.iter_by_type(refs), desc="Associating datasets by type" 

1287 ): 

1288 storage = self._managers.datasets[datasetType.name] 

1289 try: 

1290 storage.associate(collectionRecord, refsForType) 

1291 if self._managers.obscore: 

1292 # If a TAGGED collection is being monitored by ObsCore 

1293 # manager then we may need to save the dataset. 

1294 context = queries.SqlQueryContext(self._db, self._managers.column_types) 

1295 self._managers.obscore.associate(refsForType, collectionRecord, context) 

1296 except sqlalchemy.exc.IntegrityError as err: 

1297 raise ConflictingDefinitionError( 

1298 f"Constraint violation while associating dataset of type {datasetType.name} with " 

1299 f"collection {collection}. This probably means that one or more datasets with the same " 

1300 "dataset type and data ID already exist in the collection, but it may also indicate " 

1301 "that the datasets do not exist." 

1302 ) from err 

1303 

1304 @transactional 

1305 def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

1306 """Remove existing datasets from a `~CollectionType.TAGGED` collection. 

1307 

1308 ``collection`` and ``ref`` combinations that are not currently 

1309 associated are silently ignored. 

1310 

1311 Parameters 

1312 ---------- 

1313 collection : `str` 

1314 The collection the datasets should no longer be associated with. 

1315 refs : `~collections.abc.Iterable` [ `DatasetRef` ] 

1316 An iterable of resolved `DatasetRef` instances that already exist 

1317 in this `Registry`. 

1318 

1319 Raises 

1320 ------ 

1321 lsst.daf.butler.AmbiguousDatasetError 

1322 Raised if any of the given dataset references is unresolved. 

1323 lsst.daf.butler.registry.MissingCollectionError 

1324 Raised if ``collection`` does not exist in the registry. 

1325 lsst.daf.butler.registry.CollectionTypeError 

1326 Raised if adding new datasets to the given ``collection`` is not

1327 allowed. 

1328 """ 

1329 progress = Progress("lsst.daf.butler.Registry.disassociate", level=logging.DEBUG) 

1330 collectionRecord = self._managers.collections.find(collection) 

1331 if collectionRecord.type is not CollectionType.TAGGED: 

1332 raise CollectionTypeError( 

1333 f"Collection '{collection}' has type {collectionRecord.type.name}; expected TAGGED." 

1334 ) 

1335 for datasetType, refsForType in progress.iter_item_chunks( 

1336 DatasetRef.iter_by_type(refs), desc="Disassociating datasets by type" 

1337 ): 

1338 storage = self._managers.datasets[datasetType.name] 

1339 storage.disassociate(collectionRecord, refsForType) 

1340 if self._managers.obscore: 

1341 self._managers.obscore.disassociate(refsForType, collectionRecord) 
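# Example (not part of the original source): curating a TAGGED collection.
# Assumes ``registry`` is an existing `SqlRegistry`; the collection names and
# query constraints are assumptions, and ``queryDatasets`` (defined elsewhere
# in this class) supplies the resolved refs.
from lsst.daf.butler import CollectionType

registry.registerCollection("u/example/tagged-subset", CollectionType.TAGGED)
refs = list(registry.queryDatasets("raw", collections=["u/example/ingest"], instrument="HSC"))
registry.associate("u/example/tagged-subset", refs)
# Later, drop one dataset from the tag without touching its RUN collection.
registry.disassociate("u/example/tagged-subset", refs[:1])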

1342 

1343 @transactional 

1344 def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None: 

1345 """Associate one or more datasets with a calibration collection and a 

1346 validity range within it. 

1347 

1348 Parameters 

1349 ---------- 

1350 collection : `str` 

1351 The name of an already-registered `~CollectionType.CALIBRATION` 

1352 collection. 

1353 refs : `~collections.abc.Iterable` [ `DatasetRef` ] 

1354 Datasets to be associated. 

1355 timespan : `Timespan` 

1356 The validity range for these datasets within the collection. 

1357 

1358 Raises 

1359 ------ 

1360 lsst.daf.butler.AmbiguousDatasetError 

1361 Raised if any of the given `DatasetRef` instances is unresolved. 

1362 lsst.daf.butler.registry.ConflictingDefinitionError 

1363 Raised if the collection already contains a different dataset with 

1364 the same `DatasetType` and data ID and an overlapping validity 

1365 range. 

1366 lsst.daf.butler.registry.CollectionTypeError 

1367 Raised if ``collection`` is not a `~CollectionType.CALIBRATION` 

1368 collection or if one or more datasets are of a dataset type for 

1369 which `DatasetType.isCalibration` returns `False`. 

1370 """ 

1371 progress = Progress("lsst.daf.butler.Registry.certify", level=logging.DEBUG) 

1372 collectionRecord = self._managers.collections.find(collection) 

1373 for datasetType, refsForType in progress.iter_item_chunks( 

1374 DatasetRef.iter_by_type(refs), desc="Certifying datasets by type" 

1375 ): 

1376 storage = self._managers.datasets[datasetType.name] 

1377 storage.certify( 

1378 collectionRecord, 

1379 refsForType, 

1380 timespan, 

1381 context=queries.SqlQueryContext(self._db, self._managers.column_types), 

1382 ) 

1383 

1384 @transactional 

1385 def decertify( 

1386 self, 

1387 collection: str, 

1388 datasetType: str | DatasetType, 

1389 timespan: Timespan, 

1390 *, 

1391 dataIds: Iterable[DataId] | None = None, 

1392 ) -> None: 

1393 """Remove or adjust datasets to clear a validity range within a 

1394 calibration collection. 

1395 

1396 Parameters 

1397 ---------- 

1398 collection : `str` 

1399 The name of an already-registered `~CollectionType.CALIBRATION` 

1400 collection. 

1401 datasetType : `str` or `DatasetType` 

1402 Name or `DatasetType` instance for the datasets to be decertified. 

1403 timespan : `Timespan`

1404 The validity range to remove datasets from within the collection. 

1405 Datasets that overlap this range but are not contained by it will 

1406 have their validity ranges adjusted to not overlap it, which may 

1407 split a single dataset validity range into two. 

1408 dataIds : iterable [`dict` or `DataCoordinate`], optional 

1409 Data IDs that should be decertified within the given validity range.

1410 If `None`, all data IDs for ``datasetType`` will be

1411 decertified. 

1412 

1413 Raises 

1414 ------ 

1415 lsst.daf.butler.registry.CollectionTypeError 

1416 Raised if ``collection`` is not a `~CollectionType.CALIBRATION` 

1417 collection or if ``datasetType.isCalibration() is False``. 

1418 """ 

1419 collectionRecord = self._managers.collections.find(collection) 

1420 if isinstance(datasetType, str): 

1421 storage = self._managers.datasets[datasetType] 

1422 else: 

1423 storage = self._managers.datasets[datasetType.name] 

1424 standardizedDataIds = None 

1425 if dataIds is not None: 

1426 standardizedDataIds = [ 

1427 DataCoordinate.standardize(d, dimensions=storage.datasetType.dimensions) for d in dataIds 

1428 ] 

1429 storage.decertify( 

1430 collectionRecord, 

1431 timespan, 

1432 dataIds=standardizedDataIds, 

1433 context=queries.SqlQueryContext(self._db, self._managers.column_types), 

1434 ) 
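# Example (not part of the original source): certifying flats into a
# CALIBRATION collection and later clearing part of the validity range.
# Assumes ``registry`` is an existing `SqlRegistry`; collection names, dataset
# type, and times are assumptions.
import astropy.time
from lsst.daf.butler import CollectionType, Timespan

registry.registerCollection("u/example/calib", CollectionType.CALIBRATION)
t0 = astropy.time.Time("2024-01-01T00:00:00", scale="tai")
t1 = astropy.time.Time("2025-01-01T00:00:00", scale="tai")
flat_refs = list(registry.queryDatasets("flat", collections=["u/example/processing"]))
registry.certify("u/example/calib", flat_refs, Timespan(t0, t1))
# Carve the second half of the range back out; overlapping validity ranges are
# truncated rather than removed outright.
mid_year = astropy.time.Time("2024-07-01T00:00:00", scale="tai")
registry.decertify("u/example/calib", "flat", Timespan(mid_year, t1))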

1435 

1436 def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager: 

1437 """Return an object that allows a new `Datastore` instance to 

1438 communicate with this `Registry`. 

1439 

1440 Returns 

1441 ------- 

1442 manager : `~.interfaces.DatastoreRegistryBridgeManager` 

1443 Object that mediates communication between this `Registry` and its 

1444 associated datastores. 

1445 """ 

1446 return self._managers.datastores 

1447 

1448 def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]: 

1449 """Retrieve datastore locations for a given dataset. 

1450 

1451 Parameters 

1452 ---------- 

1453 ref : `DatasetRef` 

1454 A reference to the dataset for which to retrieve storage 

1455 information. 

1456 

1457 Returns 

1458 ------- 

1459 datastores : `~collections.abc.Iterable` [ `str` ] 

1460 All the matching datastores holding this dataset. 

1461 

1462 Raises 

1463 ------ 

1464 lsst.daf.butler.AmbiguousDatasetError 

1465 Raised if ``ref.id`` is `None`. 

1466 """ 

1467 return self._managers.datastores.findDatastores(ref) 

1468 

1469 def expandDataId( 

1470 self, 

1471 dataId: DataId | None = None, 

1472 *, 

1473 dimensions: Iterable[str] | DimensionGroup | DimensionGraph | None = None, 

1474 graph: DimensionGraph | None = None, 

1475 records: NameLookupMapping[DimensionElement, DimensionRecord | None] | None = None, 

1476 withDefaults: bool = True, 

1477 **kwargs: Any, 

1478 ) -> DataCoordinate: 

1479 """Expand a dimension-based data ID to include additional information. 

1480 

1481 Parameters 

1482 ---------- 

1483 dataId : `DataCoordinate` or `dict`, optional 

1484 Data ID to be expanded; augmented and overridden by ``kwargs``. 

1485 dimensions : `~collections.abc.Iterable` [ `str` ], \ 

1486 `DimensionGroup`, or `DimensionGraph`, optional 

1487 The dimensions to be identified by the new `DataCoordinate`. 

1488 If not provided, will be inferred from the keys of ``dataId`` and 

1489 ``**kwargs``; the registry's dimension universe is used to 

1490 standardize the result. 

1491 graph : `DimensionGraph`, optional 

1492 Like ``dimensions``, but as a ``DimensionGraph`` instance. Ignored 

1493 if ``dimensions`` is provided. Deprecated and will be removed 

1494 after v27. 

1495 records : `~collections.abc.Mapping` [`str`, `DimensionRecord`], \ 

1496 optional 

1497 Dimension record data to use before querying the database for that 

1498 data, keyed by element name. 

1499 withDefaults : `bool`, optional 

1500 Utilize ``self.defaults.dataId`` to fill in missing governor 

1501 dimension key-value pairs. Defaults to `True` (i.e. defaults are 

1502 used). 

1503 **kwargs 

1504 Additional keywords are treated like additional key-value pairs for 

1505 ``dataId``, extending and overriding. 

1506 

1507 Returns 

1508 ------- 

1509 expanded : `DataCoordinate` 

1510 A data ID that includes full metadata for all of the dimensions it 

1511 identifies, i.e. guarantees that ``expanded.hasRecords()`` and 

1512 ``expanded.hasFull()`` both return `True`. 

1513 

1514 Raises 

1515 ------ 

1516 lsst.daf.butler.registry.DataIdError 

1517 Raised when ``dataId`` or keyword arguments specify unknown 

1518 dimensions or values, or when a resulting data ID contains 

1519 contradictory key-value pairs, according to dimension 

1520 relationships. 

1521 

1522 Notes 

1523 ----- 

1524 This method cannot be relied upon to reject invalid data ID values 

1525 for dimensions that do not actually have any record columns. For 

1526 efficiency reasons the records for these dimensions (which have only 

1527 dimension key values that are given by the caller) may be constructed 

1528 directly rather than obtained from the registry database. 

1529 """ 

1530 if not withDefaults: 

1531 defaults = None 

1532 else: 

1533 defaults = self.defaults.dataId 

1534 standardized = DataCoordinate.standardize( 

1535 dataId, 

1536 graph=graph, 

1537 dimensions=dimensions, 

1538 universe=self.dimensions, 

1539 defaults=defaults, 

1540 **kwargs, 

1541 ) 

1542 if standardized.hasRecords(): 

1543 return standardized 

1544 if records is None: 

1545 records = {} 

1546 elif isinstance(records, NamedKeyMapping): 

1547 records = records.byName() 

1548 else: 

1549 records = dict(records) 

1550 if isinstance(dataId, DataCoordinate) and dataId.hasRecords(): 

1551 for element_name in dataId.dimensions.elements: 

1552 records[element_name] = dataId.records[element_name] 

1553 keys = dict(standardized.mapping) 

1554 for element_name in standardized.dimensions.lookup_order: 

1555 element = self.dimensions[element_name] 

1556 record = records.get(element_name, ...) # Use ... to mean not found; None might mean NULL 

1557 if record is ...: 

1558 if element_name in self.dimensions.dimensions.names and keys.get(element_name) is None: 

1559 if element_name in standardized.dimensions.required: 

1560 raise DimensionNameError( 

1561 f"No value or null value for required dimension {element_name}." 

1562 ) 

1563 keys[element_name] = None 

1564 record = None 

1565 else: 

1566 record = self._managers.dimensions.fetch_one( 

1567 element_name, 

1568 DataCoordinate.standardize(keys, dimensions=element.minimal_group), 

1569 self.dimension_record_cache, 

1570 ) 

1571 records[element_name] = record 

1572 if record is not None: 

1573 for d in element.implied: 

1574 value = getattr(record, d.name) 

1575 if keys.setdefault(d.name, value) != value: 

1576 raise InconsistentDataIdError( 

1577 f"Data ID {standardized} has {d.name}={keys[d.name]!r}, " 

1578 f"but {element_name} implies {d.name}={value!r}." 

1579 ) 

1580 else: 

1581 if element_name in standardized.dimensions.required: 

1582 raise DataIdValueError( 

1583 f"Could not fetch record for required dimension {element.name} via keys {keys}." 

1584 ) 

1585 if element.defines_relationships: 

1586 raise InconsistentDataIdError( 

1587 f"Could not fetch record for element {element_name} via keys {keys}, ", 

1588 "but it is marked as defining relationships; this means one or more dimensions are " 

1589 "have inconsistent values.", 

1590 ) 

1591 for d in element.implied: 

1592 keys.setdefault(d.name, None) 

1593 records.setdefault(d.name, None) 

1594 return DataCoordinate.standardize(keys, dimensions=standardized.dimensions).expanded(records=records) 

1595 
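# Sketch of expandDataId() (hypothetical instrument/detector values; a repository
# containing the corresponding dimension records is assumed):
from lsst.daf.butler import Butler

registry = Butler("/path/to/repo").registry
data_id = registry.expandDataId(instrument="HSC", detector=50)
assert data_id.hasFull() and data_id.hasRecords()
# Records fetched during expansion are attached and can be looked up by element name.
detector_record = data_id.records["detector"]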

1596 def insertDimensionData( 

1597 self, 

1598 element: DimensionElement | str, 

1599 *data: Mapping[str, Any] | DimensionRecord, 

1600 conform: bool = True, 

1601 replace: bool = False, 

1602 skip_existing: bool = False, 

1603 ) -> None: 

1604 """Insert one or more dimension records into the database. 

1605 

1606 Parameters 

1607 ---------- 

1608 element : `DimensionElement` or `str` 

1609 The `DimensionElement` or name thereof that identifies the table 

1610 records will be inserted into. 

1611 *data : `dict` or `DimensionRecord` 

1612 One or more records to insert. 

1613 conform : `bool`, optional 

1614 If `False` (`True` is default) perform no checking or conversions, 

1615 and assume that ``element`` is a `DimensionElement` instance and 

1616 ``data`` is one or more `DimensionRecord` instances of the 

1617 appropriate subclass. 

1618 replace : `bool`, optional 

1619 If `True` (`False` is default), replace existing records in the 

1620 database if there is a conflict. 

1621 skip_existing : `bool`, optional 

1622 If `True` (`False` is default), skip insertion if a record with 

1623 the same primary key values already exists. Unlike 

1624 `syncDimensionData`, this will not detect when the given record 

1625 differs from what is in the database, and should not be used when 

1626 this is a concern. 

1627 """ 

1628 if isinstance(element, str): 

1629 element = self.dimensions[element] 

1630 if conform: 

1631 records = [ 

1632 row if isinstance(row, DimensionRecord) else element.RecordClass(**row) for row in data 

1633 ] 

1634 else: 

1635 # Ignore typing since caller said to trust them with conform=False. 

1636 records = data # type: ignore 

1637 if element.name in self.dimension_record_cache: 

1638 self.dimension_record_cache.reset() 

1639 self._managers.dimensions.insert( 

1640 element, 

1641 *records, 

1642 replace=replace, 

1643 skip_existing=skip_existing, 

1644 ) 

1645 
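# Sketch of insertDimensionData() with a plain dict (conform=True converts it to
# the element's record class). The field names shown are those of the
# ``instrument`` element in the default dimension universe and may differ in other
# universes; all values and the repository path are hypothetical.
from lsst.daf.butler import Butler

registry = Butler("/path/to/repo", writeable=True).registry
registry.insertDimensionData(
    "instrument",
    {
        "name": "MyCam",
        "visit_max": 999_999,
        "exposure_max": 999_999,
        "detector_max": 99,
        "class_name": "lsst.obs.mycam.MyCam",
    },
    skip_existing=True,  # do nothing if an instrument with this primary key exists
)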

1646 def syncDimensionData( 

1647 self, 

1648 element: DimensionElement | str, 

1649 row: Mapping[str, Any] | DimensionRecord, 

1650 conform: bool = True, 

1651 update: bool = False, 

1652 ) -> bool | dict[str, Any]: 

1653 """Synchronize the given dimension record with the database, inserting 

1654 if it does not already exist and comparing values if it does. 

1655 

1656 Parameters 

1657 ---------- 

1658 element : `DimensionElement` or `str` 

1659 The `DimensionElement` or name thereof that identifies the table 

1660 records will be inserted into. 

1661 row : `dict` or `DimensionRecord` 

1662 The record to insert. 

1663 conform : `bool`, optional 

1664 If `False` (`True` is default) perform no checking or conversions, 

1665 and assume that ``element`` is a `DimensionElement` instance and 

1666 ``row`` is a `DimensionRecord` instance of the appropriate 

1667 subclass. 

1668 update : `bool`, optional 

1669 If `True` (`False` is default), update the existing record in the 

1670 database if there is a conflict. 

1671 

1672 Returns 

1673 ------- 

1674 inserted_or_updated : `bool` or `dict` 

1675 `True` if a new row was inserted, `False` if no changes were 

1676 needed, or a `dict` mapping updated column names to their old 

1677 values if an update was performed (only possible if 

1678 ``update=True``). 

1679 

1680 Raises 

1681 ------ 

1682 lsst.daf.butler.registry.ConflictingDefinitionError 

1683 Raised if the record exists in the database (according to primary 

1684 key lookup) but is inconsistent with the given one. 

1685 """ 

1686 if conform: 

1687 if isinstance(element, str): 

1688 element = self.dimensions[element] 

1689 record = row if isinstance(row, DimensionRecord) else element.RecordClass(**row) 

1690 else: 

1691 # Ignore typing since caller said to trust them with conform=False. 

1692 record = row # type: ignore 

1693 if record.definition.name in self.dimension_record_cache: 

1694 self.dimension_record_cache.reset() 

1695 return self._managers.dimensions.sync(record, update=update) 

1696 
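# Sketch of syncDimensionData(): insert-or-compare a single record and inspect the
# return value. The ``physical_filter`` fields follow the default dimension
# universe; the instrument and filter names are hypothetical.
from lsst.daf.butler import Butler

registry = Butler("/path/to/repo", writeable=True).registry
result = registry.syncDimensionData(
    "physical_filter",
    {"instrument": "MyCam", "name": "MyCam-r", "band": "r"},
    update=True,
)
# result is True if inserted, False if already present and identical, or a dict of
# the previous column values if an update was applied.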

1697 def queryDatasetTypes( 

1698 self, 

1699 expression: Any = ..., 

1700 *, 

1701 components: bool | _Marker = _DefaultMarker, 

1702 missing: list[str] | None = None, 

1703 ) -> Iterable[DatasetType]: 

1704 """Iterate over the dataset types whose names match an expression. 

1705 

1706 Parameters 

1707 ---------- 

1708 expression : dataset type expression, optional 

1709 An expression that fully or partially identifies the dataset types 

1710 to return, such as a `str`, `re.Pattern`, or iterable thereof. 

1711 ``...`` can be used to return all dataset types, and is the 

1712 default. See :ref:`daf_butler_dataset_type_expressions` for more 

1713 information. 

1714 components : `bool`, optional 

1715 Must be `False`. Provided only for backwards compatibility. After 

1716 v27 this argument will be removed entirely. 

1717 missing : `list` of `str`, optional 

1718 String dataset type names that were explicitly given (i.e. not 

1719 regular expression patterns) but not found will be appended to this 

1720 list, if it is provided. 

1721 

1722 Returns 

1723 ------- 

1724 dataset_types : `~collections.abc.Iterable` [ `DatasetType`] 

1725 An `~collections.abc.Iterable` of `DatasetType` instances whose 

1726 names match ``expression``. 

1727 

1728 Raises 

1729 ------ 

1730 lsst.daf.butler.registry.DatasetTypeExpressionError 

1731 Raised when ``expression`` is invalid. 

1732 """ 

1733 if components is not _DefaultMarker: 

1734 if components is not False: 

1735 raise DatasetTypeError( 

1736 "Dataset component queries are no longer supported by Registry. Use " 

1737 "DatasetType methods to obtain components from parent dataset types instead." 

1738 ) 

1739 else: 

1740 warnings.warn( 

1741 "The components parameter is ignored. It will be removed after v27.", 

1742 category=FutureWarning, 

1743 stacklevel=find_outside_stacklevel("lsst.daf.butler"), 

1744 ) 

1745 wildcard = DatasetTypeWildcard.from_expression(expression) 

1746 return self._managers.datasets.resolve_wildcard(wildcard, missing=missing) 

1747 
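# Sketch of queryDatasetTypes() mixing explicit names with a regular-expression
# pattern; the names are hypothetical. Explicit names that are not registered are
# reported through ``missing`` rather than raising.
import re

from lsst.daf.butler import Butler

registry = Butler("/path/to/repo").registry
missing: list[str] = []
for dataset_type in registry.queryDatasetTypes(
    ["calexp", re.compile(r"^deepCoadd.*"), "not_a_real_type"], missing=missing
):
    print(dataset_type.name, dataset_type.dimensions)
print("not registered:", missing)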

1748 def queryCollections( 

1749 self, 

1750 expression: Any = ..., 

1751 datasetType: DatasetType | None = None, 

1752 collectionTypes: Iterable[CollectionType] | CollectionType = CollectionType.all(), 

1753 flattenChains: bool = False, 

1754 includeChains: bool | None = None, 

1755 ) -> Sequence[str]: 

1756 """Iterate over the collections whose names match an expression. 

1757 

1758 Parameters 

1759 ---------- 

1760 expression : collection expression, optional 

1761 An expression that identifies the collections to return, such as 

1762 a `str` (for full matches or partial matches via globs), 

1763 `re.Pattern` (for partial matches), or iterable thereof. ``...`` 

1764 can be used to return all collections, and is the default. 

1765 See :ref:`daf_butler_collection_expressions` for more information. 

1766 datasetType : `DatasetType`, optional 

1767 If provided, only yield collections that may contain datasets of 

1768 this type. This is a conservative approximation in general; it may 

1769 yield collections that do not have any such datasets. 

1770 collectionTypes : `~collections.abc.Set` [`CollectionType`] or \ 

1771 `CollectionType`, optional 

1772 If provided, only yield collections of these types. 

1773 flattenChains : `bool`, optional 

1774 If `True` (`False` is default), recursively yield the child 

1775 collections of matching `~CollectionType.CHAINED` collections. 

1776 includeChains : `bool`, optional 

1777 If `True`, yield records for matching `~CollectionType.CHAINED` 

1778 collections. Default is the opposite of ``flattenChains``: include 

1779 either CHAINED collections or their children, but not both. 

1780 

1781 Returns 

1782 ------- 

1783 collections : `~collections.abc.Sequence` [ `str` ] 

1784 The names of collections that match ``expression``. 

1785 

1786 Raises 

1787 ------ 

1788 lsst.daf.butler.registry.CollectionExpressionError 

1789 Raised when ``expression`` is invalid. 

1790 

1791 Notes 

1792 ----- 

1793 The order in which collections are returned is unspecified, except that 

1794 the children of a `~CollectionType.CHAINED` collection are guaranteed 

1795 to be in the order in which they are searched. When multiple parent 

1796 `~CollectionType.CHAINED` collections match the same criteria, the 

1797 order in which their child lists appear is unspecified, and the lists of 

1798 children may be incomplete if a child has multiple parents. 

1799 """ 

1800 # Right now the datasetType argument is completely ignored, but that 

1801 # is consistent with its [lack of] guarantees. DM-24939 or a follow-up 

1802 # ticket will take care of that. 

1803 try: 

1804 wildcard = CollectionWildcard.from_expression(expression) 

1805 except TypeError as exc: 

1806 raise CollectionExpressionError(f"Invalid collection expression '{expression}'") from exc 

1807 collectionTypes = ensure_iterable(collectionTypes) 

1808 return [ 

1809 record.name 

1810 for record in self._managers.collections.resolve_wildcard( 

1811 wildcard, 

1812 collection_types=frozenset(collectionTypes), 

1813 flatten_chains=flattenChains, 

1814 include_chains=includeChains, 

1815 ) 

1816 ] 

1817 
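# Sketch of queryCollections() using a glob plus type filtering and chain
# flattening; the collection prefix and repository path are hypothetical.
from lsst.daf.butler import Butler
from lsst.daf.butler.registry import CollectionType

registry = Butler("/path/to/repo").registry
names = registry.queryCollections(
    "HSC/*",
    collectionTypes={CollectionType.RUN, CollectionType.CALIBRATION},
    flattenChains=True,  # also yield the children of matching CHAINED collections
)
print(list(names))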

1818 def _makeQueryBuilder( 

1819 self, 

1820 summary: queries.QuerySummary, 

1821 doomed_by: Iterable[str] = (), 

1822 ) -> queries.QueryBuilder: 

1823 """Return a `QueryBuilder` instance capable of constructing and 

1824 managing more complex queries than those obtainable via `Registry` 

1825 interfaces. 

1826 

1827 This is an advanced interface; downstream code should prefer 

1828 `Registry.queryDataIds` and `Registry.queryDatasets` whenever those 

1829 are sufficient. 

1830 

1831 Parameters 

1832 ---------- 

1833 summary : `queries.QuerySummary` 

1834 Object describing and categorizing the full set of dimensions that 

1835 will be included in the query. 

1836 doomed_by : `~collections.abc.Iterable` of `str`, optional 

1837 A list of diagnostic messages that indicate why the query is going 

1838 to yield no results and should not even be executed. If an empty 

1839 container (default) the query will be executed unless other code 

1840 determines that it is doomed. 

1841 

1842 Returns 

1843 ------- 

1844 builder : `queries.QueryBuilder` 

1845 Object that can be used to construct and perform advanced queries. 

1846 """ 

1847 doomed_by = list(doomed_by) 

1848 backend = queries.SqlQueryBackend(self._db, self._managers, self.dimension_record_cache) 

1849 context = backend.context() 

1850 relation: Relation | None = None 

1851 if doomed_by: 

1852 relation = LeafRelation.make_doomed(context.sql_engine, set(), doomed_by) 

1853 return queries.QueryBuilder( 

1854 summary, 

1855 backend=backend, 

1856 context=context, 

1857 relation=relation, 

1858 ) 

1859 

1860 def _standardize_query_data_id_args( 

1861 self, data_id: DataId | None, *, doomed_by: list[str], **kwargs: Any 

1862 ) -> DataCoordinate: 

1863 """Preprocess the data ID arguments passed to query* methods. 

1864 

1865 Parameters 

1866 ---------- 

1867 data_id : `DataId` or `None` 

1868 Data ID that constrains the query results. 

1869 doomed_by : `list` [ `str` ] 

1870 List to append messages indicating why the query is doomed to 

1871 yield no results. 

1872 **kwargs 

1873 Additional data ID key-value pairs, extending and overriding 

1874 ``data_id``. 

1875 

1876 Returns 

1877 ------- 

1878 data_id : `DataCoordinate` 

1879 Standardized data ID. Will be fully expanded unless expansion 

1880 fails, in which case a message will be appended to ``doomed_by`` 

1881 on return. 

1882 """ 

1883 try: 

1884 return self.expandDataId(data_id, **kwargs) 

1885 except DataIdValueError as err: 

1886 doomed_by.append(str(err)) 

1887 return DataCoordinate.standardize( 

1888 data_id, **kwargs, universe=self.dimensions, defaults=self.defaults.dataId 

1889 ) 

1890 

1891 def _standardize_query_dataset_args( 

1892 self, 

1893 datasets: Any, 

1894 collections: CollectionArgType | None, 

1895 mode: Literal["find_first"] | Literal["find_all"] | Literal["constrain"] = "constrain", 

1896 *, 

1897 doomed_by: list[str], 

1898 ) -> tuple[list[DatasetType], CollectionWildcard | None]: 

1899 """Preprocess dataset arguments passed to query* methods. 

1900 

1901 Parameters 

1902 ---------- 

1903 datasets : `DatasetType`, `str`, `re.Pattern`, or iterable of these 

1904 Expression identifying dataset types. See `queryDatasetTypes` for 

1905 details. 

1906 collections : `str`, `re.Pattern`, or iterable of these 

1907 Expression identifying collections to be searched. See 

1908 `queryCollections` for details. 

1909 mode : `str`, optional 

1910 The way in which datasets are being used in this query; one of: 

1911 

1912 - "find_first": this is a query for the first dataset in an 

1913 ordered list of collections. Prohibits collection wildcards, 

1914 but permits dataset type wildcards. 

1915 

1916 - "find_all": this is a query for all datasets in all matched 

1917 collections. Permits collection and dataset type wildcards. 

1918 

1919 - "constrain": this is a query for something other than datasets, 

1920 with results constrained by dataset existence. Permits 

1921 collection wildcards and prohibits ``...`` as a dataset type 

1922 wildcard. 

1923 doomed_by : `list` [ `str` ] 

1924 List to append messages indicating why the query is doomed to 

1925 yield no results. 

1926 

1927 Returns 

1928 ------- 

1929 dataset_types : `list` [ `DatasetType` ] 

1930 List of matched dataset types. 

1931 collections : `CollectionWildcard` 

1932 Processed collection expression. 

1933 """ 

1934 dataset_types: list[DatasetType] = [] 

1935 collection_wildcard: CollectionWildcard | None = None 

1936 if datasets is not None: 

1937 if collections is None: 

1938 if not self.defaults.collections: 

1939 raise NoDefaultCollectionError("No collections, and no registry default collections.") 

1940 collection_wildcard = CollectionWildcard.from_expression(self.defaults.collections) 

1941 else: 

1942 collection_wildcard = CollectionWildcard.from_expression(collections) 

1943 if mode == "find_first" and collection_wildcard.patterns: 

1944 raise TypeError( 

1945 f"Collection pattern(s) {collection_wildcard.patterns} not allowed in this context." 

1946 ) 

1947 missing: list[str] = [] 

1948 dataset_types = self._managers.datasets.resolve_wildcard( 

1949 datasets, missing=missing, explicit_only=(mode == "constrain") 

1950 ) 

1951 if missing and mode == "constrain": 

1952 raise MissingDatasetTypeError( 

1953 f"Dataset type(s) {missing} are not registered.", 

1954 ) 

1955 doomed_by.extend(f"Dataset type {name} is not registered." for name in missing) 

1956 elif collections: 

1957 # I think this check should actually be `collections is not None`, 

1958 # but it looks like some CLI scripts use empty tuple as default. 

1959 raise ArgumentError(f"Cannot pass 'collections' (='{collections}') without 'datasets'.") 

1960 return dataset_types, collection_wildcard 

1961 

1962 def queryDatasets( 

1963 self, 

1964 datasetType: Any, 

1965 *, 

1966 collections: CollectionArgType | None = None, 

1967 dimensions: Iterable[Dimension | str] | None = None, 

1968 dataId: DataId | None = None, 

1969 where: str = "", 

1970 findFirst: bool = False, 

1971 components: bool | _Marker = _DefaultMarker, 

1972 bind: Mapping[str, Any] | None = None, 

1973 check: bool = True, 

1974 **kwargs: Any, 

1975 ) -> queries.DatasetQueryResults: 

1976 """Query for and iterate over dataset references matching user-provided 

1977 criteria. 

1978 

1979 Parameters 

1980 ---------- 

1981 datasetType : dataset type expression 

1982 An expression that fully or partially identifies the dataset types 

1983 to be queried. Allowed types include `DatasetType`, `str`, 

1984 `re.Pattern`, and iterables thereof. The special value ``...`` can 

1985 be used to query all dataset types. See 

1986 :ref:`daf_butler_dataset_type_expressions` for more information. 

1987 collections : collection expression, optional 

1988 An expression that identifies the collections to search, such as a 

1989 `str` (for full matches or partial matches via globs), `re.Pattern` 

1990 (for partial matches), or iterable thereof. ``...`` can be used to 

1991 search all collections (actually just all `~CollectionType.RUN` 

1992 collections, because this will still find all datasets). 

1993 If not provided, ``self.defaults.collections`` is used. See 

1994 :ref:`daf_butler_collection_expressions` for more information. 

1995 dimensions : `~collections.abc.Iterable` of `Dimension` or `str` 

1996 Dimensions to include in the query (in addition to those used 

1997 to identify the queried dataset type(s)), either to constrain 

1998 the resulting datasets to those for which a matching dimension 

1999 exists, or to relate the dataset type's dimensions to dimensions 

2000 referenced by the ``dataId`` or ``where`` arguments. 

2001 dataId : `dict` or `DataCoordinate`, optional 

2002 A data ID whose key-value pairs are used as equality constraints 

2003 in the query. 

2004 where : `str`, optional 

2005 A string expression similar to a SQL WHERE clause. May involve 

2006 any column of a dimension table or (as a shortcut for the primary 

2007 key column of a dimension table) dimension name. See 

2008 :ref:`daf_butler_dimension_expressions` for more information. 

2009 findFirst : `bool`, optional 

2010 If `True` (`False` is default), for each result data ID, only 

2011 yield one `DatasetRef` of each `DatasetType`, from the first 

2012 collection in which a dataset of that dataset type appears 

2013 (according to the order of ``collections`` passed in). If `True`, 

2014 ``collections`` must not contain regular expressions and may not 

2015 be ``...``. 

2016 components : `bool`, optional 

2017 Must be `False`. Provided only for backwards compatibility. After 

2018 v27 this argument will be removed entirely. 

2019 bind : `~collections.abc.Mapping`, optional 

2020 Mapping containing literal values that should be injected into the 

2021 ``where`` expression, keyed by the identifiers they replace. 

2022 Values of collection type can be expanded in some cases; see 

2023 :ref:`daf_butler_dimension_expressions_identifiers` for more 

2024 information. 

2025 check : `bool`, optional 

2026 If `True` (default) check the query for consistency before 

2027 executing it. This may reject some valid queries that resemble 

2028 common mistakes (e.g. queries for visits without specifying an 

2029 instrument). 

2030 **kwargs 

2031 Additional keyword arguments are forwarded to 

2032 `DataCoordinate.standardize` when processing the ``dataId`` 

2033 argument (and may be used to provide a constraining data ID even 

2034 when the ``dataId`` argument is `None`). 

2035 

2036 Returns 

2037 ------- 

2038 refs : `.queries.DatasetQueryResults` 

2039 Dataset references matching the given query criteria. Nested data 

2040 IDs are guaranteed to include values for all implied dimensions 

2041 (i.e. `DataCoordinate.hasFull` will return `True`), but will not 

2042 include dimension records (`DataCoordinate.hasRecords` will be 

2043 `False`) unless `~.queries.DatasetQueryResults.expanded` is 

2044 called on the result object (which returns a new one). 

2045 

2046 Raises 

2047 ------ 

2048 lsst.daf.butler.registry.DatasetTypeExpressionError 

2049 Raised when ``datasetType`` expression is invalid. 

2050 TypeError 

2051 Raised when the arguments are incompatible, such as when a 

2052 collection wildcard is passed when ``findFirst`` is `True`, or 

2053 when ``collections`` is `None` and ``self.defaults.collections`` is 

2054 also `None`. 

2055 lsst.daf.butler.registry.DataIdError 

2056 Raised when ``dataId`` or keyword arguments specify unknown 

2057 dimensions or values, or when they contain inconsistent values. 

2058 lsst.daf.butler.registry.UserExpressionError 

2059 Raised when ``where`` expression is invalid. 

2060 

2061 Notes 

2062 ----- 

2063 When multiple dataset types are queried in a single call, the 

2064 results of this operation are equivalent to querying for each dataset 

2065 type separately in turn, and no information about the relationships 

2066 between datasets of different types is included. In contexts where 

2067 that kind of information is important, the recommended pattern is to 

2068 use `queryDataIds` to first obtain data IDs (possibly with the 

2069 desired dataset types and collections passed as constraints to the 

2070 query), and then use multiple (generally much simpler) calls to 

2071 `queryDatasets` with the returned data IDs passed as constraints. 

2072 """ 

2073 if components is not _DefaultMarker: 

2074 if components is not False: 

2075 raise DatasetTypeError( 

2076 "Dataset component queries are no longer supported by Registry. Use " 

2077 "DatasetType methods to obtain components from parent dataset types instead." 

2078 ) 

2079 else: 

2080 warnings.warn( 

2081 "The components parameter is ignored. It will be removed after v27.", 

2082 category=FutureWarning, 

2083 stacklevel=find_outside_stacklevel("lsst.daf.butler"), 

2084 ) 

2085 doomed_by: list[str] = [] 

2086 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs) 

2087 resolved_dataset_types, collection_wildcard = self._standardize_query_dataset_args( 

2088 datasetType, 

2089 collections, 

2090 mode="find_first" if findFirst else "find_all", 

2091 doomed_by=doomed_by, 

2092 ) 

2093 if collection_wildcard is not None and collection_wildcard.empty(): 

2094 doomed_by.append("No datasets can be found because collection list is empty.") 

2095 return queries.ChainedDatasetQueryResults([], doomed_by=doomed_by) 

2096 parent_results: list[queries.ParentDatasetQueryResults] = [] 

2097 for resolved_dataset_type in resolved_dataset_types: 

2098 # The full set of dimensions in the query is the combination of 

2099 # those needed for the DatasetType and those explicitly requested, 

2100 # if any. 

2101 dimension_names = set(resolved_dataset_type.dimensions.names) 

2102 if dimensions is not None: 

2103 dimension_names.update(self.dimensions.conform(dimensions).names) 

2104 # Construct the summary structure needed to construct a 

2105 # QueryBuilder. 

2106 summary = queries.QuerySummary( 

2107 requested=self.dimensions.conform(dimension_names), 

2108 column_types=self._managers.column_types, 

2109 data_id=data_id, 

2110 expression=where, 

2111 bind=bind, 

2112 defaults=self.defaults.dataId, 

2113 check=check, 

2114 datasets=[resolved_dataset_type], 

2115 ) 

2116 builder = self._makeQueryBuilder(summary) 

2117 # Add the dataset subquery to the query, telling the QueryBuilder 

2118 # to include the rank of the selected collection in the results 

2119 # only if we need to findFirst. Note that if any of the 

2120 # collections are actually wildcard expressions, and 

2121 # findFirst=True, this will raise TypeError for us. 

2122 builder.joinDataset( 

2123 resolved_dataset_type, collection_wildcard, isResult=True, findFirst=findFirst 

2124 ) 

2125 query = builder.finish() 

2126 parent_results.append( 

2127 queries.ParentDatasetQueryResults(query, resolved_dataset_type, components=[None]) 

2128 ) 

2129 if not parent_results: 

2130 doomed_by.extend( 

2131 f"No registered dataset type matching {t!r} found, so no matching datasets can " 

2132 "exist in any collection." 

2133 for t in ensure_iterable(datasetType) 

2134 ) 

2135 return queries.ChainedDatasetQueryResults([], doomed_by=doomed_by) 

2136 elif len(parent_results) == 1: 

2137 return parent_results[0] 

2138 else: 

2139 return queries.ChainedDatasetQueryResults(parent_results) 

2140 
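# Sketch of queryDatasets() with a ``where`` expression and ``bind`` values (bind
# identifiers appear bare in the expression); the dataset type, collection, and
# data ID values are hypothetical.
from lsst.daf.butler import Butler

registry = Butler("/path/to/repo").registry
refs = registry.queryDatasets(
    "calexp",
    collections=["HSC/runs/RC2"],  # explicit names only, since findFirst=True
    where="instrument = my_instrument AND visit = my_visit",
    bind={"my_instrument": "HSC", "my_visit": 903334},
    findFirst=True,
)
for ref in refs:
    print(ref.datasetType.name, ref.dataId, ref.run)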

2141 def queryDataIds( 

2142 self, 

2143 # TODO: Drop Dimension support on DM-41326. 

2144 dimensions: DimensionGroup | Iterable[Dimension | str] | Dimension | str, 

2145 *, 

2146 dataId: DataId | None = None, 

2147 datasets: Any = None, 

2148 collections: CollectionArgType | None = None, 

2149 where: str = "", 

2150 components: bool | _Marker = _DefaultMarker, 

2151 bind: Mapping[str, Any] | None = None, 

2152 check: bool = True, 

2153 **kwargs: Any, 

2154 ) -> queries.DataCoordinateQueryResults: 

2155 """Query for data IDs matching user-provided criteria. 

2156 

2157 Parameters 

2158 ---------- 

2159 dimensions : `DimensionGroup`, `Dimension`, or `str`, or \ 

2160 `~collections.abc.Iterable` [ `Dimension` or `str` ] 

2161 The dimensions of the data IDs to yield, as either `Dimension` 

2162 instances or `str`. Will be automatically expanded to a complete 

2163 `DimensionGroup`. Support for `Dimension` instances is deprecated 

2164 and will not be supported after v27. 

2165 dataId : `dict` or `DataCoordinate`, optional 

2166 A data ID whose key-value pairs are used as equality constraints 

2167 in the query. 

2168 datasets : dataset type expression, optional 

2169 An expression that fully or partially identifies dataset types 

2170 that should constrain the yielded data IDs. For example, including 

2171 "raw" here would constrain the yielded ``instrument``, 

2172 ``exposure``, ``detector``, and ``physical_filter`` values to only 

2173 those for which at least one "raw" dataset exists in 

2174 ``collections``. Allowed types include `DatasetType`, `str`, 

2175 and iterables thereof. Regular expression objects (i.e. 

2176 `re.Pattern`) are deprecated and will be removed after the v26 

2177 release. See :ref:`daf_butler_dataset_type_expressions` for more 

2178 information. 

2179 collections : collection expression, optional 

2180 An expression that identifies the collections to search for 

2181 datasets, such as a `str` (for full matches or partial matches 

2182 via globs), `re.Pattern` (for partial matches), or iterable 

2183 thereof. ``...`` can be used to search all collections (actually 

2184 just all `~CollectionType.RUN` collections, because this will 

2185 still find all datasets). If not provided, 

2186 ``self.defaults.collections`` is used. Ignored unless ``datasets`` 

2187 is also passed. See :ref:`daf_butler_collection_expressions` for 

2188 more information. 

2189 where : `str`, optional 

2190 A string expression similar to a SQL WHERE clause. May involve 

2191 any column of a dimension table or (as a shortcut for the primary 

2192 key column of a dimension table) dimension name. See 

2193 :ref:`daf_butler_dimension_expressions` for more information. 

2194 components : `bool`, optional 

2195 Must be `False`. Provided only for backwards compatibility. After 

2196 v27 this argument will be removed entirely. 

2197 bind : `~collections.abc.Mapping`, optional 

2198 Mapping containing literal values that should be injected into the 

2199 ``where`` expression, keyed by the identifiers they replace. 

2200 Values of collection type can be expanded in some cases; see 

2201 :ref:`daf_butler_dimension_expressions_identifiers` for more 

2202 information. 

2203 check : `bool`, optional 

2204 If `True` (default) check the query for consistency before 

2205 executing it. This may reject some valid queries that resemble 

2206 common mistakes (e.g. queries for visits without specifying an 

2207 instrument). 

2208 **kwargs 

2209 Additional keyword arguments are forwarded to 

2210 `DataCoordinate.standardize` when processing the ``dataId`` 

2211 argument (and may be used to provide a constraining data ID even 

2212 when the ``dataId`` argument is `None`). 

2213 

2214 Returns 

2215 ------- 

2216 dataIds : `.queries.DataCoordinateQueryResults` 

2217 Data IDs matching the given query parameters. These are guaranteed 

2218 to identify all dimensions (`DataCoordinate.hasFull` returns 

2219 `True`), but will not contain `DimensionRecord` objects 

2220 (`DataCoordinate.hasRecords` returns `False`). Call 

2221 `~.queries.DataCoordinateQueryResults.expanded` on the 

2222 returned object to fetch those (and consider using 

2223 `~.queries.DataCoordinateQueryResults.materialize` on the 

2224 returned object first if the expected number of rows is very 

2225 large). See documentation for those methods for additional 

2226 information. 

2227 

2228 Raises 

2229 ------ 

2230 lsst.daf.butler.registry.NoDefaultCollectionError 

2231 Raised if ``collections`` is `None` and 

2232 ``self.defaults.collections`` is `None`. 

2233 lsst.daf.butler.registry.CollectionExpressionError 

2234 Raised when ``collections`` expression is invalid. 

2235 lsst.daf.butler.registry.DataIdError 

2236 Raised when ``dataId`` or keyword arguments specify unknown 

2237 dimensions or values, or when they contain inconsistent values. 

2238 lsst.daf.butler.registry.DatasetTypeExpressionError 

2239 Raised when ``datasetType`` expression is invalid. 

2240 lsst.daf.butler.registry.UserExpressionError 

2241 Raised when ``where`` expression is invalid. 

2242 """ 

2243 if components is not _DefaultMarker: 

2244 if components is not False: 

2245 raise DatasetTypeError( 

2246 "Dataset component queries are no longer supported by Registry. Use " 

2247 "DatasetType methods to obtain components from parent dataset types instead." 

2248 ) 

2249 else: 

2250 warnings.warn( 

2251 "The components parameter is ignored. It will be removed after v27.", 

2252 category=FutureWarning, 

2253 stacklevel=find_outside_stacklevel("lsst.daf.butler"), 

2254 ) 

2255 requested_dimensions = self.dimensions.conform(dimensions) 

2256 doomed_by: list[str] = [] 

2257 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs) 

2258 resolved_dataset_types, collection_wildcard = self._standardize_query_dataset_args( 

2259 datasets, collections, doomed_by=doomed_by 

2260 ) 

2261 if collection_wildcard is not None and collection_wildcard.empty(): 

2262 doomed_by.append("No data coordinates can be found because collection list is empty.") 

2263 summary = queries.QuerySummary( 

2264 requested=requested_dimensions, 

2265 column_types=self._managers.column_types, 

2266 data_id=data_id, 

2267 expression=where, 

2268 bind=bind, 

2269 defaults=self.defaults.dataId, 

2270 check=check, 

2271 datasets=resolved_dataset_types, 

2272 ) 

2273 builder = self._makeQueryBuilder(summary, doomed_by=doomed_by) 

2274 for datasetType in resolved_dataset_types: 

2275 builder.joinDataset(datasetType, collection_wildcard, isResult=False) 

2276 query = builder.finish() 

2277 

2278 return queries.DataCoordinateQueryResults(query) 

2279 
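# Sketch of queryDataIds() constrained by dataset existence; the dataset type and
# collection names are hypothetical.
from lsst.daf.butler import Butler

registry = Butler("/path/to/repo").registry
data_ids = registry.queryDataIds(
    ["visit", "detector"], datasets="calexp", collections="HSC/runs/RC2"
)
# Results have hasFull() == True; call expanded() to also attach dimension records.
for data_id in data_ids.expanded():
    print(data_id["visit"], data_id["detector"])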

2280 def queryDimensionRecords( 

2281 self, 

2282 element: DimensionElement | str, 

2283 *, 

2284 dataId: DataId | None = None, 

2285 datasets: Any = None, 

2286 collections: CollectionArgType | None = None, 

2287 where: str = "", 

2288 components: bool | _Marker = _DefaultMarker, 

2289 bind: Mapping[str, Any] | None = None, 

2290 check: bool = True, 

2291 **kwargs: Any, 

2292 ) -> queries.DimensionRecordQueryResults: 

2293 """Query for dimension information matching user-provided criteria. 

2294 

2295 Parameters 

2296 ---------- 

2297 element : `DimensionElement` or `str` 

2298 The dimension element to obtain records for. 

2299 dataId : `dict` or `DataCoordinate`, optional 

2300 A data ID whose key-value pairs are used as equality constraints 

2301 in the query. 

2302 datasets : dataset type expression, optional 

2303 An expression that fully or partially identifies dataset types 

2304 that should constrain the yielded records. See `queryDataIds` and 

2305 :ref:`daf_butler_dataset_type_expressions` for more information. 

2306 collections : collection expression, optional 

2307 An expression that identifies the collections to search for 

2308 datasets, such as a `str` (for full matches or partial matches 

2309 via globs), `re.Pattern` (for partial matches), or iterable 

2310 thereof. ``...`` can be used to search all collections (actually 

2311 just all `~CollectionType.RUN` collections, because this will 

2312 still find all datasets). If not provided, 

2313 ``self.defaults.collections`` is used. Ignored unless ``datasets`` 

2314 is also passed. See :ref:`daf_butler_collection_expressions` for 

2315 more information. 

2316 where : `str`, optional 

2317 A string expression similar to a SQL WHERE clause. See 

2318 `queryDataIds` and :ref:`daf_butler_dimension_expressions` for more 

2319 information. 

2320 components : `bool`, optional 

2321 Whether to apply dataset expressions to components as well (no 

2322 longer supported; see `queryDataIds` for more information). 

2323 

2324 Must be `False`. Provided only for backwards compatibility. After 

2325 v27 this argument will be removed entirely. 

2326 bind : `~collections.abc.Mapping`, optional 

2327 Mapping containing literal values that should be injected into the 

2328 ``where`` expression, keyed by the identifiers they replace. 

2329 Values of collection type can be expanded in some cases; see 

2330 :ref:`daf_butler_dimension_expressions_identifiers` for more 

2331 information. 

2332 check : `bool`, optional 

2333 If `True` (default) check the query for consistency before 

2334 executing it. This may reject some valid queries that resemble 

2335 common mistakes (e.g. queries for visits without specifying an 

2336 instrument). 

2337 **kwargs 

2338 Additional keyword arguments are forwarded to 

2339 `DataCoordinate.standardize` when processing the ``dataId`` 

2340 argument (and may be used to provide a constraining data ID even 

2341 when the ``dataId`` argument is `None`). 

2342 

2343 Returns 

2344 ------- 

2345 records : `.queries.DimensionRecordQueryResults` 

2346 Dimension records matching the given query parameters. 

2347 

2348 Raises 

2349 ------ 

2350 lsst.daf.butler.registry.NoDefaultCollectionError 

2351 Raised if ``collections`` is `None` and 

2352 ``self.defaults.collections`` is `None`. 

2353 lsst.daf.butler.registry.CollectionExpressionError 

2354 Raised when ``collections`` expression is invalid. 

2355 lsst.daf.butler.registry.DataIdError 

2356 Raised when ``dataId`` or keyword arguments specify unknown 

2357 dimensions or values, or when they contain inconsistent values. 

2358 lsst.daf.butler.registry.DatasetTypeExpressionError 

2359 Raised when ``datasetType`` expression is invalid. 

2360 lsst.daf.butler.registry.UserExpressionError 

2361 Raised when ``where`` expression is invalid. 

2362 """ 

2363 if components is not _DefaultMarker: 

2364 if components is not False: 

2365 raise DatasetTypeError( 

2366 "Dataset component queries are no longer supported by Registry. Use " 

2367 "DatasetType methods to obtain components from parent dataset types instead." 

2368 ) 

2369 else: 

2370 warnings.warn( 

2371 "The components parameter is ignored. It will be removed after v27.", 

2372 category=FutureWarning, 

2373 stacklevel=find_outside_stacklevel("lsst.daf.butler"), 

2374 ) 

2375 if not isinstance(element, DimensionElement): 

2376 try: 

2377 element = self.dimensions[element] 

2378 except KeyError as e: 

2379 raise DimensionNameError( 

2380 f"No such dimension '{element}', available dimensions: " + str(self.dimensions.elements) 

2381 ) from e 

2382 doomed_by: list[str] = [] 

2383 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs) 

2384 resolved_dataset_types, collection_wildcard = self._standardize_query_dataset_args( 

2385 datasets, collections, doomed_by=doomed_by 

2386 ) 

2387 if collection_wildcard is not None and collection_wildcard.empty(): 

2388 doomed_by.append("No dimension records can be found because collection list is empty.") 

2389 summary = queries.QuerySummary( 

2390 requested=element.minimal_group, 

2391 column_types=self._managers.column_types, 

2392 data_id=data_id, 

2393 expression=where, 

2394 bind=bind, 

2395 defaults=self.defaults.dataId, 

2396 check=check, 

2397 datasets=resolved_dataset_types, 

2398 ) 

2399 builder = self._makeQueryBuilder(summary, doomed_by=doomed_by) 

2400 for datasetType in resolved_dataset_types: 

2401 builder.joinDataset(datasetType, collection_wildcard, isResult=False) 

2402 query = builder.finish().with_record_columns(element.name) 

2403 return queries.DatabaseDimensionRecordQueryResults(query, element) 

2404 
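# Sketch of queryDimensionRecords(): fetch records for one element, constrained by
# a data ID supplied through keyword arguments; the instrument name is hypothetical.
from lsst.daf.butler import Butler

registry = Butler("/path/to/repo").registry
for record in registry.queryDimensionRecords("detector", instrument="HSC"):
    # Each record exposes the element's columns as attributes.
    print(record.dataId, record)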

2405 def queryDatasetAssociations( 

2406 self, 

2407 datasetType: str | DatasetType, 

2408 collections: CollectionArgType | None = ..., 

2409 *, 

2410 collectionTypes: Iterable[CollectionType] = CollectionType.all(), 

2411 flattenChains: bool = False, 

2412 ) -> Iterator[DatasetAssociation]: 

2413 """Iterate over dataset-collection combinations where the dataset is in 

2414 the collection. 

2415 

2416 This method is a temporary placeholder for better support for 

2417 association results in `queryDatasets`. It will probably be 

2418 removed in the future, and should be avoided in production code 

2419 whenever possible. 

2420 

2421 Parameters 

2422 ---------- 

2423 datasetType : `DatasetType` or `str` 

2424 A dataset type object or the name of one. 

2425 collections : collection expression, optional 

2426 An expression that identifies the collections to search for 

2427 datasets, such as a `str` (for full matches or partial matches 

2428 via globs), `re.Pattern` (for partial matches), or iterable 

2429 thereof. ``...`` can be used to search all collections (actually 

2430 just all `~CollectionType.RUN` collections, because this will still 

2431 find all datasets). If not provided, ``self.defaults.collections`` 

2432 is used. See :ref:`daf_butler_collection_expressions` for more 

2433 information. 

2434 collectionTypes : `~collections.abc.Set` [ `CollectionType` ], optional 

2435 If provided, only yield associations from collections of these 

2436 types. 

2437 flattenChains : `bool`, optional 

2438 If `True`, search in the children of `~CollectionType.CHAINED` 

2439 collections. If `False`, ``CHAINED`` collections are ignored. 

2440 

2441 Yields 

2442 ------ 

2443 association : `.DatasetAssociation` 

2444 Object representing the relationship between a single dataset and 

2445 a single collection. 

2446 

2447 Raises 

2448 ------ 

2449 lsst.daf.butler.registry.NoDefaultCollectionError 

2450 Raised if ``collections`` is `None` and 

2451 ``self.defaults.collections`` is `None`. 

2452 lsst.daf.butler.registry.CollectionExpressionError 

2453 Raised when ``collections`` expression is invalid. 

2454 """ 

2455 if collections is None: 

2456 if not self.defaults.collections: 

2457 raise NoDefaultCollectionError( 

2458 "No collections provided to queryDatasetAssociations, " 

2459 "and no defaults from registry construction." 

2460 ) 

2461 collections = self.defaults.collections 

2462 collection_wildcard = CollectionWildcard.from_expression(collections) 

2463 backend = queries.SqlQueryBackend(self._db, self._managers, self.dimension_record_cache) 

2464 parent_dataset_type = backend.resolve_single_dataset_type_wildcard(datasetType) 

2465 timespan_tag = DatasetColumnTag(parent_dataset_type.name, "timespan") 

2466 collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection") 

2467 for parent_collection_record in backend.resolve_collection_wildcard( 

2468 collection_wildcard, 

2469 collection_types=frozenset(collectionTypes), 

2470 flatten_chains=flattenChains, 

2471 ): 

2472 # Resolve this possibly-chained collection into a list of 

2473 # non-CHAINED collections that actually hold datasets of this 

2474 # type. 

2475 candidate_collection_records = backend.resolve_dataset_collections( 

2476 parent_dataset_type, 

2477 CollectionWildcard.from_names([parent_collection_record.name]), 

2478 allow_calibration_collections=True, 

2479 governor_constraints={}, 

2480 ) 

2481 if not candidate_collection_records: 

2482 continue 

2483 with backend.context() as context: 

2484 relation = backend.make_dataset_query_relation( 

2485 parent_dataset_type, 

2486 candidate_collection_records, 

2487 columns={"dataset_id", "run", "timespan", "collection"}, 

2488 context=context, 

2489 ) 

2490 reader = queries.DatasetRefReader( 

2491 parent_dataset_type, 

2492 translate_collection=lambda k: self._managers.collections[k].name, 

2493 full=False, 

2494 ) 

2495 for row in context.fetch_iterable(relation): 

2496 ref = reader.read(row) 

2497 collection_record = self._managers.collections[row[collection_tag]] 

2498 if collection_record.type is CollectionType.CALIBRATION: 

2499 timespan = row[timespan_tag] 

2500 else: 

2501 # For backwards compatibility and (possibly?) user 

2502 # convenience we continue to define the timespan of a 

2503 # DatasetAssociation row for a non-CALIBRATION 

2504 # collection to be None rather than a fully unbounded 

2505 # timespan. 

2506 timespan = None 

2507 yield DatasetAssociation(ref=ref, collection=collection_record.name, timespan=timespan) 

2508 
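# Sketch of queryDatasetAssociations(): list where datasets of one type are held,
# restricted to CALIBRATION collections so that the yielded timespans are
# meaningful; the dataset type and collection names are hypothetical.
from lsst.daf.butler import Butler
from lsst.daf.butler.registry import CollectionType

registry = Butler("/path/to/repo").registry
for assoc in registry.queryDatasetAssociations(
    "bias",
    collections="DECam/calib*",
    collectionTypes={CollectionType.CALIBRATION},
):
    print(assoc.ref, assoc.collection, assoc.timespan)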

2509 def get_datastore_records(self, ref: DatasetRef) -> DatasetRef: 

2510 """Retrieve datastore records for given ref. 

2511 

2512 Parameters 

2513 ---------- 

2514 ref : `DatasetRef` 

2515 Dataset reference for which to retrieve its corresponding datastore 

2516 records. 

2517 

2518 Returns 

2519 ------- 

2520 updated_ref : `DatasetRef` 

2521 Dataset reference with filled datastore records. 

2522 

2523 Notes 

2524 ----- 

2525 If this method is called with a dataset ref that is not known to the 

2526 registry, a reference with an empty set of records is returned. 

2527 """ 

2528 datastore_records: dict[str, list[StoredDatastoreItemInfo]] = {} 

2529 for opaque, record_class in self._datastore_record_classes.items(): 

2530 records = self.fetchOpaqueData(opaque, dataset_id=ref.id) 

2531 datastore_records[opaque] = [record_class.from_record(record) for record in records] 

2532 return ref.replace(datastore_records=datastore_records) 

2533 

2534 def store_datastore_records(self, refs: Mapping[str, DatasetRef]) -> None: 

2535 """Store datastore records for given refs. 

2536 

2537 Parameters 

2538 ---------- 

2539 refs : `~collections.abc.Mapping` [`str`, `DatasetRef`] 

2540 Mapping from datastore name to a dataset reference stored in that 

2541 datastore; the reference must include datastore records. 

2542 """ 

2543 for datastore_name, ref in refs.items(): 

2544 # Store ref IDs in the bridge table. 

2545 bridge = self._managers.datastores.register(datastore_name) 

2546 bridge.insert([ref]) 

2547 

2548 # store records in opaque tables 

2549 assert ref._datastore_records is not None, "Dataset ref must have datastore records" 

2550 for table_name, records in ref._datastore_records.items(): 

2551 opaque_table = self._managers.opaque.get(table_name) 

2552 assert opaque_table is not None, f"Unexpected opaque table name {table_name}" 

2553 opaque_table.insert(*(record.to_record(dataset_id=ref.id) for record in records)) 

2554 

2555 def make_datastore_tables(self, tables: Mapping[str, DatastoreOpaqueTable]) -> None: 

2556 """Create opaque tables used by datastores. 

2557 

2558 Parameters 

2559 ---------- 

2560 tables : `~collections.abc.Mapping` 

2561 Maps opaque table name to its definition. 

2562 

2563 Notes 

2564 ----- 

2565 This method should disappear in the future, once opaque table 

2566 definitions are provided during `Registry` construction. 

2567 """ 

2568 datastore_record_classes = {} 

2569 for table_name, table_def in tables.items(): 

2570 datastore_record_classes[table_name] = table_def.record_class 

2571 try: 

2572 self._managers.opaque.register(table_name, table_def.table_spec) 

2573 except ReadOnlyDatabaseError: 

2574 # If the database is read only and we just tried and failed to 

2575 # create a table, it means someone is trying to create a 

2576 # read-only butler client for an empty repo. That should be 

2577 # okay, as long as they then try to get any datasets before 

2578 # some other client creates the table. Chances are they're 

2579 # just validating configuration. 

2580 pass 

2581 self._datastore_record_classes = datastore_record_classes 

2582 

2583 def preload_cache(self) -> None: 

2584 """Immediately load caches that are used for common operations.""" 

2585 self.dimension_record_cache.preload_cache() 

2586 

2587 @property 

2588 def obsCoreTableManager(self) -> ObsCoreTableManager | None: 

2589 """The ObsCore manager instance for this registry 

2590 (`~.interfaces.ObsCoreTableManager` 

2591 or `None`). 

2592 

2593 The ObsCore manager may not be implemented for all registry backends, 

2594 and is not enabled for many repositories. 

2595 """ 

2596 return self._managers.obscore 

2597 

2598 storageClasses: StorageClassFactory 

2599 """All storage classes known to the registry (`StorageClassFactory`). 

2600 """ 

2601 

2602 _defaults: RegistryDefaults 

2603 """Default collections used for registry queries (`RegistryDefaults`)."""