Coverage for python/lsst/daf/butler/registry/sql_registry.py: 18%

583 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-04-05 02:53 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30from .. import ddl 

31 

32__all__ = ("SqlRegistry",) 

33 

34import contextlib 

35import logging 

36import warnings 

37from collections.abc import Iterable, Iterator, Mapping, Sequence 

38from typing import TYPE_CHECKING, Any, Literal, cast 

39 

40import sqlalchemy 

41from lsst.daf.relation import LeafRelation, Relation 

42from lsst.resources import ResourcePathExpression 

43from lsst.utils.introspection import find_outside_stacklevel 

44from lsst.utils.iteration import ensure_iterable 

45 

46from .._column_tags import DatasetColumnTag 

47from .._config import Config 

48from .._dataset_association import DatasetAssociation 

49from .._dataset_ref import DatasetId, DatasetIdGenEnum, DatasetRef 

50from .._dataset_type import DatasetType 

51from .._exceptions import CalibrationLookupError, DimensionNameError 

52from .._named import NamedKeyMapping, NameLookupMapping 

53from .._storage_class import StorageClassFactory 

54from .._timespan import Timespan 

55from ..dimensions import ( 

56 DataCoordinate, 

57 DataId, 

58 Dimension, 

59 DimensionConfig, 

60 DimensionElement, 

61 DimensionGraph, 

62 DimensionGroup, 

63 DimensionRecord, 

64 DimensionUniverse, 

65) 

66from ..dimensions.record_cache import DimensionRecordCache 

67from ..progress import Progress 

68from ..registry import ( 

69 ArgumentError, 

70 CollectionExpressionError, 

71 CollectionSummary, 

72 CollectionType, 

73 CollectionTypeError, 

74 ConflictingDefinitionError, 

75 DataIdValueError, 

76 DatasetTypeError, 

77 InconsistentDataIdError, 

78 MissingDatasetTypeError, 

79 NoDefaultCollectionError, 

80 OrphanedRecordError, 

81 RegistryConfig, 

82 RegistryConsistencyError, 

83 RegistryDefaults, 

84 queries, 

85) 

86from ..registry.interfaces import ChainedCollectionRecord, ReadOnlyDatabaseError, RunRecord 

87from ..registry.managers import RegistryManagerInstances, RegistryManagerTypes 

88from ..registry.wildcards import CollectionWildcard, DatasetTypeWildcard 

89from ..utils import _DefaultMarker, _Marker, transactional 

90 

91if TYPE_CHECKING: 

92 from .._butler_config import ButlerConfig 

93 from ..datastore._datastore import DatastoreOpaqueTable 

94 from ..datastore.stored_file_info import StoredDatastoreItemInfo 

95 from ..registry._registry import CollectionArgType 

96 from ..registry.interfaces import ( 

97 CollectionRecord, 

98 Database, 

99 DatastoreRegistryBridgeManager, 

100 ObsCoreTableManager, 

101 ) 

102 

103 

104_LOG = logging.getLogger(__name__) 

105 

106 

107class SqlRegistry: 

108 """Butler Registry implementation that uses SQL database as backend. 

109 

110 Parameters 

111 ---------- 

112 database : `Database` 

113 Database instance to store Registry. 

114 defaults : `RegistryDefaults` 

115 Default collection search path and/or output `~CollectionType.RUN` 

116 collection. 

117 managers : `RegistryManagerInstances` 

118 All the managers required for this registry. 

119 """ 

120 

121 defaultConfigFile: str | None = None 

122 """Path to configuration defaults. Accessed within the ``configs`` resource 

123 or relative to a search path. Can be None if no defaults specified. 

124 """ 

125 

126 @classmethod 

127 def forceRegistryConfig( 

128 cls, config: ButlerConfig | RegistryConfig | Config | str | None 

129 ) -> RegistryConfig: 

130 """Force the supplied config to a `RegistryConfig`. 

131 

132 Parameters 

133 ---------- 

134 config : `RegistryConfig`, `Config` or `str` or `None` 

135 Registry configuration, if missing then default configuration will 

136 be loaded from registry.yaml. 

137 

138 Returns 

139 ------- 

140 registry_config : `RegistryConfig` 

141 A registry config. 

142 """ 

143 if not isinstance(config, RegistryConfig): 

144 if isinstance(config, str | Config) or config is None: 

145 config = RegistryConfig(config) 

146 else: 

147 raise ValueError(f"Incompatible Registry configuration: {config}") 

148 return config 

149 

150 @classmethod 

151 def createFromConfig( 

152 cls, 

153 config: RegistryConfig | str | None = None, 

154 dimensionConfig: DimensionConfig | str | None = None, 

155 butlerRoot: ResourcePathExpression | None = None, 

156 ) -> SqlRegistry: 

157 """Create registry database and return `SqlRegistry` instance. 

158 

159 This method initializes database contents, database must be empty 

160 prior to calling this method. 

161 

162 Parameters 

163 ---------- 

164 config : `RegistryConfig` or `str`, optional 

165 Registry configuration, if missing then default configuration will 

166 be loaded from registry.yaml. 

167 dimensionConfig : `DimensionConfig` or `str`, optional 

168 Dimensions configuration, if missing then default configuration 

169 will be loaded from dimensions.yaml. 

170 butlerRoot : convertible to `lsst.resources.ResourcePath`, optional 

171 Path to the repository root this `SqlRegistry` will manage. 

172 

173 Returns 

174 ------- 

175 registry : `SqlRegistry` 

176 A new `SqlRegistry` instance. 

177 """ 

178 config = cls.forceRegistryConfig(config) 

179 config.replaceRoot(butlerRoot) 

180 

181 if isinstance(dimensionConfig, str): 

182 dimensionConfig = DimensionConfig(dimensionConfig) 

183 elif dimensionConfig is None: 

184 dimensionConfig = DimensionConfig() 

185 elif not isinstance(dimensionConfig, DimensionConfig): 

186 raise TypeError(f"Incompatible Dimension configuration type: {type(dimensionConfig)}") 

187 

188 DatabaseClass = config.getDatabaseClass() 

189 database = DatabaseClass.fromUri( 

190 config.connectionString, origin=config.get("origin", 0), namespace=config.get("namespace") 

191 ) 

192 managerTypes = RegistryManagerTypes.fromConfig(config) 

193 managers = managerTypes.makeRepo(database, dimensionConfig) 

194 return cls(database, RegistryDefaults(), managers) 

195 

196 @classmethod 

197 def fromConfig( 

198 cls, 

199 config: ButlerConfig | RegistryConfig | Config | str, 

200 butlerRoot: ResourcePathExpression | None = None, 

201 writeable: bool = True, 

202 defaults: RegistryDefaults | None = None, 

203 ) -> SqlRegistry: 

204 """Create `Registry` subclass instance from `config`. 

205 

206 Registry database must be initialized prior to calling this method. 

207 

208 Parameters 

209 ---------- 

210 config : `ButlerConfig`, `RegistryConfig`, `Config` or `str` 

211 Registry configuration. 

212 butlerRoot : `lsst.resources.ResourcePathExpression`, optional 

213 Path to the repository root this `Registry` will manage. 

214 writeable : `bool`, optional 

215 If `True` (default) create a read-write connection to the database. 

216 defaults : `RegistryDefaults`, optional 

217 Default collection search path and/or output `~CollectionType.RUN` 

218 collection. 

219 

220 Returns 

221 ------- 

222 registry : `SqlRegistry` 

223 A new `SqlRegistry` subclass instance. 

224 """ 

225 config = cls.forceRegistryConfig(config) 

226 config.replaceRoot(butlerRoot) 

227 DatabaseClass = config.getDatabaseClass() 

228 database = DatabaseClass.fromUri( 

229 config.connectionString, 

230 origin=config.get("origin", 0), 

231 namespace=config.get("namespace"), 

232 writeable=writeable, 

233 ) 

234 managerTypes = RegistryManagerTypes.fromConfig(config) 

235 with database.session(): 

236 managers = managerTypes.loadRepo(database) 

237 if defaults is None: 

238 defaults = RegistryDefaults() 

239 return cls(database, defaults, managers) 

240 

241 def __init__( 

242 self, 

243 database: Database, 

244 defaults: RegistryDefaults, 

245 managers: RegistryManagerInstances, 

246 ): 

247 self._db = database 

248 self._managers = managers 

249 self.storageClasses = StorageClassFactory() 

250 # This is public to SqlRegistry's internal-to-daf_butler callers, but 

251 # it is intentionally not part of RegistryShim. 

252 self.dimension_record_cache = DimensionRecordCache( 

253 self._managers.dimensions.universe, 

254 fetch=self._managers.dimensions.fetch_cache_dict, 

255 ) 

256 # Intentionally invoke property setter to initialize defaults. This 

257 # can only be done after most of the rest of Registry has already been 

258 # initialized, and must be done before the property getter is used. 

259 self.defaults = defaults 

260 # TODO: This is currently initialized by `make_datastore_tables`, 

261 # eventually we'll need to do it during construction. 

262 # The mapping is indexed by the opaque table name. 

263 self._datastore_record_classes: Mapping[str, type[StoredDatastoreItemInfo]] = {} 

264 

265 def __str__(self) -> str: 

266 return str(self._db) 

267 

268 def __repr__(self) -> str: 

269 return f"SqlRegistry({self._db!r}, {self.dimensions!r})" 

270 

271 def isWriteable(self) -> bool: 

272 """Return `True` if this registry allows write operations, and `False` 

273 otherwise. 

274 """ 

275 return self._db.isWriteable() 

276 

277 def copy(self, defaults: RegistryDefaults | None = None) -> SqlRegistry: 

278 """Create a new `SqlRegistry` backed by the same data repository 

279 as this one and sharing a database connection pool with it, but with 

280 independent defaults and database sessions. 

281 

282 Parameters 

283 ---------- 

284 defaults : `~lsst.daf.butler.registry.RegistryDefaults`, optional 

285 Default collections and data ID values for the new registry. If 

286 not provided, ``self.defaults`` will be used (but future changes 

287 to either registry's defaults will not affect the other). 

288 

289 Returns 

290 ------- 

291 copy : `SqlRegistry` 

292 A new `SqlRegistry` instance with its own defaults. 

293 """ 

294 if defaults is None: 

295 # No need to copy, because `RegistryDefaults` is immutable; we 

296 # effectively copy on write. 

297 defaults = self.defaults 

298 db = self._db.clone() 

299 result = SqlRegistry(db, defaults, self._managers.clone(db)) 

300 result._datastore_record_classes = dict(self._datastore_record_classes) 

301 result.dimension_record_cache.load_from(self.dimension_record_cache) 

302 return result 

303 

304 @property 

305 def dimensions(self) -> DimensionUniverse: 

306 """Definitions of all dimensions recognized by this `Registry` 

307 (`DimensionUniverse`). 

308 """ 

309 return self._managers.dimensions.universe 

310 

311 @property 

312 def defaults(self) -> RegistryDefaults: 

313 """Default collection search path and/or output `~CollectionType.RUN` 

314 collection (`~lsst.daf.butler.registry.RegistryDefaults`). 

315 

316 This is an immutable struct whose components may not be set 

317 individually, but the entire struct can be set by assigning to this 

318 property. 

319 """ 

320 return self._defaults 

321 

322 @defaults.setter 

323 def defaults(self, value: RegistryDefaults) -> None: 

324 if value.run is not None: 

325 self.registerRun(value.run) 

326 value.finish(self) 

327 self._defaults = value 

328 

329 def refresh(self) -> None: 

330 """Refresh all in-memory state by querying the database. 

331 

332 This may be necessary to enable querying for entities added by other 

333 registry instances after this one was constructed. 

334 """ 

335 self.dimension_record_cache.reset() 

336 with self._db.transaction(): 

337 self._managers.refresh() 

338 

339 def caching_context(self) -> contextlib.AbstractContextManager[None]: 

340 """Return context manager that enables caching. 

341 

342 Returns 

343 ------- 

344 manager 

345 A context manager that enables client-side caching. Entering 

346 the context returns `None`. 

347 """ 

348 return self._managers.caching_context_manager() 

349 

350 @contextlib.contextmanager 

351 def transaction(self, *, savepoint: bool = False) -> Iterator[None]: 

352 """Return a context manager that represents a transaction. 

353 

354 Parameters 

355 ---------- 

356 savepoint : `bool` 

357 Whether to issue a SAVEPOINT in the database. 

358 

359 Yields 

360 ------ 

361 `None` 

362 """ 

363 with self._db.transaction(savepoint=savepoint): 

364 yield 

365 

366 def resetConnectionPool(self) -> None: 

367 """Reset SQLAlchemy connection pool for `SqlRegistry` database. 

368 

369 This operation is useful when using registry with fork-based 

370 multiprocessing. To use registry across fork boundary one has to make 

371 sure that there are no currently active connections (no session or 

372 transaction is in progress) and connection pool is reset using this 

373 method. This method should be called by the child process immediately 

374 after the fork. 

375 """ 

376 self._db._engine.dispose() 

377 

378 def registerOpaqueTable(self, tableName: str, spec: ddl.TableSpec) -> None: 

379 """Add an opaque (to the `Registry`) table for use by a `Datastore` or 

380 other data repository client. 

381 

382 Opaque table records can be added via `insertOpaqueData`, retrieved via 

383 `fetchOpaqueData`, and removed via `deleteOpaqueData`. 

384 

385 Parameters 

386 ---------- 

387 tableName : `str` 

388 Logical name of the opaque table. This may differ from the 

389 actual name used in the database by a prefix and/or suffix. 

390 spec : `ddl.TableSpec` 

391 Specification for the table to be added. 

392 """ 

393 self._managers.opaque.register(tableName, spec) 

394 

395 @transactional 

396 def insertOpaqueData(self, tableName: str, *data: dict) -> None: 

397 """Insert records into an opaque table. 

398 

399 Parameters 

400 ---------- 

401 tableName : `str` 

402 Logical name of the opaque table. Must match the name used in a 

403 previous call to `registerOpaqueTable`. 

404 *data 

405 Each additional positional argument is a dictionary that represents 

406 a single row to be added. 

407 """ 

408 self._managers.opaque[tableName].insert(*data) 

409 

410 def fetchOpaqueData(self, tableName: str, **where: Any) -> Iterator[Mapping[str, Any]]: 

411 """Retrieve records from an opaque table. 

412 

413 Parameters 

414 ---------- 

415 tableName : `str` 

416 Logical name of the opaque table. Must match the name used in a 

417 previous call to `registerOpaqueTable`. 

418 **where 

419 Additional keyword arguments are interpreted as equality 

420 constraints that restrict the returned rows (combined with AND); 

421 keyword arguments are column names and values are the values they 

422 must have. 

423 

424 Yields 

425 ------ 

426 row : `dict` 

427 A dictionary representing a single result row. 

428 """ 

429 yield from self._managers.opaque[tableName].fetch(**where) 

430 

431 @transactional 

432 def deleteOpaqueData(self, tableName: str, **where: Any) -> None: 

433 """Remove records from an opaque table. 

434 

435 Parameters 

436 ---------- 

437 tableName : `str` 

438 Logical name of the opaque table. Must match the name used in a 

439 previous call to `registerOpaqueTable`. 

440 **where 

441 Additional keyword arguments are interpreted as equality 

442 constraints that restrict the deleted rows (combined with AND); 

443 keyword arguments are column names and values are the values they 

444 must have. 

445 """ 

446 self._managers.opaque[tableName].delete(where.keys(), where) 

447 

448 def registerCollection( 

449 self, name: str, type: CollectionType = CollectionType.TAGGED, doc: str | None = None 

450 ) -> bool: 

451 """Add a new collection if one with the given name does not exist. 

452 

453 Parameters 

454 ---------- 

455 name : `str` 

456 The name of the collection to create. 

457 type : `CollectionType` 

458 Enum value indicating the type of collection to create. 

459 doc : `str`, optional 

460 Documentation string for the collection. 

461 

462 Returns 

463 ------- 

464 registered : `bool` 

465 Boolean indicating whether the collection was already registered 

466 or was created by this call. 

467 

468 Notes 

469 ----- 

470 This method cannot be called within transactions, as it needs to be 

471 able to perform its own transaction to be concurrent. 

472 """ 

473 _, registered = self._managers.collections.register(name, type, doc=doc) 

474 return registered 

475 

476 def getCollectionType(self, name: str) -> CollectionType: 

477 """Return an enumeration value indicating the type of the given 

478 collection. 

479 

480 Parameters 

481 ---------- 

482 name : `str` 

483 The name of the collection. 

484 

485 Returns 

486 ------- 

487 type : `CollectionType` 

488 Enum value indicating the type of this collection. 

489 

490 Raises 

491 ------ 

492 lsst.daf.butler.registry.MissingCollectionError 

493 Raised if no collection with the given name exists. 

494 """ 

495 return self._managers.collections.find(name).type 

496 

497 def get_collection_record(self, name: str) -> CollectionRecord: 

498 """Return the record for this collection. 

499 

500 Parameters 

501 ---------- 

502 name : `str` 

503 Name of the collection for which the record is to be retrieved. 

504 

505 Returns 

506 ------- 

507 record : `CollectionRecord` 

508 The record for this collection. 

509 """ 

510 return self._managers.collections.find(name) 

511 

512 def registerRun(self, name: str, doc: str | None = None) -> bool: 

513 """Add a new run if one with the given name does not exist. 

514 

515 Parameters 

516 ---------- 

517 name : `str` 

518 The name of the run to create. 

519 doc : `str`, optional 

520 Documentation string for the collection. 

521 

522 Returns 

523 ------- 

524 registered : `bool` 

525 Boolean indicating whether a new run was registered. `False` 

526 if it already existed. 

527 

528 Notes 

529 ----- 

530 This method cannot be called within transactions, as it needs to be 

531 able to perform its own transaction to be concurrent. 

532 """ 

533 _, registered = self._managers.collections.register(name, CollectionType.RUN, doc=doc) 

534 return registered 

535 

536 @transactional 

537 def removeCollection(self, name: str) -> None: 

538 """Remove the given collection from the registry. 

539 

540 Parameters 

541 ---------- 

542 name : `str` 

543 The name of the collection to remove. 

544 

545 Raises 

546 ------ 

547 lsst.daf.butler.registry.MissingCollectionError 

548 Raised if no collection with the given name exists. 

549 sqlalchemy.exc.IntegrityError 

550 Raised if the database rows associated with the collection are 

551 still referenced by some other table, such as a dataset in a 

552 datastore (for `~CollectionType.RUN` collections only) or a 

553 `~CollectionType.CHAINED` collection of which this collection is 

554 a child. 

555 

556 Notes 

557 ----- 

558 If this is a `~CollectionType.RUN` collection, all datasets and quanta 

559 in it will removed from the `Registry` database. This requires that 

560 those datasets be removed (or at least trashed) from any datastores 

561 that hold them first. 

562 

563 A collection may not be deleted as long as it is referenced by a 

564 `~CollectionType.CHAINED` collection; the ``CHAINED`` collection must 

565 be deleted or redefined first. 

566 """ 

567 self._managers.collections.remove(name) 

568 

569 def getCollectionChain(self, parent: str) -> tuple[str, ...]: 

570 """Return the child collections in a `~CollectionType.CHAINED` 

571 collection. 

572 

573 Parameters 

574 ---------- 

575 parent : `str` 

576 Name of the chained collection. Must have already been added via 

577 a call to `Registry.registerCollection`. 

578 

579 Returns 

580 ------- 

581 children : `~collections.abc.Sequence` [ `str` ] 

582 An ordered sequence of collection names that are searched when the 

583 given chained collection is searched. 

584 

585 Raises 

586 ------ 

587 lsst.daf.butler.registry.MissingCollectionError 

588 Raised if ``parent`` does not exist in the `Registry`. 

589 lsst.daf.butler.registry.CollectionTypeError 

590 Raised if ``parent`` does not correspond to a 

591 `~CollectionType.CHAINED` collection. 

592 """ 

593 record = self._managers.collections.find(parent) 

594 if record.type is not CollectionType.CHAINED: 

595 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.") 

596 assert isinstance(record, ChainedCollectionRecord) 

597 return record.children 

598 

599 @transactional 

600 def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None: 

601 """Define or redefine a `~CollectionType.CHAINED` collection. 

602 

603 Parameters 

604 ---------- 

605 parent : `str` 

606 Name of the chained collection. Must have already been added via 

607 a call to `Registry.registerCollection`. 

608 children : collection expression 

609 An expression defining an ordered search of child collections, 

610 generally an iterable of `str`; see 

611 :ref:`daf_butler_collection_expressions` for more information. 

612 flatten : `bool`, optional 

613 If `True` (`False` is default), recursively flatten out any nested 

614 `~CollectionType.CHAINED` collections in ``children`` first. 

615 

616 Raises 

617 ------ 

618 lsst.daf.butler.registry.MissingCollectionError 

619 Raised when any of the given collections do not exist in the 

620 `Registry`. 

621 lsst.daf.butler.registry.CollectionTypeError 

622 Raised if ``parent`` does not correspond to a 

623 `~CollectionType.CHAINED` collection. 

624 CollectionCycleError 

625 Raised if the given collections contains a cycle. 

626 

627 Notes 

628 ----- 

629 If this function is called within a call to ``Butler.transaction``, it 

630 will hold a lock that prevents other processes from modifying the 

631 parent collection until the end of the transaction. Keep these 

632 transactions short. 

633 """ 

634 record = self._managers.collections.find(parent) 

635 if record.type is not CollectionType.CHAINED: 

636 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.") 

637 assert isinstance(record, ChainedCollectionRecord) 

638 children = CollectionWildcard.from_expression(children).require_ordered() 

639 if children != record.children or flatten: 

640 self._managers.collections.update_chain(record, children, flatten=flatten) 

641 

642 def getCollectionParentChains(self, collection: str) -> set[str]: 

643 """Return the CHAINED collections that directly contain the given one. 

644 

645 Parameters 

646 ---------- 

647 collection : `str` 

648 Name of the collection. 

649 

650 Returns 

651 ------- 

652 chains : `set` of `str` 

653 Set of `~CollectionType.CHAINED` collection names. 

654 """ 

655 return self._managers.collections.getParentChains(self._managers.collections.find(collection).key) 

656 

657 def getCollectionDocumentation(self, collection: str) -> str | None: 

658 """Retrieve the documentation string for a collection. 

659 

660 Parameters 

661 ---------- 

662 collection : `str` 

663 Name of the collection. 

664 

665 Returns 

666 ------- 

667 docs : `str` or `None` 

668 Docstring for the collection with the given name. 

669 """ 

670 return self._managers.collections.getDocumentation(self._managers.collections.find(collection).key) 

671 

672 def setCollectionDocumentation(self, collection: str, doc: str | None) -> None: 

673 """Set the documentation string for a collection. 

674 

675 Parameters 

676 ---------- 

677 collection : `str` 

678 Name of the collection. 

679 doc : `str` or `None` 

680 Docstring for the collection with the given name; will replace any 

681 existing docstring. Passing `None` will remove any existing 

682 docstring. 

683 """ 

684 self._managers.collections.setDocumentation(self._managers.collections.find(collection).key, doc) 

685 

686 def getCollectionSummary(self, collection: str) -> CollectionSummary: 

687 """Return a summary for the given collection. 

688 

689 Parameters 

690 ---------- 

691 collection : `str` 

692 Name of the collection for which a summary is to be retrieved. 

693 

694 Returns 

695 ------- 

696 summary : `~lsst.daf.butler.registry.CollectionSummary` 

697 Summary of the dataset types and governor dimension values in 

698 this collection. 

699 """ 

700 record = self._managers.collections.find(collection) 

701 return self._managers.datasets.getCollectionSummary(record) 

702 

703 def registerDatasetType(self, datasetType: DatasetType) -> bool: 

704 """Add a new `DatasetType` to the Registry. 

705 

706 It is not an error to register the same `DatasetType` twice. 

707 

708 Parameters 

709 ---------- 

710 datasetType : `DatasetType` 

711 The `DatasetType` to be added. 

712 

713 Returns 

714 ------- 

715 inserted : `bool` 

716 `True` if ``datasetType`` was inserted, `False` if an identical 

717 existing `DatasetType` was found. Note that in either case the 

718 DatasetType is guaranteed to be defined in the Registry 

719 consistently with the given definition. 

720 

721 Raises 

722 ------ 

723 ValueError 

724 Raised if the dimensions or storage class are invalid. 

725 lsst.daf.butler.registry.ConflictingDefinitionError 

726 Raised if this `DatasetType` is already registered with a different 

727 definition. 

728 

729 Notes 

730 ----- 

731 This method cannot be called within transactions, as it needs to be 

732 able to perform its own transaction to be concurrent. 

733 """ 

734 return self._managers.datasets.register(datasetType) 

735 

736 def removeDatasetType(self, name: str | tuple[str, ...]) -> None: 

737 """Remove the named `DatasetType` from the registry. 

738 

739 .. warning:: 

740 

741 Registry implementations can cache the dataset type definitions. 

742 This means that deleting the dataset type definition may result in 

743 unexpected behavior from other butler processes that are active 

744 that have not seen the deletion. 

745 

746 Parameters 

747 ---------- 

748 name : `str` or `tuple` [`str`] 

749 Name of the type to be removed or tuple containing a list of type 

750 names to be removed. Wildcards are allowed. 

751 

752 Raises 

753 ------ 

754 lsst.daf.butler.registry.OrphanedRecordError 

755 Raised if an attempt is made to remove the dataset type definition 

756 when there are already datasets associated with it. 

757 

758 Notes 

759 ----- 

760 If the dataset type is not registered the method will return without 

761 action. 

762 """ 

763 for datasetTypeExpression in ensure_iterable(name): 

764 # Catch any warnings from the caller specifying a component 

765 # dataset type. This will result in an error later but the 

766 # warning could be confusing when the caller is not querying 

767 # anything. 

768 with warnings.catch_warnings(): 

769 warnings.simplefilter("ignore", category=FutureWarning) 

770 datasetTypes = list(self.queryDatasetTypes(datasetTypeExpression)) 

771 if not datasetTypes: 

772 _LOG.info("Dataset type %r not defined", datasetTypeExpression) 

773 else: 

774 for datasetType in datasetTypes: 

775 self._managers.datasets.remove(datasetType.name) 

776 _LOG.info("Removed dataset type %r", datasetType.name) 

777 

778 def getDatasetType(self, name: str) -> DatasetType: 

779 """Get the `DatasetType`. 

780 

781 Parameters 

782 ---------- 

783 name : `str` 

784 Name of the type. 

785 

786 Returns 

787 ------- 

788 type : `DatasetType` 

789 The `DatasetType` associated with the given name. 

790 

791 Raises 

792 ------ 

793 lsst.daf.butler.registry.MissingDatasetTypeError 

794 Raised if the requested dataset type has not been registered. 

795 

796 Notes 

797 ----- 

798 This method handles component dataset types automatically, though most 

799 other registry operations do not. 

800 """ 

801 parent_name, component = DatasetType.splitDatasetTypeName(name) 

802 storage = self._managers.datasets[parent_name] 

803 if component is None: 

804 return storage.datasetType 

805 else: 

806 return storage.datasetType.makeComponentDatasetType(component) 

807 

808 def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool: 

809 """Test whether the given dataset ID generation mode is supported by 

810 `insertDatasets`. 

811 

812 Parameters 

813 ---------- 

814 mode : `DatasetIdGenEnum` 

815 Enum value for the mode to test. 

816 

817 Returns 

818 ------- 

819 supported : `bool` 

820 Whether the given mode is supported. 

821 """ 

822 return self._managers.datasets.supportsIdGenerationMode(mode) 

823 

824 def findDataset( 

825 self, 

826 datasetType: DatasetType | str, 

827 dataId: DataId | None = None, 

828 *, 

829 collections: CollectionArgType | None = None, 

830 timespan: Timespan | None = None, 

831 datastore_records: bool = False, 

832 **kwargs: Any, 

833 ) -> DatasetRef | None: 

834 """Find a dataset given its `DatasetType` and data ID. 

835 

836 This can be used to obtain a `DatasetRef` that permits the dataset to 

837 be read from a `Datastore`. If the dataset is a component and can not 

838 be found using the provided dataset type, a dataset ref for the parent 

839 will be returned instead but with the correct dataset type. 

840 

841 Parameters 

842 ---------- 

843 datasetType : `DatasetType` or `str` 

844 A `DatasetType` or the name of one. If this is a `DatasetType` 

845 instance, its storage class will be respected and propagated to 

846 the output, even if it differs from the dataset type definition 

847 in the registry, as long as the storage classes are convertible. 

848 dataId : `dict` or `DataCoordinate`, optional 

849 A `dict`-like object containing the `Dimension` links that identify 

850 the dataset within a collection. 

851 collections : collection expression, optional 

852 An expression that fully or partially identifies the collections to 

853 search for the dataset; see 

854 :ref:`daf_butler_collection_expressions` for more information. 

855 Defaults to ``self.defaults.collections``. 

856 timespan : `Timespan`, optional 

857 A timespan that the validity range of the dataset must overlap. 

858 If not provided, any `~CollectionType.CALIBRATION` collections 

859 matched by the ``collections`` argument will not be searched. 

860 datastore_records : `bool`, optional 

861 Whether to attach datastore records to the `DatasetRef`. 

862 **kwargs 

863 Additional keyword arguments passed to 

864 `DataCoordinate.standardize` to convert ``dataId`` to a true 

865 `DataCoordinate` or augment an existing one. 

866 

867 Returns 

868 ------- 

869 ref : `DatasetRef` 

870 A reference to the dataset, or `None` if no matching Dataset 

871 was found. 

872 

873 Raises 

874 ------ 

875 lsst.daf.butler.registry.NoDefaultCollectionError 

876 Raised if ``collections`` is `None` and 

877 ``self.defaults.collections`` is `None`. 

878 LookupError 

879 Raised if one or more data ID keys are missing. 

880 lsst.daf.butler.registry.MissingDatasetTypeError 

881 Raised if the dataset type does not exist. 

882 lsst.daf.butler.registry.MissingCollectionError 

883 Raised if any of ``collections`` does not exist in the registry. 

884 

885 Notes 

886 ----- 

887 This method simply returns `None` and does not raise an exception even 

888 when the set of collections searched is intrinsically incompatible with 

889 the dataset type, e.g. if ``datasetType.isCalibration() is False``, but 

890 only `~CollectionType.CALIBRATION` collections are being searched. 

891 This may make it harder to debug some lookup failures, but the behavior 

892 is intentional; we consider it more important that failed searches are 

893 reported consistently, regardless of the reason, and that adding 

894 additional collections that do not contain a match to the search path 

895 never changes the behavior. 

896 

897 This method handles component dataset types automatically, though most 

898 other registry operations do not. 

899 """ 

900 if collections is None: 

901 if not self.defaults.collections: 

902 raise NoDefaultCollectionError( 

903 "No collections provided to findDataset, and no defaults from registry construction." 

904 ) 

905 collections = self.defaults.collections 

906 backend = queries.SqlQueryBackend(self._db, self._managers, self.dimension_record_cache) 

907 with backend.caching_context(): 

908 collection_wildcard = CollectionWildcard.from_expression(collections, require_ordered=True) 

909 if collection_wildcard.empty(): 

910 return None 

911 matched_collections = backend.resolve_collection_wildcard(collection_wildcard) 

912 resolved_dataset_type = backend.resolve_single_dataset_type_wildcard(datasetType) 

913 dataId = DataCoordinate.standardize( 

914 dataId, 

915 dimensions=resolved_dataset_type.dimensions, 

916 universe=self.dimensions, 

917 defaults=self.defaults.dataId, 

918 **kwargs, 

919 ) 

920 governor_constraints = {name: {cast(str, dataId[name])} for name in dataId.dimensions.governors} 

921 (filtered_collections,) = backend.filter_dataset_collections( 

922 [resolved_dataset_type], 

923 matched_collections, 

924 governor_constraints=governor_constraints, 

925 ).values() 

926 if not filtered_collections: 

927 return None 

928 if timespan is None: 

929 filtered_collections = [ 

930 collection_record 

931 for collection_record in filtered_collections 

932 if collection_record.type is not CollectionType.CALIBRATION 

933 ] 

934 if filtered_collections: 

935 requested_columns = {"dataset_id", "run", "collection"} 

936 with backend.context() as context: 

937 predicate = context.make_data_coordinate_predicate( 

938 dataId.subset(resolved_dataset_type.dimensions), full=False 

939 ) 

940 if timespan is not None: 

941 requested_columns.add("timespan") 

942 predicate = predicate.logical_and( 

943 context.make_timespan_overlap_predicate( 

944 DatasetColumnTag(resolved_dataset_type.name, "timespan"), timespan 

945 ) 

946 ) 

947 relation = backend.make_dataset_query_relation( 

948 resolved_dataset_type, filtered_collections, requested_columns, context 

949 ).with_rows_satisfying(predicate) 

950 rows = list(context.fetch_iterable(relation)) 

951 else: 

952 rows = [] 

953 if not rows: 

954 return None 

955 elif len(rows) == 1: 

956 best_row = rows[0] 

957 else: 

958 rank_by_collection_key = {record.key: n for n, record in enumerate(filtered_collections)} 

959 collection_tag = DatasetColumnTag(resolved_dataset_type.name, "collection") 

960 row_iter = iter(rows) 

961 best_row = next(row_iter) 

962 best_rank = rank_by_collection_key[best_row[collection_tag]] 

963 have_tie = False 

964 for row in row_iter: 

965 if (rank := rank_by_collection_key[row[collection_tag]]) < best_rank: 

966 best_row = row 

967 best_rank = rank 

968 have_tie = False 

969 elif rank == best_rank: 

970 have_tie = True 

971 assert timespan is not None, "Rank ties should be impossible given DB constraints." 

972 if have_tie: 

973 raise CalibrationLookupError( 

974 f"Ambiguous calibration lookup for {resolved_dataset_type.name} in collections " 

975 f"{collection_wildcard.strings} with timespan {timespan}." 

976 ) 

977 reader = queries.DatasetRefReader( 

978 resolved_dataset_type, 

979 translate_collection=lambda k: self._managers.collections[k].name, 

980 ) 

981 ref = reader.read(best_row, data_id=dataId) 

982 if datastore_records: 

983 ref = self.get_datastore_records(ref) 

984 

985 return ref 

986 

987 @transactional 

988 def insertDatasets( 

989 self, 

990 datasetType: DatasetType | str, 

991 dataIds: Iterable[DataId], 

992 run: str | None = None, 

993 expand: bool = True, 

994 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE, 

995 ) -> list[DatasetRef]: 

996 """Insert one or more datasets into the `Registry`. 

997 

998 This always adds new datasets; to associate existing datasets with 

999 a new collection, use ``associate``. 

1000 

1001 Parameters 

1002 ---------- 

1003 datasetType : `DatasetType` or `str` 

1004 A `DatasetType` or the name of one. 

1005 dataIds : `~collections.abc.Iterable` of `dict` or `DataCoordinate` 

1006 Dimension-based identifiers for the new datasets. 

1007 run : `str`, optional 

1008 The name of the run that produced the datasets. Defaults to 

1009 ``self.defaults.run``. 

1010 expand : `bool`, optional 

1011 If `True` (default), expand data IDs as they are inserted. This is 

1012 necessary in general to allow datastore to generate file templates, 

1013 but it may be disabled if the caller can guarantee this is 

1014 unnecessary. 

1015 idGenerationMode : `DatasetIdGenEnum`, optional 

1016 Specifies option for generating dataset IDs. By default unique IDs 

1017 are generated for each inserted dataset. 

1018 

1019 Returns 

1020 ------- 

1021 refs : `list` of `DatasetRef` 

1022 Resolved `DatasetRef` instances for all given data IDs (in the same 

1023 order). 

1024 

1025 Raises 

1026 ------ 

1027 lsst.daf.butler.registry.DatasetTypeError 

1028 Raised if ``datasetType`` is not known to registry. 

1029 lsst.daf.butler.registry.CollectionTypeError 

1030 Raised if ``run`` collection type is not `~CollectionType.RUN`. 

1031 lsst.daf.butler.registry.NoDefaultCollectionError 

1032 Raised if ``run`` is `None` and ``self.defaults.run`` is `None`. 

1033 lsst.daf.butler.registry.ConflictingDefinitionError 

1034 If a dataset with the same dataset type and data ID as one of those 

1035 given already exists in ``run``. 

1036 lsst.daf.butler.registry.MissingCollectionError 

1037 Raised if ``run`` does not exist in the registry. 

1038 """ 

1039 if isinstance(datasetType, DatasetType): 

1040 storage = self._managers.datasets.find(datasetType.name) 

1041 if storage is None: 

1042 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.") 

1043 else: 

1044 storage = self._managers.datasets.find(datasetType) 

1045 if storage is None: 

1046 raise DatasetTypeError(f"DatasetType with name '{datasetType}' has not been registered.") 

1047 if run is None: 

1048 if self.defaults.run is None: 

1049 raise NoDefaultCollectionError( 

1050 "No run provided to insertDatasets, and no default from registry construction." 

1051 ) 

1052 run = self.defaults.run 

1053 runRecord = self._managers.collections.find(run) 

1054 if runRecord.type is not CollectionType.RUN: 

1055 raise CollectionTypeError( 

1056 f"Given collection is of type {runRecord.type.name}; RUN collection required." 

1057 ) 

1058 assert isinstance(runRecord, RunRecord) 

1059 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG) 

1060 if expand: 

1061 expandedDataIds = [ 

1062 self.expandDataId(dataId, dimensions=storage.datasetType.dimensions) 

1063 for dataId in progress.wrap(dataIds, f"Expanding {storage.datasetType.name} data IDs") 

1064 ] 

1065 else: 

1066 expandedDataIds = [ 

1067 DataCoordinate.standardize(dataId, graph=storage.datasetType.dimensions) for dataId in dataIds 

1068 ] 

1069 try: 

1070 refs = list(storage.insert(runRecord, expandedDataIds, idGenerationMode)) 

1071 if self._managers.obscore: 

1072 context = queries.SqlQueryContext(self._db, self._managers.column_types) 

1073 self._managers.obscore.add_datasets(refs, context) 

1074 except sqlalchemy.exc.IntegrityError as err: 

1075 raise ConflictingDefinitionError( 

1076 "A database constraint failure was triggered by inserting " 

1077 f"one or more datasets of type {storage.datasetType} into " 

1078 f"collection '{run}'. " 

1079 "This probably means a dataset with the same data ID " 

1080 "and dataset type already exists, but it may also mean a " 

1081 "dimension row is missing." 

1082 ) from err 

1083 return refs 

1084 

1085 @transactional 

1086 def _importDatasets( 

1087 self, 

1088 datasets: Iterable[DatasetRef], 

1089 expand: bool = True, 

1090 ) -> list[DatasetRef]: 

1091 """Import one or more datasets into the `Registry`. 

1092 

1093 Difference from `insertDatasets` method is that this method accepts 

1094 `DatasetRef` instances which should already be resolved and have a 

1095 dataset ID. If registry supports globally-unique dataset IDs (e.g. 

1096 `uuid.UUID`) then datasets which already exist in the registry will be 

1097 ignored if imported again. 

1098 

1099 Parameters 

1100 ---------- 

1101 datasets : `~collections.abc.Iterable` of `DatasetRef` 

1102 Datasets to be inserted. All `DatasetRef` instances must have 

1103 identical ``datasetType`` and ``run`` attributes. ``run`` 

1104 attribute can be `None` and defaults to ``self.defaults.run``. 

1105 Datasets can specify ``id`` attribute which will be used for 

1106 inserted datasets. All dataset IDs must have the same type 

1107 (`int` or `uuid.UUID`), if type of dataset IDs does not match 

1108 configured backend then IDs will be ignored and new IDs will be 

1109 generated by backend. 

1110 expand : `bool`, optional 

1111 If `True` (default), expand data IDs as they are inserted. This is 

1112 necessary in general, but it may be disabled if the caller can 

1113 guarantee this is unnecessary. 

1114 

1115 Returns 

1116 ------- 

1117 refs : `list` of `DatasetRef` 

1118 Resolved `DatasetRef` instances for all given data IDs (in the same 

1119 order). If any of ``datasets`` has an ID which already exists in 

1120 the database then it will not be inserted or updated, but a 

1121 resolved `DatasetRef` will be returned for it in any case. 

1122 

1123 Raises 

1124 ------ 

1125 lsst.daf.butler.registry.NoDefaultCollectionError 

1126 Raised if ``run`` is `None` and ``self.defaults.run`` is `None`. 

1127 lsst.daf.butler.registry.DatasetTypeError 

1128 Raised if datasets correspond to more than one dataset type or 

1129 dataset type is not known to registry. 

1130 lsst.daf.butler.registry.ConflictingDefinitionError 

1131 If a dataset with the same dataset type and data ID as one of those 

1132 given already exists in ``run``. 

1133 lsst.daf.butler.registry.MissingCollectionError 

1134 Raised if ``run`` does not exist in the registry. 

1135 

1136 Notes 

1137 ----- 

1138 This method is considered package-private and internal to Butler 

1139 implementation. Clients outside daf_butler package should not use this 

1140 method. 

1141 """ 

1142 datasets = list(datasets) 

1143 if not datasets: 

1144 # nothing to do 

1145 return [] 

1146 

1147 # find dataset type 

1148 datasetTypes = {dataset.datasetType for dataset in datasets} 

1149 if len(datasetTypes) != 1: 

1150 raise DatasetTypeError(f"Multiple dataset types in input datasets: {datasetTypes}") 

1151 datasetType = datasetTypes.pop() 

1152 

1153 # get storage handler for this dataset type 

1154 storage = self._managers.datasets.find(datasetType.name) 

1155 if storage is None: 

1156 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.") 

1157 

1158 # find run name 

1159 runs = {dataset.run for dataset in datasets} 

1160 if len(runs) != 1: 

1161 raise ValueError(f"Multiple run names in input datasets: {runs}") 

1162 run = runs.pop() 

1163 

1164 runRecord = self._managers.collections.find(run) 

1165 if runRecord.type is not CollectionType.RUN: 

1166 raise CollectionTypeError( 

1167 f"Given collection '{runRecord.name}' is of type {runRecord.type.name};" 

1168 " RUN collection required." 

1169 ) 

1170 assert isinstance(runRecord, RunRecord) 

1171 

1172 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG) 

1173 if expand: 

1174 expandedDatasets = [ 

1175 dataset.expanded(self.expandDataId(dataset.dataId, dimensions=storage.datasetType.dimensions)) 

1176 for dataset in progress.wrap(datasets, f"Expanding {storage.datasetType.name} data IDs") 

1177 ] 

1178 else: 

1179 expandedDatasets = [ 

1180 DatasetRef(datasetType, dataset.dataId, id=dataset.id, run=dataset.run, conform=True) 

1181 for dataset in datasets 

1182 ] 

1183 

1184 try: 

1185 refs = list(storage.import_(runRecord, expandedDatasets)) 

1186 if self._managers.obscore: 

1187 context = queries.SqlQueryContext(self._db, self._managers.column_types) 

1188 self._managers.obscore.add_datasets(refs, context) 

1189 except sqlalchemy.exc.IntegrityError as err: 

1190 raise ConflictingDefinitionError( 

1191 "A database constraint failure was triggered by inserting " 

1192 f"one or more datasets of type {storage.datasetType} into " 

1193 f"collection '{run}'. " 

1194 "This probably means a dataset with the same data ID " 

1195 "and dataset type already exists, but it may also mean a " 

1196 "dimension row is missing." 

1197 ) from err 

1198 # Check that imported dataset IDs match the input 

1199 for imported_ref, input_ref in zip(refs, datasets, strict=True): 

1200 if imported_ref.id != input_ref.id: 

1201 raise RegistryConsistencyError( 

1202 "Imported dataset ID differs from input dataset ID, " 

1203 f"input ref: {input_ref}, imported ref: {imported_ref}" 

1204 ) 

1205 return refs 

1206 

1207 def getDataset(self, id: DatasetId) -> DatasetRef | None: 

1208 """Retrieve a Dataset entry. 

1209 

1210 Parameters 

1211 ---------- 

1212 id : `DatasetId` 

1213 The unique identifier for the dataset. 

1214 

1215 Returns 

1216 ------- 

1217 ref : `DatasetRef` or `None` 

1218 A ref to the Dataset, or `None` if no matching Dataset 

1219 was found. 

1220 """ 

1221 return self._managers.datasets.getDatasetRef(id) 

1222 

1223 @transactional 

1224 def removeDatasets(self, refs: Iterable[DatasetRef]) -> None: 

1225 """Remove datasets from the Registry. 

1226 

1227 The datasets will be removed unconditionally from all collections, and 

1228 any `Quantum` that consumed this dataset will instead be marked with 

1229 having a NULL input. `Datastore` records will *not* be deleted; the 

1230 caller is responsible for ensuring that the dataset has already been 

1231 removed from all Datastores. 

1232 

1233 Parameters 

1234 ---------- 

1235 refs : `~collections.abc.Iterable` [`DatasetRef`] 

1236 References to the datasets to be removed. Must include a valid 

1237 ``id`` attribute, and should be considered invalidated upon return. 

1238 

1239 Raises 

1240 ------ 

1241 lsst.daf.butler.AmbiguousDatasetError 

1242 Raised if any ``ref.id`` is `None`. 

1243 lsst.daf.butler.registry.OrphanedRecordError 

1244 Raised if any dataset is still present in any `Datastore`. 

1245 """ 

1246 progress = Progress("lsst.daf.butler.Registry.removeDatasets", level=logging.DEBUG) 

1247 for datasetType, refsForType in progress.iter_item_chunks( 

1248 DatasetRef.iter_by_type(refs), desc="Removing datasets by type" 

1249 ): 

1250 storage = self._managers.datasets[datasetType.name] 

1251 try: 

1252 storage.delete(refsForType) 

1253 except sqlalchemy.exc.IntegrityError as err: 

1254 raise OrphanedRecordError( 

1255 "One or more datasets is still present in one or more Datastores." 

1256 ) from err 

1257 

1258 @transactional 

1259 def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

1260 """Add existing datasets to a `~CollectionType.TAGGED` collection. 

1261 

1262 If a DatasetRef with the same exact ID is already in a collection 

1263 nothing is changed. If a `DatasetRef` with the same `DatasetType` and 

1264 data ID but with different ID exists in the collection, 

1265 `~lsst.daf.butler.registry.ConflictingDefinitionError` is raised. 

1266 

1267 Parameters 

1268 ---------- 

1269 collection : `str` 

1270 Indicates the collection the datasets should be associated with. 

1271 refs : `~collections.abc.Iterable` [ `DatasetRef` ] 

1272 An iterable of resolved `DatasetRef` instances that already exist 

1273 in this `Registry`. 

1274 

1275 Raises 

1276 ------ 

1277 lsst.daf.butler.registry.ConflictingDefinitionError 

1278 If a Dataset with the given `DatasetRef` already exists in the 

1279 given collection. 

1280 lsst.daf.butler.registry.MissingCollectionError 

1281 Raised if ``collection`` does not exist in the registry. 

1282 lsst.daf.butler.registry.CollectionTypeError 

1283 Raise adding new datasets to the given ``collection`` is not 

1284 allowed. 

1285 """ 

1286 progress = Progress("lsst.daf.butler.Registry.associate", level=logging.DEBUG) 

1287 collectionRecord = self._managers.collections.find(collection) 

1288 if collectionRecord.type is not CollectionType.TAGGED: 

1289 raise CollectionTypeError( 

1290 f"Collection '{collection}' has type {collectionRecord.type.name}, not TAGGED." 

1291 ) 

1292 for datasetType, refsForType in progress.iter_item_chunks( 

1293 DatasetRef.iter_by_type(refs), desc="Associating datasets by type" 

1294 ): 

1295 storage = self._managers.datasets[datasetType.name] 

1296 try: 

1297 storage.associate(collectionRecord, refsForType) 

1298 if self._managers.obscore: 

1299 # If a TAGGED collection is being monitored by ObsCore 

1300 # manager then we may need to save the dataset. 

1301 context = queries.SqlQueryContext(self._db, self._managers.column_types) 

1302 self._managers.obscore.associate(refsForType, collectionRecord, context) 

1303 except sqlalchemy.exc.IntegrityError as err: 

1304 raise ConflictingDefinitionError( 

1305 f"Constraint violation while associating dataset of type {datasetType.name} with " 

1306 f"collection {collection}. This probably means that one or more datasets with the same " 

1307 "dataset type and data ID already exist in the collection, but it may also indicate " 

1308 "that the datasets do not exist." 

1309 ) from err 

1310 

1311 @transactional 

1312 def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

1313 """Remove existing datasets from a `~CollectionType.TAGGED` collection. 

1314 

1315 ``collection`` and ``ref`` combinations that are not currently 

1316 associated are silently ignored. 

1317 

1318 Parameters 

1319 ---------- 

1320 collection : `str` 

1321 The collection the datasets should no longer be associated with. 

1322 refs : `~collections.abc.Iterable` [ `DatasetRef` ] 

1323 An iterable of resolved `DatasetRef` instances that already exist 

1324 in this `Registry`. 

1325 

1326 Raises 

1327 ------ 

1328 lsst.daf.butler.AmbiguousDatasetError 

1329 Raised if any of the given dataset references is unresolved. 

1330 lsst.daf.butler.registry.MissingCollectionError 

1331 Raised if ``collection`` does not exist in the registry. 

1332 lsst.daf.butler.registry.CollectionTypeError 

1333 Raise adding new datasets to the given ``collection`` is not 

1334 allowed. 

1335 """ 

1336 progress = Progress("lsst.daf.butler.Registry.disassociate", level=logging.DEBUG) 

1337 collectionRecord = self._managers.collections.find(collection) 

1338 if collectionRecord.type is not CollectionType.TAGGED: 

1339 raise CollectionTypeError( 

1340 f"Collection '{collection}' has type {collectionRecord.type.name}; expected TAGGED." 

1341 ) 

1342 for datasetType, refsForType in progress.iter_item_chunks( 

1343 DatasetRef.iter_by_type(refs), desc="Disassociating datasets by type" 

1344 ): 

1345 storage = self._managers.datasets[datasetType.name] 

1346 storage.disassociate(collectionRecord, refsForType) 

1347 if self._managers.obscore: 

1348 self._managers.obscore.disassociate(refsForType, collectionRecord) 

1349 

1350 @transactional 

1351 def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None: 

1352 """Associate one or more datasets with a calibration collection and a 

1353 validity range within it. 

1354 

1355 Parameters 

1356 ---------- 

1357 collection : `str` 

1358 The name of an already-registered `~CollectionType.CALIBRATION` 

1359 collection. 

1360 refs : `~collections.abc.Iterable` [ `DatasetRef` ] 

1361 Datasets to be associated. 

1362 timespan : `Timespan` 

1363 The validity range for these datasets within the collection. 

1364 

1365 Raises 

1366 ------ 

1367 lsst.daf.butler.AmbiguousDatasetError 

1368 Raised if any of the given `DatasetRef` instances is unresolved. 

1369 lsst.daf.butler.registry.ConflictingDefinitionError 

1370 Raised if the collection already contains a different dataset with 

1371 the same `DatasetType` and data ID and an overlapping validity 

1372 range. 

1373 lsst.daf.butler.registry.CollectionTypeError 

1374 Raised if ``collection`` is not a `~CollectionType.CALIBRATION` 

1375 collection or if one or more datasets are of a dataset type for 

1376 which `DatasetType.isCalibration` returns `False`. 

1377 """ 

1378 progress = Progress("lsst.daf.butler.Registry.certify", level=logging.DEBUG) 

1379 collectionRecord = self._managers.collections.find(collection) 

1380 for datasetType, refsForType in progress.iter_item_chunks( 

1381 DatasetRef.iter_by_type(refs), desc="Certifying datasets by type" 

1382 ): 

1383 storage = self._managers.datasets[datasetType.name] 

1384 storage.certify( 

1385 collectionRecord, 

1386 refsForType, 

1387 timespan, 

1388 context=queries.SqlQueryContext(self._db, self._managers.column_types), 

1389 ) 

1390 

1391 @transactional 

1392 def decertify( 

1393 self, 

1394 collection: str, 

1395 datasetType: str | DatasetType, 

1396 timespan: Timespan, 

1397 *, 

1398 dataIds: Iterable[DataId] | None = None, 

1399 ) -> None: 

1400 """Remove or adjust datasets to clear a validity range within a 

1401 calibration collection. 

1402 

1403 Parameters 

1404 ---------- 

1405 collection : `str` 

1406 The name of an already-registered `~CollectionType.CALIBRATION` 

1407 collection. 

1408 datasetType : `str` or `DatasetType` 

1409 Name or `DatasetType` instance for the datasets to be decertified. 

1410 timespan : `Timespan`, optional 

1411 The validity range to remove datasets from within the collection. 

1412 Datasets that overlap this range but are not contained by it will 

1413 have their validity ranges adjusted to not overlap it, which may 

1414 split a single dataset validity range into two. 

1415 dataIds : iterable [`dict` or `DataCoordinate`], optional 

1416 Data IDs that should be decertified within the given validity range 

1417 If `None`, all data IDs for ``self.datasetType`` will be 

1418 decertified. 

1419 

1420 Raises 

1421 ------ 

1422 lsst.daf.butler.registry.CollectionTypeError 

1423 Raised if ``collection`` is not a `~CollectionType.CALIBRATION` 

1424 collection or if ``datasetType.isCalibration() is False``. 

1425 """ 

1426 collectionRecord = self._managers.collections.find(collection) 

1427 if isinstance(datasetType, str): 

1428 storage = self._managers.datasets[datasetType] 

1429 else: 

1430 storage = self._managers.datasets[datasetType.name] 

1431 standardizedDataIds = None 

1432 if dataIds is not None: 

1433 standardizedDataIds = [ 

1434 DataCoordinate.standardize(d, dimensions=storage.datasetType.dimensions) for d in dataIds 

1435 ] 

1436 storage.decertify( 

1437 collectionRecord, 

1438 timespan, 

1439 dataIds=standardizedDataIds, 

1440 context=queries.SqlQueryContext(self._db, self._managers.column_types), 

1441 ) 

1442 

1443 def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager: 

1444 """Return an object that allows a new `Datastore` instance to 

1445 communicate with this `Registry`. 

1446 

1447 Returns 

1448 ------- 

1449 manager : `~.interfaces.DatastoreRegistryBridgeManager` 

1450 Object that mediates communication between this `Registry` and its 

1451 associated datastores. 

1452 """ 

1453 return self._managers.datastores 

1454 

1455 def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]: 

1456 """Retrieve datastore locations for a given dataset. 

1457 

1458 Parameters 

1459 ---------- 

1460 ref : `DatasetRef` 

1461 A reference to the dataset for which to retrieve storage 

1462 information. 

1463 

1464 Returns 

1465 ------- 

1466 datastores : `~collections.abc.Iterable` [ `str` ] 

1467 All the matching datastores holding this dataset. 

1468 

1469 Raises 

1470 ------ 

1471 lsst.daf.butler.AmbiguousDatasetError 

1472 Raised if ``ref.id`` is `None`. 

1473 """ 

1474 return self._managers.datastores.findDatastores(ref) 

1475 

1476 def expandDataId( 

1477 self, 

1478 dataId: DataId | None = None, 

1479 *, 

1480 dimensions: Iterable[str] | DimensionGroup | DimensionGraph | None = None, 

1481 graph: DimensionGraph | None = None, 

1482 records: NameLookupMapping[DimensionElement, DimensionRecord | None] | None = None, 

1483 withDefaults: bool = True, 

1484 **kwargs: Any, 

1485 ) -> DataCoordinate: 

1486 """Expand a dimension-based data ID to include additional information. 

1487 

1488 Parameters 

1489 ---------- 

1490 dataId : `DataCoordinate` or `dict`, optional 

1491 Data ID to be expanded; augmented and overridden by ``kwargs``. 

1492 dimensions : `~collections.abc.Iterable` [ `str` ], \ 

1493 `DimensionGroup`, or `DimensionGraph`, optional 

1494 The dimensions to be identified by the new `DataCoordinate`. 

1495 If not provided, will be inferred from the keys of ``mapping`` and 

1496 ``**kwargs``, and ``universe`` must be provided unless ``mapping`` 

1497 is already a `DataCoordinate`. 

1498 graph : `DimensionGraph`, optional 

1499 Like ``dimensions``, but as a ``DimensionGraph`` instance. Ignored 

1500 if ``dimensions`` is provided. Deprecated and will be removed 

1501 after v27. 

1502 records : `~collections.abc.Mapping` [`str`, `DimensionRecord`], \ 

1503 optional 

1504 Dimension record data to use before querying the database for that 

1505 data, keyed by element name. 

1506 withDefaults : `bool`, optional 

1507 Utilize ``self.defaults.dataId`` to fill in missing governor 

1508 dimension key-value pairs. Defaults to `True` (i.e. defaults are 

1509 used). 

1510 **kwargs 

1511 Additional keywords are treated like additional key-value pairs for 

1512 ``dataId``, extending and overriding. 

1513 

1514 Returns 

1515 ------- 

1516 expanded : `DataCoordinate` 

1517 A data ID that includes full metadata for all of the dimensions it 

1518 identifies, i.e. guarantees that ``expanded.hasRecords()`` and 

1519 ``expanded.hasFull()`` both return `True`. 

1520 

1521 Raises 

1522 ------ 

1523 lsst.daf.butler.registry.DataIdError 

1524 Raised when ``dataId`` or keyword arguments specify unknown 

1525 dimensions or values, or when a resulting data ID contains 

1526 contradictory key-value pairs, according to dimension 

1527 relationships. 

1528 

1529 Notes 

1530 ----- 

1531 This method cannot be relied upon to reject invalid data ID values 

1532 for dimensions that do actually not have any record columns. For 

1533 efficiency reasons the records for these dimensions (which have only 

1534 dimension key values that are given by the caller) may be constructed 

1535 directly rather than obtained from the registry database. 

1536 """ 

1537 if not withDefaults: 

1538 defaults = None 

1539 else: 

1540 defaults = self.defaults.dataId 

1541 standardized = DataCoordinate.standardize( 

1542 dataId, 

1543 graph=graph, 

1544 dimensions=dimensions, 

1545 universe=self.dimensions, 

1546 defaults=defaults, 

1547 **kwargs, 

1548 ) 

1549 if standardized.hasRecords(): 

1550 return standardized 

1551 if records is None: 

1552 records = {} 

1553 elif isinstance(records, NamedKeyMapping): 

1554 records = records.byName() 

1555 else: 

1556 records = dict(records) 

1557 if isinstance(dataId, DataCoordinate) and dataId.hasRecords(): 

1558 for element_name in dataId.dimensions.elements: 

1559 records[element_name] = dataId.records[element_name] 

1560 keys = dict(standardized.mapping) 

1561 for element_name in standardized.dimensions.lookup_order: 

1562 element = self.dimensions[element_name] 

1563 record = records.get(element_name, ...) # Use ... to mean not found; None might mean NULL 

1564 if record is ...: 

1565 if element_name in self.dimensions.dimensions.names and keys.get(element_name) is None: 

1566 if element_name in standardized.dimensions.required: 

1567 raise DimensionNameError( 

1568 f"No value or null value for required dimension {element_name}." 

1569 ) 

1570 keys[element_name] = None 

1571 record = None 

1572 else: 

1573 record = self._managers.dimensions.fetch_one( 

1574 element_name, 

1575 DataCoordinate.standardize(keys, dimensions=element.minimal_group), 

1576 self.dimension_record_cache, 

1577 ) 

1578 records[element_name] = record 

1579 if record is not None: 

1580 for d in element.implied: 

1581 value = getattr(record, d.name) 

1582 if keys.setdefault(d.name, value) != value: 

1583 raise InconsistentDataIdError( 

1584 f"Data ID {standardized} has {d.name}={keys[d.name]!r}, " 

1585 f"but {element_name} implies {d.name}={value!r}." 

1586 ) 

1587 else: 

1588 if element_name in standardized.dimensions.required: 

1589 raise DataIdValueError( 

1590 f"Could not fetch record for required dimension {element.name} via keys {keys}." 

1591 ) 

1592 if element.defines_relationships: 

1593 raise InconsistentDataIdError( 

1594 f"Could not fetch record for element {element_name} via keys {keys}, ", 

1595 "but it is marked as defining relationships; this means one or more dimensions are " 

1596 "have inconsistent values.", 

1597 ) 

1598 for d in element.implied: 

1599 keys.setdefault(d.name, None) 

1600 records.setdefault(d.name, None) 

1601 return DataCoordinate.standardize(keys, dimensions=standardized.dimensions).expanded(records=records) 

1602 

1603 def insertDimensionData( 

1604 self, 

1605 element: DimensionElement | str, 

1606 *data: Mapping[str, Any] | DimensionRecord, 

1607 conform: bool = True, 

1608 replace: bool = False, 

1609 skip_existing: bool = False, 

1610 ) -> None: 

1611 """Insert one or more dimension records into the database. 

1612 

1613 Parameters 

1614 ---------- 

1615 element : `DimensionElement` or `str` 

1616 The `DimensionElement` or name thereof that identifies the table 

1617 records will be inserted into. 

1618 *data : `dict` or `DimensionRecord` 

1619 One or more records to insert. 

1620 conform : `bool`, optional 

1621 If `False` (`True` is default) perform no checking or conversions, 

1622 and assume that ``element`` is a `DimensionElement` instance and 

1623 ``data`` is a one or more `DimensionRecord` instances of the 

1624 appropriate subclass. 

1625 replace : `bool`, optional 

1626 If `True` (`False` is default), replace existing records in the 

1627 database if there is a conflict. 

1628 skip_existing : `bool`, optional 

1629 If `True` (`False` is default), skip insertion if a record with 

1630 the same primary key values already exists. Unlike 

1631 `syncDimensionData`, this will not detect when the given record 

1632 differs from what is in the database, and should not be used when 

1633 this is a concern. 

1634 """ 

1635 if isinstance(element, str): 

1636 element = self.dimensions[element] 

1637 if conform: 

1638 records = [ 

1639 row if isinstance(row, DimensionRecord) else element.RecordClass(**row) for row in data 

1640 ] 

1641 else: 

1642 # Ignore typing since caller said to trust them with conform=False. 

1643 records = data # type: ignore 

1644 if element.name in self.dimension_record_cache: 

1645 self.dimension_record_cache.reset() 

1646 self._managers.dimensions.insert( 

1647 element, 

1648 *records, 

1649 replace=replace, 

1650 skip_existing=skip_existing, 

1651 ) 

1652 

1653 def syncDimensionData( 

1654 self, 

1655 element: DimensionElement | str, 

1656 row: Mapping[str, Any] | DimensionRecord, 

1657 conform: bool = True, 

1658 update: bool = False, 

1659 ) -> bool | dict[str, Any]: 

1660 """Synchronize the given dimension record with the database, inserting 

1661 if it does not already exist and comparing values if it does. 

1662 

1663 Parameters 

1664 ---------- 

1665 element : `DimensionElement` or `str` 

1666 The `DimensionElement` or name thereof that identifies the table 

1667 records will be inserted into. 

1668 row : `dict` or `DimensionRecord` 

1669 The record to insert. 

1670 conform : `bool`, optional 

1671 If `False` (`True` is default) perform no checking or conversions, 

1672 and assume that ``element`` is a `DimensionElement` instance and 

1673 ``data`` is a `DimensionRecord` instances of the appropriate 

1674 subclass. 

1675 update : `bool`, optional 

1676 If `True` (`False` is default), update the existing record in the 

1677 database if there is a conflict. 

1678 

1679 Returns 

1680 ------- 

1681 inserted_or_updated : `bool` or `dict` 

1682 `True` if a new row was inserted, `False` if no changes were 

1683 needed, or a `dict` mapping updated column names to their old 

1684 values if an update was performed (only possible if 

1685 ``update=True``). 

1686 

1687 Raises 

1688 ------ 

1689 lsst.daf.butler.registry.ConflictingDefinitionError 

1690 Raised if the record exists in the database (according to primary 

1691 key lookup) but is inconsistent with the given one. 

1692 """ 

1693 if conform: 

1694 if isinstance(element, str): 

1695 element = self.dimensions[element] 

1696 record = row if isinstance(row, DimensionRecord) else element.RecordClass(**row) 

1697 else: 

1698 # Ignore typing since caller said to trust them with conform=False. 

1699 record = row # type: ignore 

1700 if record.definition.name in self.dimension_record_cache: 

1701 self.dimension_record_cache.reset() 

1702 return self._managers.dimensions.sync(record, update=update) 

1703 

1704 def queryDatasetTypes( 

1705 self, 

1706 expression: Any = ..., 

1707 *, 

1708 components: bool | _Marker = _DefaultMarker, 

1709 missing: list[str] | None = None, 

1710 ) -> Iterable[DatasetType]: 

1711 """Iterate over the dataset types whose names match an expression. 

1712 

1713 Parameters 

1714 ---------- 

1715 expression : dataset type expression, optional 

1716 An expression that fully or partially identifies the dataset types 

1717 to return, such as a `str`, `re.Pattern`, or iterable thereof. 

1718 ``...`` can be used to return all dataset types, and is the 

1719 default. See :ref:`daf_butler_dataset_type_expressions` for more 

1720 information. 

1721 components : `bool`, optional 

1722 Must be `False`. Provided only for backwards compatibility. After 

1723 v27 this argument will be removed entirely. 

1724 missing : `list` of `str`, optional 

1725 String dataset type names that were explicitly given (i.e. not 

1726 regular expression patterns) but not found will be appended to this 

1727 list, if it is provided. 

1728 

1729 Returns 

1730 ------- 

1731 dataset_types : `~collections.abc.Iterable` [ `DatasetType`] 

1732 An `~collections.abc.Iterable` of `DatasetType` instances whose 

1733 names match ``expression``. 

1734 

1735 Raises 

1736 ------ 

1737 lsst.daf.butler.registry.DatasetTypeExpressionError 

1738 Raised when ``expression`` is invalid. 

1739 """ 

1740 if components is not _DefaultMarker: 

1741 if components is not False: 

1742 raise DatasetTypeError( 

1743 "Dataset component queries are no longer supported by Registry. Use " 

1744 "DatasetType methods to obtain components from parent dataset types instead." 

1745 ) 

1746 else: 

1747 warnings.warn( 

1748 "The components parameter is ignored. It will be removed after v27.", 

1749 category=FutureWarning, 

1750 stacklevel=find_outside_stacklevel("lsst.daf.butler"), 

1751 ) 

1752 wildcard = DatasetTypeWildcard.from_expression(expression) 

1753 return self._managers.datasets.resolve_wildcard(wildcard, missing=missing) 

1754 

1755 def queryCollections( 

1756 self, 

1757 expression: Any = ..., 

1758 datasetType: DatasetType | None = None, 

1759 collectionTypes: Iterable[CollectionType] | CollectionType = CollectionType.all(), 

1760 flattenChains: bool = False, 

1761 includeChains: bool | None = None, 

1762 ) -> Sequence[str]: 

1763 """Iterate over the collections whose names match an expression. 

1764 

1765 Parameters 

1766 ---------- 

1767 expression : collection expression, optional 

1768 An expression that identifies the collections to return, such as 

1769 a `str` (for full matches or partial matches via globs), 

1770 `re.Pattern` (for partial matches), or iterable thereof. ``...`` 

1771 can be used to return all collections, and is the default. 

1772 See :ref:`daf_butler_collection_expressions` for more information. 

1773 datasetType : `DatasetType`, optional 

1774 If provided, only yield collections that may contain datasets of 

1775 this type. This is a conservative approximation in general; it may 

1776 yield collections that do not have any such datasets. 

1777 collectionTypes : `~collections.abc.Set` [`CollectionType`] or \ 

1778 `CollectionType`, optional 

1779 If provided, only yield collections of these types. 

1780 flattenChains : `bool`, optional 

1781 If `True` (`False` is default), recursively yield the child 

1782 collections of matching `~CollectionType.CHAINED` collections. 

1783 includeChains : `bool`, optional 

1784 If `True`, yield records for matching `~CollectionType.CHAINED` 

1785 collections. Default is the opposite of ``flattenChains``: include 

1786 either CHAINED collections or their children, but not both. 

1787 

1788 Returns 

1789 ------- 

1790 collections : `~collections.abc.Sequence` [ `str` ] 

1791 The names of collections that match ``expression``. 

1792 

1793 Raises 

1794 ------ 

1795 lsst.daf.butler.registry.CollectionExpressionError 

1796 Raised when ``expression`` is invalid. 

1797 

1798 Notes 

1799 ----- 

1800 The order in which collections are returned is unspecified, except that 

1801 the children of a `~CollectionType.CHAINED` collection are guaranteed 

1802 to be in the order in which they are searched. When multiple parent 

1803 `~CollectionType.CHAINED` collections match the same criteria, the 

1804 order in which the two lists appear is unspecified, and the lists of 

1805 children may be incomplete if a child has multiple parents. 

1806 """ 

1807 # Right now the datasetTypes argument is completely ignored, but that 

1808 # is consistent with its [lack of] guarantees. DM-24939 or a follow-up 

1809 # ticket will take care of that. 

1810 try: 

1811 wildcard = CollectionWildcard.from_expression(expression) 

1812 except TypeError as exc: 

1813 raise CollectionExpressionError(f"Invalid collection expression '{expression}'") from exc 

1814 collectionTypes = ensure_iterable(collectionTypes) 

1815 return [ 

1816 record.name 

1817 for record in self._managers.collections.resolve_wildcard( 

1818 wildcard, 

1819 collection_types=frozenset(collectionTypes), 

1820 flatten_chains=flattenChains, 

1821 include_chains=includeChains, 

1822 ) 

1823 ] 

1824 

1825 def _makeQueryBuilder( 

1826 self, 

1827 summary: queries.QuerySummary, 

1828 doomed_by: Iterable[str] = (), 

1829 ) -> queries.QueryBuilder: 

1830 """Return a `QueryBuilder` instance capable of constructing and 

1831 managing more complex queries than those obtainable via `Registry` 

1832 interfaces. 

1833 

1834 This is an advanced interface; downstream code should prefer 

1835 `Registry.queryDataIds` and `Registry.queryDatasets` whenever those 

1836 are sufficient. 

1837 

1838 Parameters 

1839 ---------- 

1840 summary : `queries.QuerySummary` 

1841 Object describing and categorizing the full set of dimensions that 

1842 will be included in the query. 

1843 doomed_by : `~collections.abc.Iterable` of `str`, optional 

1844 A list of diagnostic messages that indicate why the query is going 

1845 to yield no results and should not even be executed. If an empty 

1846 container (default) the query will be executed unless other code 

1847 determines that it is doomed. 

1848 

1849 Returns 

1850 ------- 

1851 builder : `queries.QueryBuilder` 

1852 Object that can be used to construct and perform advanced queries. 

1853 """ 

1854 doomed_by = list(doomed_by) 

1855 backend = queries.SqlQueryBackend(self._db, self._managers, self.dimension_record_cache) 

1856 context = backend.context() 

1857 relation: Relation | None = None 

1858 if doomed_by: 

1859 relation = LeafRelation.make_doomed(context.sql_engine, set(), doomed_by) 

1860 return queries.QueryBuilder( 

1861 summary, 

1862 backend=backend, 

1863 context=context, 

1864 relation=relation, 

1865 ) 

1866 

1867 def _standardize_query_data_id_args( 

1868 self, data_id: DataId | None, *, doomed_by: list[str], **kwargs: Any 

1869 ) -> DataCoordinate: 

1870 """Preprocess the data ID arguments passed to query* methods. 

1871 

1872 Parameters 

1873 ---------- 

1874 data_id : `DataId` or `None` 

1875 Data ID that constrains the query results. 

1876 doomed_by : `list` [ `str` ] 

1877 List to append messages indicating why the query is doomed to 

1878 yield no results. 

1879 **kwargs 

1880 Additional data ID key-value pairs, extending and overriding 

1881 ``data_id``. 

1882 

1883 Returns 

1884 ------- 

1885 data_id : `DataCoordinate` 

1886 Standardized data ID. Will be fully expanded unless expansion 

1887 fails, in which case a message will be appended to ``doomed_by`` 

1888 on return. 

1889 """ 

1890 try: 

1891 return self.expandDataId(data_id, **kwargs) 

1892 except DataIdValueError as err: 

1893 doomed_by.append(str(err)) 

1894 return DataCoordinate.standardize( 

1895 data_id, **kwargs, universe=self.dimensions, defaults=self.defaults.dataId 

1896 ) 

1897 

1898 def _standardize_query_dataset_args( 

1899 self, 

1900 datasets: Any, 

1901 collections: CollectionArgType | None, 

1902 mode: Literal["find_first"] | Literal["find_all"] | Literal["constrain"] = "constrain", 

1903 *, 

1904 doomed_by: list[str], 

1905 ) -> tuple[list[DatasetType], CollectionWildcard | None]: 

1906 """Preprocess dataset arguments passed to query* methods. 

1907 

1908 Parameters 

1909 ---------- 

1910 datasets : `DatasetType`, `str`, `re.Pattern`, or iterable of these 

1911 Expression identifying dataset types. See `queryDatasetTypes` for 

1912 details. 

1913 collections : `str`, `re.Pattern`, or iterable of these 

1914 Expression identifying collections to be searched. See 

1915 `queryCollections` for details. 

1916 mode : `str`, optional 

1917 The way in which datasets are being used in this query; one of: 

1918 

1919 - "find_first": this is a query for the first dataset in an 

1920 ordered list of collections. Prohibits collection wildcards, 

1921 but permits dataset type wildcards. 

1922 

1923 - "find_all": this is a query for all datasets in all matched 

1924 collections. Permits collection and dataset type wildcards. 

1925 

1926 - "constrain": this is a query for something other than datasets, 

1927 with results constrained by dataset existence. Permits 

1928 collection wildcards and prohibits ``...`` as a dataset type 

1929 wildcard. 

1930 doomed_by : `list` [ `str` ] 

1931 List to append messages indicating why the query is doomed to 

1932 yield no results. 

1933 

1934 Returns 

1935 ------- 

1936 dataset_types : `list` [ `DatasetType` ] 

1937 List of matched dataset types. 

1938 collections : `CollectionWildcard` 

1939 Processed collection expression. 

1940 """ 

1941 dataset_types: list[DatasetType] = [] 

1942 collection_wildcard: CollectionWildcard | None = None 

1943 if datasets is not None: 

1944 if collections is None: 

1945 if not self.defaults.collections: 

1946 raise NoDefaultCollectionError("No collections, and no registry default collections.") 

1947 collection_wildcard = CollectionWildcard.from_expression(self.defaults.collections) 

1948 else: 

1949 collection_wildcard = CollectionWildcard.from_expression(collections) 

1950 if mode == "find_first" and collection_wildcard.patterns: 

1951 raise TypeError( 

1952 f"Collection pattern(s) {collection_wildcard.patterns} not allowed in this context." 

1953 ) 

1954 missing: list[str] = [] 

1955 dataset_types = self._managers.datasets.resolve_wildcard( 

1956 datasets, missing=missing, explicit_only=(mode == "constrain") 

1957 ) 

1958 if missing and mode == "constrain": 

1959 raise MissingDatasetTypeError( 

1960 f"Dataset type(s) {missing} are not registered.", 

1961 ) 

1962 doomed_by.extend(f"Dataset type {name} is not registered." for name in missing) 

1963 elif collections: 

1964 # I think this check should actually be `collections is not None`, 

1965 # but it looks like some CLI scripts use empty tuple as default. 

1966 raise ArgumentError(f"Cannot pass 'collections' (='{collections}') without 'datasets'.") 

1967 return dataset_types, collection_wildcard 

1968 

1969 def queryDatasets( 

1970 self, 

1971 datasetType: Any, 

1972 *, 

1973 collections: CollectionArgType | None = None, 

1974 dimensions: Iterable[Dimension | str] | None = None, 

1975 dataId: DataId | None = None, 

1976 where: str = "", 

1977 findFirst: bool = False, 

1978 components: bool | _Marker = _DefaultMarker, 

1979 bind: Mapping[str, Any] | None = None, 

1980 check: bool = True, 

1981 **kwargs: Any, 

1982 ) -> queries.DatasetQueryResults: 

1983 """Query for and iterate over dataset references matching user-provided 

1984 criteria. 

1985 

1986 Parameters 

1987 ---------- 

1988 datasetType : dataset type expression 

1989 An expression that fully or partially identifies the dataset types 

1990 to be queried. Allowed types include `DatasetType`, `str`, 

1991 `re.Pattern`, and iterables thereof. The special value ``...`` can 

1992 be used to query all dataset types. See 

1993 :ref:`daf_butler_dataset_type_expressions` for more information. 

1994 collections : collection expression, optional 

1995 An expression that identifies the collections to search, such as a 

1996 `str` (for full matches or partial matches via globs), `re.Pattern` 

1997 (for partial matches), or iterable thereof. ``...`` can be used to 

1998 search all collections (actually just all `~CollectionType.RUN` 

1999 collections, because this will still find all datasets). 

2000 If not provided, ``self.default.collections`` is used. See 

2001 :ref:`daf_butler_collection_expressions` for more information. 

2002 dimensions : `~collections.abc.Iterable` of `Dimension` or `str` 

2003 Dimensions to include in the query (in addition to those used 

2004 to identify the queried dataset type(s)), either to constrain 

2005 the resulting datasets to those for which a matching dimension 

2006 exists, or to relate the dataset type's dimensions to dimensions 

2007 referenced by the ``dataId`` or ``where`` arguments. 

2008 dataId : `dict` or `DataCoordinate`, optional 

2009 A data ID whose key-value pairs are used as equality constraints 

2010 in the query. 

2011 where : `str`, optional 

2012 A string expression similar to a SQL WHERE clause. May involve 

2013 any column of a dimension table or (as a shortcut for the primary 

2014 key column of a dimension table) dimension name. See 

2015 :ref:`daf_butler_dimension_expressions` for more information. 

2016 findFirst : `bool`, optional 

2017 If `True` (`False` is default), for each result data ID, only 

2018 yield one `DatasetRef` of each `DatasetType`, from the first 

2019 collection in which a dataset of that dataset type appears 

2020 (according to the order of ``collections`` passed in). If `True`, 

2021 ``collections`` must not contain regular expressions and may not 

2022 be ``...``. 

2023 components : `bool`, optional 

2024 Must be `False`. Provided only for backwards compatibility. After 

2025 v27 this argument will be removed entirely. 

2026 bind : `~collections.abc.Mapping`, optional 

2027 Mapping containing literal values that should be injected into the 

2028 ``where`` expression, keyed by the identifiers they replace. 

2029 Values of collection type can be expanded in some cases; see 

2030 :ref:`daf_butler_dimension_expressions_identifiers` for more 

2031 information. 

2032 check : `bool`, optional 

2033 If `True` (default) check the query for consistency before 

2034 executing it. This may reject some valid queries that resemble 

2035 common mistakes (e.g. queries for visits without specifying an 

2036 instrument). 

2037 **kwargs 

2038 Additional keyword arguments are forwarded to 

2039 `DataCoordinate.standardize` when processing the ``dataId`` 

2040 argument (and may be used to provide a constraining data ID even 

2041 when the ``dataId`` argument is `None`). 

2042 

2043 Returns 

2044 ------- 

2045 refs : `.queries.DatasetQueryResults` 

2046 Dataset references matching the given query criteria. Nested data 

2047 IDs are guaranteed to include values for all implied dimensions 

2048 (i.e. `DataCoordinate.hasFull` will return `True`), but will not 

2049 include dimension records (`DataCoordinate.hasRecords` will be 

2050 `False`) unless `~.queries.DatasetQueryResults.expanded` is 

2051 called on the result object (which returns a new one). 

2052 

2053 Raises 

2054 ------ 

2055 lsst.daf.butler.registry.DatasetTypeExpressionError 

2056 Raised when ``datasetType`` expression is invalid. 

2057 TypeError 

2058 Raised when the arguments are incompatible, such as when a 

2059 collection wildcard is passed when ``findFirst`` is `True`, or 

2060 when ``collections`` is `None` and ``self.defaults.collections`` is 

2061 also `None`. 

2062 lsst.daf.butler.registry.DataIdError 

2063 Raised when ``dataId`` or keyword arguments specify unknown 

2064 dimensions or values, or when they contain inconsistent values. 

2065 lsst.daf.butler.registry.UserExpressionError 

2066 Raised when ``where`` expression is invalid. 

2067 

2068 Notes 

2069 ----- 

2070 When multiple dataset types are queried in a single call, the 

2071 results of this operation are equivalent to querying for each dataset 

2072 type separately in turn, and no information about the relationships 

2073 between datasets of different types is included. In contexts where 

2074 that kind of information is important, the recommended pattern is to 

2075 use `queryDataIds` to first obtain data IDs (possibly with the 

2076 desired dataset types and collections passed as constraints to the 

2077 query), and then use multiple (generally much simpler) calls to 

2078 `queryDatasets` with the returned data IDs passed as constraints. 

2079 """ 

2080 if components is not _DefaultMarker: 

2081 if components is not False: 

2082 raise DatasetTypeError( 

2083 "Dataset component queries are no longer supported by Registry. Use " 

2084 "DatasetType methods to obtain components from parent dataset types instead." 

2085 ) 

2086 else: 

2087 warnings.warn( 

2088 "The components parameter is ignored. It will be removed after v27.", 

2089 category=FutureWarning, 

2090 stacklevel=find_outside_stacklevel("lsst.daf.butler"), 

2091 ) 

2092 doomed_by: list[str] = [] 

2093 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs) 

2094 resolved_dataset_types, collection_wildcard = self._standardize_query_dataset_args( 

2095 datasetType, 

2096 collections, 

2097 mode="find_first" if findFirst else "find_all", 

2098 doomed_by=doomed_by, 

2099 ) 

2100 if collection_wildcard is not None and collection_wildcard.empty(): 

2101 doomed_by.append("No datasets can be found because collection list is empty.") 

2102 return queries.ChainedDatasetQueryResults([], doomed_by=doomed_by) 

2103 parent_results: list[queries.ParentDatasetQueryResults] = [] 

2104 for resolved_dataset_type in resolved_dataset_types: 

2105 # The full set of dimensions in the query is the combination of 

2106 # those needed for the DatasetType and those explicitly requested, 

2107 # if any. 

2108 dimension_names = set(resolved_dataset_type.dimensions.names) 

2109 if dimensions is not None: 

2110 dimension_names.update(self.dimensions.conform(dimensions).names) 

2111 # Construct the summary structure needed to construct a 

2112 # QueryBuilder. 

2113 summary = queries.QuerySummary( 

2114 requested=self.dimensions.conform(dimension_names), 

2115 column_types=self._managers.column_types, 

2116 data_id=data_id, 

2117 expression=where, 

2118 bind=bind, 

2119 defaults=self.defaults.dataId, 

2120 check=check, 

2121 datasets=[resolved_dataset_type], 

2122 ) 

2123 builder = self._makeQueryBuilder(summary) 

2124 # Add the dataset subquery to the query, telling the QueryBuilder 

2125 # to include the rank of the selected collection in the results 

2126 # only if we need to findFirst. Note that if any of the 

2127 # collections are actually wildcard expressions, and 

2128 # findFirst=True, this will raise TypeError for us. 

2129 builder.joinDataset( 

2130 resolved_dataset_type, collection_wildcard, isResult=True, findFirst=findFirst 

2131 ) 

2132 query = builder.finish() 

2133 parent_results.append( 

2134 queries.ParentDatasetQueryResults(query, resolved_dataset_type, components=[None]) 

2135 ) 

2136 if not parent_results: 

2137 doomed_by.extend( 

2138 f"No registered dataset type matching {t!r} found, so no matching datasets can " 

2139 "exist in any collection." 

2140 for t in ensure_iterable(datasetType) 

2141 ) 

2142 return queries.ChainedDatasetQueryResults([], doomed_by=doomed_by) 

2143 elif len(parent_results) == 1: 

2144 return parent_results[0] 

2145 else: 

2146 return queries.ChainedDatasetQueryResults(parent_results) 

2147 

2148 def queryDataIds( 

2149 self, 

2150 # TODO: Drop Dimension support on DM-41326. 

2151 dimensions: DimensionGroup | Iterable[Dimension | str] | Dimension | str, 

2152 *, 

2153 dataId: DataId | None = None, 

2154 datasets: Any = None, 

2155 collections: CollectionArgType | None = None, 

2156 where: str = "", 

2157 components: bool | _Marker = _DefaultMarker, 

2158 bind: Mapping[str, Any] | None = None, 

2159 check: bool = True, 

2160 **kwargs: Any, 

2161 ) -> queries.DataCoordinateQueryResults: 

2162 """Query for data IDs matching user-provided criteria. 

2163 

2164 Parameters 

2165 ---------- 

2166 dimensions : `DimensionGroup`, `Dimension`, or `str`, or \ 

2167 `~collections.abc.Iterable` [ `Dimension` or `str` ] 

2168 The dimensions of the data IDs to yield, as either `Dimension` 

2169 instances or `str`. Will be automatically expanded to a complete 

2170 `DimensionGroup`. Support for `Dimension` instances is deprecated 

2171 and will not be supported after v27. 

2172 dataId : `dict` or `DataCoordinate`, optional 

2173 A data ID whose key-value pairs are used as equality constraints 

2174 in the query. 

2175 datasets : dataset type expression, optional 

2176 An expression that fully or partially identifies dataset types 

2177 that should constrain the yielded data IDs. For example, including 

2178 "raw" here would constrain the yielded ``instrument``, 

2179 ``exposure``, ``detector``, and ``physical_filter`` values to only 

2180 those for which at least one "raw" dataset exists in 

2181 ``collections``. Allowed types include `DatasetType`, `str`, 

2182 and iterables thereof. Regular expression objects (i.e. 

2183 `re.Pattern`) are deprecated and will be removed after the v26 

2184 release. See :ref:`daf_butler_dataset_type_expressions` for more 

2185 information. 

2186 collections : collection expression, optional 

2187 An expression that identifies the collections to search for 

2188 datasets, such as a `str` (for full matches or partial matches 

2189 via globs), `re.Pattern` (for partial matches), or iterable 

2190 thereof. ``...`` can be used to search all collections (actually 

2191 just all `~CollectionType.RUN` collections, because this will 

2192 still find all datasets). If not provided, 

2193 ``self.default.collections`` is used. Ignored unless ``datasets`` 

2194 is also passed. See :ref:`daf_butler_collection_expressions` for 

2195 more information. 

2196 where : `str`, optional 

2197 A string expression similar to a SQL WHERE clause. May involve 

2198 any column of a dimension table or (as a shortcut for the primary 

2199 key column of a dimension table) dimension name. See 

2200 :ref:`daf_butler_dimension_expressions` for more information. 

2201 components : `bool`, optional 

2202 Must be `False`. Provided only for backwards compatibility. After 

2203 v27 this argument will be removed entirely. 

2204 bind : `~collections.abc.Mapping`, optional 

2205 Mapping containing literal values that should be injected into the 

2206 ``where`` expression, keyed by the identifiers they replace. 

2207 Values of collection type can be expanded in some cases; see 

2208 :ref:`daf_butler_dimension_expressions_identifiers` for more 

2209 information. 

2210 check : `bool`, optional 

2211 If `True` (default) check the query for consistency before 

2212 executing it. This may reject some valid queries that resemble 

2213 common mistakes (e.g. queries for visits without specifying an 

2214 instrument). 

2215 **kwargs 

2216 Additional keyword arguments are forwarded to 

2217 `DataCoordinate.standardize` when processing the ``dataId`` 

2218 argument (and may be used to provide a constraining data ID even 

2219 when the ``dataId`` argument is `None`). 

2220 

2221 Returns 

2222 ------- 

2223 dataIds : `.queries.DataCoordinateQueryResults` 

2224 Data IDs matching the given query parameters. These are guaranteed 

2225 to identify all dimensions (`DataCoordinate.hasFull` returns 

2226 `True`), but will not contain `DimensionRecord` objects 

2227 (`DataCoordinate.hasRecords` returns `False`). Call 

2228 `~.queries.DataCoordinateQueryResults.expanded` on the 

2229 returned object to fetch those (and consider using 

2230 `~.queries.DataCoordinateQueryResults.materialize` on the 

2231 returned object first if the expected number of rows is very 

2232 large). See documentation for those methods for additional 

2233 information. 

2234 

2235 Raises 

2236 ------ 

2237 lsst.daf.butler.registry.NoDefaultCollectionError 

2238 Raised if ``collections`` is `None` and 

2239 ``self.defaults.collections`` is `None`. 

2240 lsst.daf.butler.registry.CollectionExpressionError 

2241 Raised when ``collections`` expression is invalid. 

2242 lsst.daf.butler.registry.DataIdError 

2243 Raised when ``dataId`` or keyword arguments specify unknown 

2244 dimensions or values, or when they contain inconsistent values. 

2245 lsst.daf.butler.registry.DatasetTypeExpressionError 

2246 Raised when ``datasetType`` expression is invalid. 

2247 lsst.daf.butler.registry.UserExpressionError 

2248 Raised when ``where`` expression is invalid. 

2249 """ 

2250 if components is not _DefaultMarker: 

2251 if components is not False: 

2252 raise DatasetTypeError( 

2253 "Dataset component queries are no longer supported by Registry. Use " 

2254 "DatasetType methods to obtain components from parent dataset types instead." 

2255 ) 

2256 else: 

2257 warnings.warn( 

2258 "The components parameter is ignored. It will be removed after v27.", 

2259 category=FutureWarning, 

2260 stacklevel=find_outside_stacklevel("lsst.daf.butler"), 

2261 ) 

2262 requested_dimensions = self.dimensions.conform(dimensions) 

2263 doomed_by: list[str] = [] 

2264 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs) 

2265 resolved_dataset_types, collection_wildcard = self._standardize_query_dataset_args( 

2266 datasets, collections, doomed_by=doomed_by 

2267 ) 

2268 if collection_wildcard is not None and collection_wildcard.empty(): 

2269 doomed_by.append("No data coordinates can be found because collection list is empty.") 

2270 summary = queries.QuerySummary( 

2271 requested=requested_dimensions, 

2272 column_types=self._managers.column_types, 

2273 data_id=data_id, 

2274 expression=where, 

2275 bind=bind, 

2276 defaults=self.defaults.dataId, 

2277 check=check, 

2278 datasets=resolved_dataset_types, 

2279 ) 

2280 builder = self._makeQueryBuilder(summary, doomed_by=doomed_by) 

2281 for datasetType in resolved_dataset_types: 

2282 builder.joinDataset(datasetType, collection_wildcard, isResult=False) 

2283 query = builder.finish() 

2284 

2285 return queries.DataCoordinateQueryResults(query) 

2286 

2287 def queryDimensionRecords( 

2288 self, 

2289 element: DimensionElement | str, 

2290 *, 

2291 dataId: DataId | None = None, 

2292 datasets: Any = None, 

2293 collections: CollectionArgType | None = None, 

2294 where: str = "", 

2295 components: bool | _Marker = _DefaultMarker, 

2296 bind: Mapping[str, Any] | None = None, 

2297 check: bool = True, 

2298 **kwargs: Any, 

2299 ) -> queries.DimensionRecordQueryResults: 

2300 """Query for dimension information matching user-provided criteria. 

2301 

2302 Parameters 

2303 ---------- 

2304 element : `DimensionElement` or `str` 

2305 The dimension element to obtain records for. 

2306 dataId : `dict` or `DataCoordinate`, optional 

2307 A data ID whose key-value pairs are used as equality constraints 

2308 in the query. 

2309 datasets : dataset type expression, optional 

2310 An expression that fully or partially identifies dataset types 

2311 that should constrain the yielded records. See `queryDataIds` and 

2312 :ref:`daf_butler_dataset_type_expressions` for more information. 

2313 collections : collection expression, optional 

2314 An expression that identifies the collections to search for 

2315 datasets, such as a `str` (for full matches or partial matches 

2316 via globs), `re.Pattern` (for partial matches), or iterable 

2317 thereof. ``...`` can be used to search all collections (actually 

2318 just all `~CollectionType.RUN` collections, because this will 

2319 still find all datasets). If not provided, 

2320 ``self.default.collections`` is used. Ignored unless ``datasets`` 

2321 is also passed. See :ref:`daf_butler_collection_expressions` for 

2322 more information. 

2323 where : `str`, optional 

2324 A string expression similar to a SQL WHERE clause. See 

2325 `queryDataIds` and :ref:`daf_butler_dimension_expressions` for more 

2326 information. 

2327 components : `bool`, optional 

2328 Whether to apply dataset expressions to components as well. 

2329 See `queryDataIds` for more information. 

2330 

2331 Must be `False`. Provided only for backwards compatibility. After 

2332 v27 this argument will be removed entirely. 

2333 bind : `~collections.abc.Mapping`, optional 

2334 Mapping containing literal values that should be injected into the 

2335 ``where`` expression, keyed by the identifiers they replace. 

2336 Values of collection type can be expanded in some cases; see 

2337 :ref:`daf_butler_dimension_expressions_identifiers` for more 

2338 information. 

2339 check : `bool`, optional 

2340 If `True` (default) check the query for consistency before 

2341 executing it. This may reject some valid queries that resemble 

2342 common mistakes (e.g. queries for visits without specifying an 

2343 instrument). 

2344 **kwargs 

2345 Additional keyword arguments are forwarded to 

2346 `DataCoordinate.standardize` when processing the ``dataId`` 

2347 argument (and may be used to provide a constraining data ID even 

2348 when the ``dataId`` argument is `None`). 

2349 

2350 Returns 

2351 ------- 

2352 dataIds : `.queries.DimensionRecordQueryResults` 

2353 Data IDs matching the given query parameters. 

2354 

2355 Raises 

2356 ------ 

2357 lsst.daf.butler.registry.NoDefaultCollectionError 

2358 Raised if ``collections`` is `None` and 

2359 ``self.defaults.collections`` is `None`. 

2360 lsst.daf.butler.registry.CollectionExpressionError 

2361 Raised when ``collections`` expression is invalid. 

2362 lsst.daf.butler.registry.DataIdError 

2363 Raised when ``dataId`` or keyword arguments specify unknown 

2364 dimensions or values, or when they contain inconsistent values. 

2365 lsst.daf.butler.registry.DatasetTypeExpressionError 

2366 Raised when ``datasetType`` expression is invalid. 

2367 lsst.daf.butler.registry.UserExpressionError 

2368 Raised when ``where`` expression is invalid. 

2369 """ 

2370 if components is not _DefaultMarker: 

2371 if components is not False: 

2372 raise DatasetTypeError( 

2373 "Dataset component queries are no longer supported by Registry. Use " 

2374 "DatasetType methods to obtain components from parent dataset types instead." 

2375 ) 

2376 else: 

2377 warnings.warn( 

2378 "The components parameter is ignored. It will be removed after v27.", 

2379 category=FutureWarning, 

2380 stacklevel=find_outside_stacklevel("lsst.daf.butler"), 

2381 ) 

2382 if not isinstance(element, DimensionElement): 

2383 try: 

2384 element = self.dimensions[element] 

2385 except KeyError as e: 

2386 raise DimensionNameError( 

2387 f"No such dimension '{element}', available dimensions: " + str(self.dimensions.elements) 

2388 ) from e 

2389 doomed_by: list[str] = [] 

2390 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs) 

2391 resolved_dataset_types, collection_wildcard = self._standardize_query_dataset_args( 

2392 datasets, collections, doomed_by=doomed_by 

2393 ) 

2394 if collection_wildcard is not None and collection_wildcard.empty(): 

2395 doomed_by.append("No dimension records can be found because collection list is empty.") 

2396 summary = queries.QuerySummary( 

2397 requested=element.minimal_group, 

2398 column_types=self._managers.column_types, 

2399 data_id=data_id, 

2400 expression=where, 

2401 bind=bind, 

2402 defaults=self.defaults.dataId, 

2403 check=check, 

2404 datasets=resolved_dataset_types, 

2405 ) 

2406 builder = self._makeQueryBuilder(summary, doomed_by=doomed_by) 

2407 for datasetType in resolved_dataset_types: 

2408 builder.joinDataset(datasetType, collection_wildcard, isResult=False) 

2409 query = builder.finish().with_record_columns(element.name) 

2410 return queries.DatabaseDimensionRecordQueryResults(query, element) 

2411 

2412 def queryDatasetAssociations( 

2413 self, 

2414 datasetType: str | DatasetType, 

2415 collections: CollectionArgType | None = ..., 

2416 *, 

2417 collectionTypes: Iterable[CollectionType] = CollectionType.all(), 

2418 flattenChains: bool = False, 

2419 ) -> Iterator[DatasetAssociation]: 

2420 """Iterate over dataset-collection combinations where the dataset is in 

2421 the collection. 

2422 

2423 This method is a temporary placeholder for better support for 

2424 association results in `queryDatasets`. It will probably be 

2425 removed in the future, and should be avoided in production code 

2426 whenever possible. 

2427 

2428 Parameters 

2429 ---------- 

2430 datasetType : `DatasetType` or `str` 

2431 A dataset type object or the name of one. 

2432 collections : collection expression, optional 

2433 An expression that identifies the collections to search for 

2434 datasets, such as a `str` (for full matches or partial matches 

2435 via globs), `re.Pattern` (for partial matches), or iterable 

2436 thereof. ``...`` can be used to search all collections (actually 

2437 just all `~CollectionType.RUN` collections, because this will still 

2438 find all datasets). If not provided, ``self.default.collections`` 

2439 is used. See :ref:`daf_butler_collection_expressions` for more 

2440 information. 

2441 collectionTypes : `~collections.abc.Set` [ `CollectionType` ], optional 

2442 If provided, only yield associations from collections of these 

2443 types. 

2444 flattenChains : `bool`, optional 

2445 If `True`, search in the children of `~CollectionType.CHAINED` 

2446 collections. If `False`, ``CHAINED`` collections are ignored. 

2447 

2448 Yields 

2449 ------ 

2450 association : `.DatasetAssociation` 

2451 Object representing the relationship between a single dataset and 

2452 a single collection. 

2453 

2454 Raises 

2455 ------ 

2456 lsst.daf.butler.registry.NoDefaultCollectionError 

2457 Raised if ``collections`` is `None` and 

2458 ``self.defaults.collections`` is `None`. 

2459 lsst.daf.butler.registry.CollectionExpressionError 

2460 Raised when ``collections`` expression is invalid. 

2461 """ 

2462 if collections is None: 

2463 if not self.defaults.collections: 

2464 raise NoDefaultCollectionError( 

2465 "No collections provided to queryDatasetAssociations, " 

2466 "and no defaults from registry construction." 

2467 ) 

2468 collections = self.defaults.collections 

2469 collection_wildcard = CollectionWildcard.from_expression(collections) 

2470 backend = queries.SqlQueryBackend(self._db, self._managers, self.dimension_record_cache) 

2471 parent_dataset_type = backend.resolve_single_dataset_type_wildcard(datasetType) 

2472 timespan_tag = DatasetColumnTag(parent_dataset_type.name, "timespan") 

2473 collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection") 

2474 for parent_collection_record in backend.resolve_collection_wildcard( 

2475 collection_wildcard, 

2476 collection_types=frozenset(collectionTypes), 

2477 flatten_chains=flattenChains, 

2478 ): 

2479 # Resolve this possibly-chained collection into a list of 

2480 # non-CHAINED collections that actually hold datasets of this 

2481 # type. 

2482 candidate_collection_records = backend.resolve_dataset_collections( 

2483 parent_dataset_type, 

2484 CollectionWildcard.from_names([parent_collection_record.name]), 

2485 allow_calibration_collections=True, 

2486 governor_constraints={}, 

2487 ) 

2488 if not candidate_collection_records: 

2489 continue 

2490 with backend.context() as context: 

2491 relation = backend.make_dataset_query_relation( 

2492 parent_dataset_type, 

2493 candidate_collection_records, 

2494 columns={"dataset_id", "run", "timespan", "collection"}, 

2495 context=context, 

2496 ) 

2497 reader = queries.DatasetRefReader( 

2498 parent_dataset_type, 

2499 translate_collection=lambda k: self._managers.collections[k].name, 

2500 full=False, 

2501 ) 

2502 for row in context.fetch_iterable(relation): 

2503 ref = reader.read(row) 

2504 collection_record = self._managers.collections[row[collection_tag]] 

2505 if collection_record.type is CollectionType.CALIBRATION: 

2506 timespan = row[timespan_tag] 

2507 else: 

2508 # For backwards compatibility and (possibly?) user 

2509 # convenience we continue to define the timespan of a 

2510 # DatasetAssociation row for a non-CALIBRATION 

2511 # collection to be None rather than a fully unbounded 

2512 # timespan. 

2513 timespan = None 

2514 yield DatasetAssociation(ref=ref, collection=collection_record.name, timespan=timespan) 

2515 

2516 def get_datastore_records(self, ref: DatasetRef) -> DatasetRef: 

2517 """Retrieve datastore records for given ref. 

2518 

2519 Parameters 

2520 ---------- 

2521 ref : `DatasetRef` 

2522 Dataset reference for which to retrieve its corresponding datastore 

2523 records. 

2524 

2525 Returns 

2526 ------- 

2527 updated_ref : `DatasetRef` 

2528 Dataset reference with filled datastore records. 

2529 

2530 Notes 

2531 ----- 

2532 If this method is called with the dataset ref that is not known to the 

2533 registry then the reference with an empty set of records is returned. 

2534 """ 

2535 datastore_records: dict[str, list[StoredDatastoreItemInfo]] = {} 

2536 for opaque, record_class in self._datastore_record_classes.items(): 

2537 records = self.fetchOpaqueData(opaque, dataset_id=ref.id) 

2538 datastore_records[opaque] = [record_class.from_record(record) for record in records] 

2539 return ref.replace(datastore_records=datastore_records) 

2540 

2541 def store_datastore_records(self, refs: Mapping[str, DatasetRef]) -> None: 

2542 """Store datastore records for given refs. 

2543 

2544 Parameters 

2545 ---------- 

2546 refs : `~collections.abc.Mapping` [`str`, `DatasetRef`] 

2547 Mapping of a datastore name to dataset reference stored in that 

2548 datastore, reference must include datastore records. 

2549 """ 

2550 for datastore_name, ref in refs.items(): 

2551 # Store ref IDs in the bridge table. 

2552 bridge = self._managers.datastores.register(datastore_name) 

2553 bridge.insert([ref]) 

2554 

2555 # store records in opaque tables 

2556 assert ref._datastore_records is not None, "Dataset ref must have datastore records" 

2557 for table_name, records in ref._datastore_records.items(): 

2558 opaque_table = self._managers.opaque.get(table_name) 

2559 assert opaque_table is not None, f"Unexpected opaque table name {table_name}" 

2560 opaque_table.insert(*(record.to_record(dataset_id=ref.id) for record in records)) 

2561 

2562 def make_datastore_tables(self, tables: Mapping[str, DatastoreOpaqueTable]) -> None: 

2563 """Create opaque tables used by datastores. 

2564 

2565 Parameters 

2566 ---------- 

2567 tables : `~collections.abc.Mapping` 

2568 Maps opaque table name to its definition. 

2569 

2570 Notes 

2571 ----- 

2572 This method should disappear in the future when opaque table 

2573 definitions will be provided during `Registry` construction. 

2574 """ 

2575 datastore_record_classes = {} 

2576 for table_name, table_def in tables.items(): 

2577 datastore_record_classes[table_name] = table_def.record_class 

2578 try: 

2579 self._managers.opaque.register(table_name, table_def.table_spec) 

2580 except ReadOnlyDatabaseError: 

2581 # If the database is read only and we just tried and failed to 

2582 # create a table, it means someone is trying to create a 

2583 # read-only butler client for an empty repo. That should be 

2584 # okay, as long as they then try to get any datasets before 

2585 # some other client creates the table. Chances are they're 

2586 # just validating configuration. 

2587 pass 

2588 self._datastore_record_classes = datastore_record_classes 

2589 

2590 def preload_cache(self) -> None: 

2591 """Immediately load caches that are used for common operations.""" 

2592 self.dimension_record_cache.preload_cache() 

2593 

2594 @property 

2595 def obsCoreTableManager(self) -> ObsCoreTableManager | None: 

2596 """The ObsCore manager instance for this registry 

2597 (`~.interfaces.ObsCoreTableManager` 

2598 or `None`). 

2599 

2600 ObsCore manager may not be implemented for all registry backend, or 

2601 may not be enabled for many repositories. 

2602 """ 

2603 return self._managers.obscore 

2604 

2605 storageClasses: StorageClassFactory 

2606 """All storage classes known to the registry (`StorageClassFactory`). 

2607 """ 

2608 

2609 _defaults: RegistryDefaults 

2610 """Default collections used for registry queries (`RegistryDefaults`)."""