Coverage for python/lsst/daf/butler/registry/sql_registry.py: 17%

577 statements  

coverage.py v7.4.0, created at 2024-01-16 10:44 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30from .. import ddl 

31 

32__all__ = ("SqlRegistry",) 

33 

34import contextlib 

35import logging 

36import warnings 

37from collections.abc import Iterable, Iterator, Mapping, Sequence 

38from typing import TYPE_CHECKING, Any, Literal, cast 

39 

40import sqlalchemy 

41from lsst.daf.relation import LeafRelation, Relation 

42from lsst.resources import ResourcePathExpression 

43from lsst.utils.introspection import find_outside_stacklevel 

44from lsst.utils.iteration import ensure_iterable 

45 

46from .._column_tags import DatasetColumnTag 

47from .._config import Config 

48from .._dataset_association import DatasetAssociation 

49from .._dataset_ref import DatasetId, DatasetIdGenEnum, DatasetRef 

50from .._dataset_type import DatasetType 

51from .._named import NamedKeyMapping, NameLookupMapping 

52from .._storage_class import StorageClassFactory 

53from .._timespan import Timespan 

54from ..dimensions import ( 

55 DataCoordinate, 

56 DataId, 

57 Dimension, 

58 DimensionConfig, 

59 DimensionElement, 

60 DimensionGraph, 

61 DimensionGroup, 

62 DimensionRecord, 

63 DimensionUniverse, 

64) 

65from ..dimensions.record_cache import DimensionRecordCache 

66from ..progress import Progress 

67from ..registry import ( 

68 ArgumentError, 

69 CollectionExpressionError, 

70 CollectionSummary, 

71 CollectionType, 

72 CollectionTypeError, 

73 ConflictingDefinitionError, 

74 DataIdValueError, 

75 DatasetTypeError, 

76 DimensionNameError, 

77 InconsistentDataIdError, 

78 MissingDatasetTypeError, 

79 NoDefaultCollectionError, 

80 OrphanedRecordError, 

81 RegistryConfig, 

82 RegistryConsistencyError, 

83 RegistryDefaults, 

84 queries, 

85) 

86from ..registry.interfaces import ChainedCollectionRecord, ReadOnlyDatabaseError, RunRecord 

87from ..registry.managers import RegistryManagerInstances, RegistryManagerTypes 

88from ..registry.wildcards import CollectionWildcard, DatasetTypeWildcard 

89from ..utils import _DefaultMarker, _Marker, transactional 

90 

91if TYPE_CHECKING: 

92 from .._butler_config import ButlerConfig 

93 from ..datastore._datastore import DatastoreOpaqueTable 

94 from ..datastore.stored_file_info import StoredDatastoreItemInfo 

95 from ..registry._registry import CollectionArgType 

96 from ..registry.interfaces import ( 

97 CollectionRecord, 

98 Database, 

99 DatastoreRegistryBridgeManager, 

100 ObsCoreTableManager, 

101 ) 

102 

103 

104_LOG = logging.getLogger(__name__) 

105 

106 

107class SqlRegistry: 

108 """Butler Registry implementation that uses SQL database as backend. 

109 

110 Parameters 

111 ---------- 

112 database : `Database` 

113 Database instance to store Registry. 

114 defaults : `RegistryDefaults` 

115 Default collection search path and/or output `~CollectionType.RUN` 

116 collection. 

117 managers : `RegistryManagerInstances` 

118 All the managers required for this registry. 

119 """ 

120 

121 defaultConfigFile: str | None = None 

122 """Path to configuration defaults. Accessed within the ``configs`` resource 

123 or relative to a search path. Can be `None` if no defaults are specified.

124 """ 

125 

126 @classmethod 

127 def forceRegistryConfig( 

128 cls, config: ButlerConfig | RegistryConfig | Config | str | None 

129 ) -> RegistryConfig: 

130 """Force the supplied config to a `RegistryConfig`. 

131 

132 Parameters 

133 ---------- 

134 config : `RegistryConfig`, `Config`, `str`, or `None`

135 Registry configuration; if missing, the default configuration will

136 be loaded from ``registry.yaml``.

137 

138 Returns 

139 ------- 

140 registry_config : `RegistryConfig` 

141 A registry config. 

142 """ 

143 if not isinstance(config, RegistryConfig): 

144 if isinstance(config, str | Config) or config is None: 

145 config = RegistryConfig(config) 

146 else: 

147 raise ValueError(f"Incompatible Registry configuration: {config}") 

148 return config 

149 

150 @classmethod 

151 def createFromConfig( 

152 cls, 

153 config: RegistryConfig | str | None = None, 

154 dimensionConfig: DimensionConfig | str | None = None, 

155 butlerRoot: ResourcePathExpression | None = None, 

156 ) -> SqlRegistry: 

157 """Create registry database and return `SqlRegistry` instance. 

158 

159 This method initializes database contents; the database must be empty

160 prior to calling this method.

161 

162 Parameters 

163 ---------- 

164 config : `RegistryConfig` or `str`, optional 

165 Registry configuration; if missing, the default configuration will

166 be loaded from ``registry.yaml``.

167 dimensionConfig : `DimensionConfig` or `str`, optional

168 Dimensions configuration; if missing, the default configuration

169 will be loaded from ``dimensions.yaml``.

170 butlerRoot : convertible to `lsst.resources.ResourcePath`, optional 

171 Path to the repository root this `SqlRegistry` will manage. 

172 

173 Returns 

174 ------- 

175 registry : `SqlRegistry` 

176 A new `SqlRegistry` instance. 
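
Examples
--------
A minimal sketch; the configuration path and repository root are
illustrative::

    registry = SqlRegistry.createFromConfig(
        "registry.yaml", butlerRoot="/path/to/repo"
    )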

177 """ 

178 config = cls.forceRegistryConfig(config) 

179 config.replaceRoot(butlerRoot) 

180 

181 if isinstance(dimensionConfig, str): 

182 dimensionConfig = DimensionConfig(dimensionConfig) 

183 elif dimensionConfig is None: 

184 dimensionConfig = DimensionConfig() 

185 elif not isinstance(dimensionConfig, DimensionConfig): 

186 raise TypeError(f"Incompatible Dimension configuration type: {type(dimensionConfig)}") 

187 

188 DatabaseClass = config.getDatabaseClass() 

189 database = DatabaseClass.fromUri( 

190 config.connectionString, origin=config.get("origin", 0), namespace=config.get("namespace") 

191 ) 

192 managerTypes = RegistryManagerTypes.fromConfig(config) 

193 managers = managerTypes.makeRepo(database, dimensionConfig) 

194 return cls(database, RegistryDefaults(), managers) 

195 

196 @classmethod 

197 def fromConfig( 

198 cls, 

199 config: ButlerConfig | RegistryConfig | Config | str, 

200 butlerRoot: ResourcePathExpression | None = None, 

201 writeable: bool = True, 

202 defaults: RegistryDefaults | None = None, 

203 ) -> SqlRegistry: 

204 """Create `Registry` subclass instance from `config`. 

205 

206 Registry database must be initialized prior to calling this method. 

207 

208 Parameters 

209 ---------- 

210 config : `ButlerConfig`, `RegistryConfig`, `Config` or `str` 

211 Registry configuration. 

212 butlerRoot : `lsst.resources.ResourcePathExpression`, optional 

213 Path to the repository root this `Registry` will manage. 

214 writeable : `bool`, optional 

215 If `True` (default) create a read-write connection to the database. 

216 defaults : `RegistryDefaults`, optional 

217 Default collection search path and/or output `~CollectionType.RUN` 

218 collection. 

219 

220 Returns 

221 ------- 

222 registry : `SqlRegistry` 

223 A new `SqlRegistry` subclass instance. 

224 """ 

225 config = cls.forceRegistryConfig(config) 

226 config.replaceRoot(butlerRoot) 

227 DatabaseClass = config.getDatabaseClass() 

228 database = DatabaseClass.fromUri( 

229 config.connectionString, 

230 origin=config.get("origin", 0), 

231 namespace=config.get("namespace"), 

232 writeable=writeable, 

233 ) 

234 managerTypes = RegistryManagerTypes.fromConfig(config) 

235 with database.session(): 

236 managers = managerTypes.loadRepo(database) 

237 if defaults is None: 

238 defaults = RegistryDefaults() 

239 return cls(database, defaults, managers) 

240 

241 def __init__( 

242 self, 

243 database: Database, 

244 defaults: RegistryDefaults, 

245 managers: RegistryManagerInstances, 

246 ): 

247 self._db = database 

248 self._managers = managers 

249 self.storageClasses = StorageClassFactory() 

250 # This is public to SqlRegistry's internal-to-daf_butler callers, but 

251 # it is intentionally not part of RegistryShim. 

252 self.dimension_record_cache = DimensionRecordCache( 

253 self._managers.dimensions.universe, 

254 fetch=self._managers.dimensions.fetch_cache_dict, 

255 ) 

256 # Intentionally invoke property setter to initialize defaults. This 

257 # can only be done after most of the rest of Registry has already been 

258 # initialized, and must be done before the property getter is used. 

259 self.defaults = defaults 

260 # TODO: This is currently initialized by `make_datastore_tables`, 

261 # eventually we'll need to do it during construction. 

262 # The mapping is indexed by the opaque table name. 

263 self._datastore_record_classes: Mapping[str, type[StoredDatastoreItemInfo]] = {} 

264 

265 def __str__(self) -> str: 

266 return str(self._db) 

267 

268 def __repr__(self) -> str: 

269 return f"SqlRegistry({self._db!r}, {self.dimensions!r})" 

270 

271 def isWriteable(self) -> bool: 

272 """Return `True` if this registry allows write operations, and `False` 

273 otherwise. 

274 """ 

275 return self._db.isWriteable() 

276 

277 def copy(self, defaults: RegistryDefaults | None = None) -> SqlRegistry: 

278 """Create a new `SqlRegistry` backed by the same data repository 

279 and connection as this one, but independent defaults. 

280 

281 Parameters 

282 ---------- 

283 defaults : `~lsst.daf.butler.registry.RegistryDefaults`, optional 

284 Default collections and data ID values for the new registry. If 

285 not provided, ``self.defaults`` will be used (but future changes 

286 to either registry's defaults will not affect the other). 

287 

288 Returns 

289 ------- 

290 copy : `SqlRegistry` 

291 A new `SqlRegistry` instance with its own defaults. 

292 

293 Notes 

294 ----- 

295 Because the new registry shares a connection with the original, they 

296 also share transaction state (despite the fact that their `transaction` 

297 context manager methods do not reflect this), and must be used with 

298 care. 

299 """ 

300 if defaults is None: 

301 # No need to copy, because `RegistryDefaults` is immutable; we 

302 # effectively copy on write. 

303 defaults = self.defaults 

304 result = SqlRegistry(self._db, defaults, self._managers) 

305 result.dimension_record_cache.load_from(self.dimension_record_cache) 

306 return result 

307 

308 @property 

309 def dimensions(self) -> DimensionUniverse: 

310 """Definitions of all dimensions recognized by this `Registry` 

311 (`DimensionUniverse`). 

312 """ 

313 return self._managers.dimensions.universe 

314 

315 @property 

316 def defaults(self) -> RegistryDefaults: 

317 """Default collection search path and/or output `~CollectionType.RUN` 

318 collection (`~lsst.daf.butler.registry.RegistryDefaults`). 

319 

320 This is an immutable struct whose components may not be set 

321 individually, but the entire struct can be set by assigning to this 

322 property. 

323 """ 

324 return self._defaults 

325 

326 @defaults.setter 

327 def defaults(self, value: RegistryDefaults) -> None: 

328 if value.run is not None: 

329 self.registerRun(value.run) 

330 value.finish(self) 

331 self._defaults = value 

332 

333 def refresh(self) -> None: 

334 """Refresh all in-memory state by querying the database. 

335 

336 This may be necessary to enable querying for entities added by other 

337 registry instances after this one was constructed. 

338 """ 

339 self.dimension_record_cache.reset() 

340 with self._db.transaction(): 

341 self._managers.refresh() 

342 

343 def caching_context(self) -> contextlib.AbstractContextManager[None]: 

344 """Return context manager that enables caching. 

345 

346 Returns 

347 ------- 

348 manager 

349 A context manager that enables client-side caching. Entering 

350 the context returns `None`. 
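
Examples
--------
An illustrative sketch; ``registry`` is assumed to be a constructed
`SqlRegistry` and the dataset type and collection names are hypothetical::

    with registry.caching_context():
        # Repeated lookups inside this block can reuse client-side
        # caches instead of re-querying the database.
        registry.getDatasetType("flat")
        registry.getCollectionType("HSC/calib")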

351 """ 

352 return self._managers.caching_context_manager() 

353 

354 @contextlib.contextmanager 

355 def transaction(self, *, savepoint: bool = False) -> Iterator[None]: 

356 """Return a context manager that represents a transaction. 

357 

358 Parameters 

359 ---------- 

360 savepoint : `bool` 

361 Whether to issue a SAVEPOINT in the database. 

362 

363 Yields 

364 ------ 

365 `None` 
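
Examples
--------
A sketch of grouping writes so they commit or roll back together; the
dataset type, data ID, and collection names are hypothetical and
``data_id`` is assumed to be defined by the caller::

    with registry.transaction(savepoint=True):
        refs = registry.insertDatasets("flat", dataIds=[data_id], run="HSC/calib/run1")
        registry.associate("HSC/calib/tagged", refs)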

366 """ 

367 with self._db.transaction(savepoint=savepoint): 

368 yield 

369 

370 def resetConnectionPool(self) -> None: 

371 """Reset SQLAlchemy connection pool for `SqlRegistry` database. 

372 

373 This operation is useful when using the registry with fork-based

374 multiprocessing. To use the registry across a fork boundary one has to

375 make sure that there are no currently active connections (no session or

376 transaction in progress) and that the connection pool is reset using

377 this method. This method should be called by the child process

378 immediately after the fork.

379 """ 

380 self._db._engine.dispose() 

381 

382 def registerOpaqueTable(self, tableName: str, spec: ddl.TableSpec) -> None: 

383 """Add an opaque (to the `Registry`) table for use by a `Datastore` or 

384 other data repository client. 

385 

386 Opaque table records can be added via `insertOpaqueData`, retrieved via 

387 `fetchOpaqueData`, and removed via `deleteOpaqueData`. 

388 

389 Parameters 

390 ---------- 

391 tableName : `str` 

392 Logical name of the opaque table. This may differ from the 

393 actual name used in the database by a prefix and/or suffix. 

394 spec : `ddl.TableSpec` 

395 Specification for the table to be added. 
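
Examples
--------
An illustrative sketch of the opaque-table round trip described above;
the table name and field definitions are hypothetical::

    spec = ddl.TableSpec(
        fields=[
            ddl.FieldSpec("dataset_id", dtype=sqlalchemy.BigInteger, primaryKey=True),
            ddl.FieldSpec("path", dtype=sqlalchemy.String, length=256),
        ]
    )
    registry.registerOpaqueTable("my_datastore_records", spec)
    registry.insertOpaqueData("my_datastore_records", {"dataset_id": 1, "path": "a.fits"})
    rows = list(registry.fetchOpaqueData("my_datastore_records", dataset_id=1))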

396 """ 

397 self._managers.opaque.register(tableName, spec) 

398 

399 @transactional 

400 def insertOpaqueData(self, tableName: str, *data: dict) -> None: 

401 """Insert records into an opaque table. 

402 

403 Parameters 

404 ---------- 

405 tableName : `str` 

406 Logical name of the opaque table. Must match the name used in a 

407 previous call to `registerOpaqueTable`. 

408 *data 

409 Each additional positional argument is a dictionary that represents 

410 a single row to be added. 

411 """ 

412 self._managers.opaque[tableName].insert(*data) 

413 

414 def fetchOpaqueData(self, tableName: str, **where: Any) -> Iterator[Mapping[str, Any]]: 

415 """Retrieve records from an opaque table. 

416 

417 Parameters 

418 ---------- 

419 tableName : `str` 

420 Logical name of the opaque table. Must match the name used in a 

421 previous call to `registerOpaqueTable`. 

422 **where 

423 Additional keyword arguments are interpreted as equality 

424 constraints that restrict the returned rows (combined with AND); 

425 keyword arguments are column names and values are the values they 

426 must have. 

427 

428 Yields 

429 ------ 

430 row : `dict` 

431 A dictionary representing a single result row. 

432 """ 

433 yield from self._managers.opaque[tableName].fetch(**where) 

434 

435 @transactional 

436 def deleteOpaqueData(self, tableName: str, **where: Any) -> None: 

437 """Remove records from an opaque table. 

438 

439 Parameters 

440 ---------- 

441 tableName : `str` 

442 Logical name of the opaque table. Must match the name used in a 

443 previous call to `registerOpaqueTable`. 

444 **where 

445 Additional keyword arguments are interpreted as equality 

446 constraints that restrict the deleted rows (combined with AND); 

447 keyword arguments are column names and values are the values they 

448 must have. 

449 """ 

450 self._managers.opaque[tableName].delete(where.keys(), where) 

451 

452 def registerCollection( 

453 self, name: str, type: CollectionType = CollectionType.TAGGED, doc: str | None = None 

454 ) -> bool: 

455 """Add a new collection if one with the given name does not exist. 

456 

457 Parameters 

458 ---------- 

459 name : `str` 

460 The name of the collection to create. 

461 type : `CollectionType` 

462 Enum value indicating the type of collection to create. 

463 doc : `str`, optional 

464 Documentation string for the collection. 

465 

466 Returns 

467 ------- 

468 registered : `bool` 

469 Boolean indicating whether the collection was created by this call

470 (`True`) or already existed (`False`).

471 

472 Notes 

473 ----- 

474 This method cannot be called within transactions, as it needs to be 

475 able to perform its own transaction to be concurrent. 
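
Examples
--------
An illustrative sketch; the collection name and docstring are
hypothetical::

    created = registry.registerCollection(
        "u/someone/tagged", CollectionType.TAGGED, doc="Hand-picked datasets."
    )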

476 """ 

477 _, registered = self._managers.collections.register(name, type, doc=doc) 

478 return registered 

479 

480 def getCollectionType(self, name: str) -> CollectionType: 

481 """Return an enumeration value indicating the type of the given 

482 collection. 

483 

484 Parameters 

485 ---------- 

486 name : `str` 

487 The name of the collection. 

488 

489 Returns 

490 ------- 

491 type : `CollectionType` 

492 Enum value indicating the type of this collection. 

493 

494 Raises 

495 ------ 

496 lsst.daf.butler.registry.MissingCollectionError 

497 Raised if no collection with the given name exists. 

498 """ 

499 return self._managers.collections.find(name).type 

500 

501 def _get_collection_record(self, name: str) -> CollectionRecord: 

502 """Return the record for this collection. 

503 

504 Parameters 

505 ---------- 

506 name : `str` 

507 Name of the collection for which the record is to be retrieved. 

508 

509 Returns 

510 ------- 

511 record : `CollectionRecord` 

512 The record for this collection. 

513 """ 

514 return self._managers.collections.find(name) 

515 

516 def registerRun(self, name: str, doc: str | None = None) -> bool: 

517 """Add a new run if one with the given name does not exist. 

518 

519 Parameters 

520 ---------- 

521 name : `str` 

522 The name of the run to create. 

523 doc : `str`, optional 

524 Documentation string for the collection. 

525 

526 Returns 

527 ------- 

528 registered : `bool` 

529 Boolean indicating whether a new run was registered. `False` 

530 if it already existed. 

531 

532 Notes 

533 ----- 

534 This method cannot be called within transactions, as it needs to be 

535 able to perform its own transaction to be concurrent. 

536 """ 

537 _, registered = self._managers.collections.register(name, CollectionType.RUN, doc=doc) 

538 return registered 

539 

540 @transactional 

541 def removeCollection(self, name: str) -> None: 

542 """Remove the given collection from the registry. 

543 

544 Parameters 

545 ---------- 

546 name : `str` 

547 The name of the collection to remove. 

548 

549 Raises 

550 ------ 

551 lsst.daf.butler.registry.MissingCollectionError 

552 Raised if no collection with the given name exists. 

553 sqlalchemy.exc.IntegrityError 

554 Raised if the database rows associated with the collection are 

555 still referenced by some other table, such as a dataset in a 

556 datastore (for `~CollectionType.RUN` collections only) or a 

557 `~CollectionType.CHAINED` collection of which this collection is 

558 a child. 

559 

560 Notes 

561 ----- 

562 If this is a `~CollectionType.RUN` collection, all datasets and quanta 

563 in it will be removed from the `Registry` database. This requires that

564 those datasets be removed (or at least trashed) from any datastores 

565 that hold them first. 

566 

567 A collection may not be deleted as long as it is referenced by a 

568 `~CollectionType.CHAINED` collection; the ``CHAINED`` collection must 

569 be deleted or redefined first. 

570 """ 

571 self._managers.collections.remove(name) 

572 

573 def getCollectionChain(self, parent: str) -> tuple[str, ...]: 

574 """Return the child collections in a `~CollectionType.CHAINED` 

575 collection. 

576 

577 Parameters 

578 ---------- 

579 parent : `str` 

580 Name of the chained collection. Must have already been added via 

581 a call to `Registry.registerCollection`. 

582 

583 Returns 

584 ------- 

585 children : `~collections.abc.Sequence` [ `str` ] 

586 An ordered sequence of collection names that are searched when the 

587 given chained collection is searched. 

588 

589 Raises 

590 ------ 

591 lsst.daf.butler.registry.MissingCollectionError 

592 Raised if ``parent`` does not exist in the `Registry`. 

593 lsst.daf.butler.registry.CollectionTypeError 

594 Raised if ``parent`` does not correspond to a 

595 `~CollectionType.CHAINED` collection. 

596 """ 

597 record = self._managers.collections.find(parent) 

598 if record.type is not CollectionType.CHAINED: 

599 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.") 

600 assert isinstance(record, ChainedCollectionRecord) 

601 return record.children 

602 

603 @transactional 

604 def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None: 

605 """Define or redefine a `~CollectionType.CHAINED` collection. 

606 

607 Parameters 

608 ---------- 

609 parent : `str` 

610 Name of the chained collection. Must have already been added via 

611 a call to `Registry.registerCollection`. 

612 children : collection expression 

613 An expression defining an ordered search of child collections, 

614 generally an iterable of `str`; see 

615 :ref:`daf_butler_collection_expressions` for more information. 

616 flatten : `bool`, optional 

617 If `True` (`False` is default), recursively flatten out any nested 

618 `~CollectionType.CHAINED` collections in ``children`` first. 

619 

620 Raises 

621 ------ 

622 lsst.daf.butler.registry.MissingCollectionError 

623 Raised when any of the given collections do not exist in the 

624 `Registry`. 

625 lsst.daf.butler.registry.CollectionTypeError 

626 Raised if ``parent`` does not correspond to a 

627 `~CollectionType.CHAINED` collection. 

628 ValueError 

629 Raised if the given collections contain a cycle.
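
Examples
--------
A sketch of (re)defining a chain; all collection names are illustrative::

    registry.registerCollection("HSC/defaults", CollectionType.CHAINED)
    registry.setCollectionChain("HSC/defaults", ["HSC/calib", "HSC/raw/all"])
    registry.getCollectionChain("HSC/defaults")  # ("HSC/calib", "HSC/raw/all")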

630 """ 

631 record = self._managers.collections.find(parent) 

632 if record.type is not CollectionType.CHAINED: 

633 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.") 

634 assert isinstance(record, ChainedCollectionRecord) 

635 children = CollectionWildcard.from_expression(children).require_ordered() 

636 if children != record.children or flatten: 

637 self._managers.collections.update_chain(record, children, flatten=flatten) 

638 

639 def getCollectionParentChains(self, collection: str) -> set[str]: 

640 """Return the CHAINED collections that directly contain the given one. 

641 

642 Parameters 

643 ---------- 

644 collection : `str` 

645 Name of the collection. 

646 

647 Returns 

648 ------- 

649 chains : `set` of `str` 

650 Set of `~CollectionType.CHAINED` collection names. 

651 """ 

652 return self._managers.collections.getParentChains(self._managers.collections.find(collection).key) 

653 

654 def getCollectionDocumentation(self, collection: str) -> str | None: 

655 """Retrieve the documentation string for a collection. 

656 

657 Parameters 

658 ---------- 

659 collection : `str` 

660 Name of the collection. 

661 

662 Returns 

663 ------- 

664 docs : `str` or `None` 

665 Docstring for the collection with the given name. 

666 """ 

667 return self._managers.collections.getDocumentation(self._managers.collections.find(collection).key) 

668 

669 def setCollectionDocumentation(self, collection: str, doc: str | None) -> None: 

670 """Set the documentation string for a collection. 

671 

672 Parameters 

673 ---------- 

674 collection : `str` 

675 Name of the collection. 

676 doc : `str` or `None` 

677 Docstring for the collection with the given name; will replace any 

678 existing docstring. Passing `None` will remove any existing 

679 docstring. 

680 """ 

681 self._managers.collections.setDocumentation(self._managers.collections.find(collection).key, doc) 

682 

683 def getCollectionSummary(self, collection: str) -> CollectionSummary: 

684 """Return a summary for the given collection. 

685 

686 Parameters 

687 ---------- 

688 collection : `str` 

689 Name of the collection for which a summary is to be retrieved. 

690 

691 Returns 

692 ------- 

693 summary : `~lsst.daf.butler.registry.CollectionSummary` 

694 Summary of the dataset types and governor dimension values in 

695 this collection. 

696 """ 

697 record = self._managers.collections.find(collection) 

698 return self._managers.datasets.getCollectionSummary(record) 

699 

700 def registerDatasetType(self, datasetType: DatasetType) -> bool: 

701 """Add a new `DatasetType` to the Registry. 

702 

703 It is not an error to register the same `DatasetType` twice. 

704 

705 Parameters 

706 ---------- 

707 datasetType : `DatasetType` 

708 The `DatasetType` to be added. 

709 

710 Returns 

711 ------- 

712 inserted : `bool` 

713 `True` if ``datasetType`` was inserted, `False` if an identical 

714 existing `DatasetType` was found. Note that in either case the 

715 DatasetType is guaranteed to be defined in the Registry 

716 consistently with the given definition. 

717 

718 Raises 

719 ------ 

720 ValueError 

721 Raised if the dimensions or storage class are invalid. 

722 lsst.daf.butler.registry.ConflictingDefinitionError 

723 Raised if this `DatasetType` is already registered with a different 

724 definition. 

725 

726 Notes 

727 ----- 

728 This method cannot be called within transactions, as it needs to be 

729 able to perform its own transaction to be concurrent. 
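
Examples
--------
An illustrative sketch; the dataset type name, dimensions, and storage
class are hypothetical::

    flat = DatasetType(
        "flat",
        dimensions=["instrument", "detector", "physical_filter"],
        storageClass="ExposureF",
        universe=registry.dimensions,
    )
    registry.registerDatasetType(flat)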

730 """ 

731 return self._managers.datasets.register(datasetType) 

732 

733 def removeDatasetType(self, name: str | tuple[str, ...]) -> None: 

734 """Remove the named `DatasetType` from the registry. 

735 

736 .. warning:: 

737 

738 Registry implementations can cache the dataset type definitions. 

739 This means that deleting the dataset type definition may result in 

740 unexpected behavior from other butler processes that are active 

741 that have not seen the deletion. 

742 

743 Parameters 

744 ---------- 

745 name : `str` or `tuple` [`str`] 

746 Name of the type to be removed, or a tuple of type names to be

747 removed. Wildcards are allowed.

748 

749 Raises 

750 ------ 

751 lsst.daf.butler.registry.OrphanedRecordError 

752 Raised if an attempt is made to remove the dataset type definition 

753 when there are already datasets associated with it. 

754 

755 Notes 

756 ----- 

757 If the dataset type is not registered the method will return without 

758 action. 

759 """ 

760 for datasetTypeExpression in ensure_iterable(name): 

761 # Catch any warnings from the caller specifying a component 

762 # dataset type. This will result in an error later but the 

763 # warning could be confusing when the caller is not querying 

764 # anything. 

765 with warnings.catch_warnings(): 

766 warnings.simplefilter("ignore", category=FutureWarning) 

767 datasetTypes = list(self.queryDatasetTypes(datasetTypeExpression)) 

768 if not datasetTypes: 

769 _LOG.info("Dataset type %r not defined", datasetTypeExpression) 

770 else: 

771 for datasetType in datasetTypes: 

772 self._managers.datasets.remove(datasetType.name) 

773 _LOG.info("Removed dataset type %r", datasetType.name) 

774 

775 def getDatasetType(self, name: str) -> DatasetType: 

776 """Get the `DatasetType`. 

777 

778 Parameters 

779 ---------- 

780 name : `str` 

781 Name of the type. 

782 

783 Returns 

784 ------- 

785 type : `DatasetType` 

786 The `DatasetType` associated with the given name. 

787 

788 Raises 

789 ------ 

790 lsst.daf.butler.registry.MissingDatasetTypeError 

791 Raised if the requested dataset type has not been registered. 

792 

793 Notes 

794 ----- 

795 This method handles component dataset types automatically, though most 

796 other registry operations do not. 

797 """ 

798 parent_name, component = DatasetType.splitDatasetTypeName(name) 

799 storage = self._managers.datasets[parent_name] 

800 if component is None: 

801 return storage.datasetType 

802 else: 

803 return storage.datasetType.makeComponentDatasetType(component) 

804 

805 def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool: 

806 """Test whether the given dataset ID generation mode is supported by 

807 `insertDatasets`. 

808 

809 Parameters 

810 ---------- 

811 mode : `DatasetIdGenEnum` 

812 Enum value for the mode to test. 

813 

814 Returns 

815 ------- 

816 supported : `bool` 

817 Whether the given mode is supported. 

818 """ 

819 return self._managers.datasets.supportsIdGenerationMode(mode) 

820 

821 def findDataset( 

822 self, 

823 datasetType: DatasetType | str, 

824 dataId: DataId | None = None, 

825 *, 

826 collections: CollectionArgType | None = None, 

827 timespan: Timespan | None = None, 

828 datastore_records: bool = False, 

829 **kwargs: Any, 

830 ) -> DatasetRef | None: 

831 """Find a dataset given its `DatasetType` and data ID. 

832 

833 This can be used to obtain a `DatasetRef` that permits the dataset to 

834 be read from a `Datastore`. If the dataset is a component and can not 

835 be found using the provided dataset type, a dataset ref for the parent 

836 will be returned instead but with the correct dataset type. 

837 

838 Parameters 

839 ---------- 

840 datasetType : `DatasetType` or `str` 

841 A `DatasetType` or the name of one. If this is a `DatasetType` 

842 instance, its storage class will be respected and propagated to 

843 the output, even if it differs from the dataset type definition 

844 in the registry, as long as the storage classes are convertible. 

845 dataId : `dict` or `DataCoordinate`, optional 

846 A `dict`-like object containing the `Dimension` links that identify 

847 the dataset within a collection. 

848 collections : collection expression, optional 

849 An expression that fully or partially identifies the collections to 

850 search for the dataset; see 

851 :ref:`daf_butler_collection_expressions` for more information. 

852 Defaults to ``self.defaults.collections``. 

853 timespan : `Timespan`, optional 

854 A timespan that the validity range of the dataset must overlap. 

855 If not provided, any `~CollectionType.CALIBRATION` collections 

856 matched by the ``collections`` argument will not be searched. 

857 datastore_records : `bool`, optional 

858 Whether to attach datastore records to the `DatasetRef`. 

859 **kwargs 

860 Additional keyword arguments passed to 

861 `DataCoordinate.standardize` to convert ``dataId`` to a true 

862 `DataCoordinate` or augment an existing one. 

863 

864 Returns 

865 ------- 

866 ref : `DatasetRef` 

867 A reference to the dataset, or `None` if no matching Dataset 

868 was found. 

869 

870 Raises 

871 ------ 

872 lsst.daf.butler.registry.NoDefaultCollectionError 

873 Raised if ``collections`` is `None` and 

874 ``self.defaults.collections`` is `None`. 

875 LookupError 

876 Raised if one or more data ID keys are missing. 

877 lsst.daf.butler.registry.MissingDatasetTypeError 

878 Raised if the dataset type does not exist. 

879 lsst.daf.butler.registry.MissingCollectionError 

880 Raised if any of ``collections`` does not exist in the registry. 

881 

882 Notes 

883 ----- 

884 This method simply returns `None` and does not raise an exception even 

885 when the set of collections searched is intrinsically incompatible with 

886 the dataset type, e.g. if ``datasetType.isCalibration() is False``, but 

887 only `~CollectionType.CALIBRATION` collections are being searched. 

888 This may make it harder to debug some lookup failures, but the behavior 

889 is intentional; we consider it more important that failed searches are 

890 reported consistently, regardless of the reason, and that adding 

891 additional collections that do not contain a match to the search path 

892 never changes the behavior. 

893 

894 This method handles component dataset types automatically, though most 

895 other registry operations do not. 
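
Examples
--------
An illustrative sketch; the dataset type, data ID values, and collection
name are hypothetical::

    ref = registry.findDataset(
        "flat",
        instrument="HSC",
        detector=42,
        physical_filter="HSC-I",
        collections=["HSC/calib"],
    )
    if ref is None:
        ...  # no matching dataset in the searched collections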

896 """ 

897 if collections is None: 

898 if not self.defaults.collections: 

899 raise NoDefaultCollectionError( 

900 "No collections provided to findDataset, and no defaults from registry construction." 

901 ) 

902 collections = self.defaults.collections 

903 backend = queries.SqlQueryBackend(self._db, self._managers, self.dimension_record_cache) 

904 with backend.caching_context(): 

905 collection_wildcard = CollectionWildcard.from_expression(collections, require_ordered=True) 

906 if collection_wildcard.empty(): 

907 return None 

908 matched_collections = backend.resolve_collection_wildcard(collection_wildcard) 

909 resolved_dataset_type = backend.resolve_single_dataset_type_wildcard(datasetType) 

910 dataId = DataCoordinate.standardize( 

911 dataId, 

912 dimensions=resolved_dataset_type.dimensions, 

913 universe=self.dimensions, 

914 defaults=self.defaults.dataId, 

915 **kwargs, 

916 ) 

917 governor_constraints = {name: {cast(str, dataId[name])} for name in dataId.dimensions.governors} 

918 (filtered_collections,) = backend.filter_dataset_collections( 

919 [resolved_dataset_type], 

920 matched_collections, 

921 governor_constraints=governor_constraints, 

922 ).values() 

923 if not filtered_collections: 

924 return None 

925 if timespan is None: 

926 filtered_collections = [ 

927 collection_record 

928 for collection_record in filtered_collections 

929 if collection_record.type is not CollectionType.CALIBRATION 

930 ] 

931 if filtered_collections: 

932 requested_columns = {"dataset_id", "run", "collection"} 

933 with backend.context() as context: 

934 predicate = context.make_data_coordinate_predicate( 

935 dataId.subset(resolved_dataset_type.dimensions), full=False 

936 ) 

937 if timespan is not None: 

938 requested_columns.add("timespan") 

939 predicate = predicate.logical_and( 

940 context.make_timespan_overlap_predicate( 

941 DatasetColumnTag(resolved_dataset_type.name, "timespan"), timespan 

942 ) 

943 ) 

944 relation = backend.make_dataset_query_relation( 

945 resolved_dataset_type, filtered_collections, requested_columns, context 

946 ).with_rows_satisfying(predicate) 

947 rows = list(context.fetch_iterable(relation)) 

948 else: 

949 rows = [] 

950 if not rows: 

951 return None 

952 elif len(rows) == 1: 

953 best_row = rows[0] 

954 else: 

955 rank_by_collection_key = {record.key: n for n, record in enumerate(filtered_collections)} 

956 collection_tag = DatasetColumnTag(resolved_dataset_type.name, "collection") 

957 row_iter = iter(rows) 

958 best_row = next(row_iter) 

959 best_rank = rank_by_collection_key[best_row[collection_tag]] 

960 have_tie = False 

961 for row in row_iter: 

962 if (rank := rank_by_collection_key[row[collection_tag]]) < best_rank: 

963 best_row = row 

964 best_rank = rank 

965 have_tie = False 

966 elif rank == best_rank: 

967 have_tie = True 

968 assert timespan is not None, "Rank ties should be impossible given DB constraints." 

969 if have_tie: 

970 raise LookupError( 

971 f"Ambiguous calibration lookup for {resolved_dataset_type.name} in collections " 

972 f"{collection_wildcard.strings} with timespan {timespan}." 

973 ) 

974 reader = queries.DatasetRefReader( 

975 resolved_dataset_type, 

976 translate_collection=lambda k: self._managers.collections[k].name, 

977 ) 

978 ref = reader.read(best_row, data_id=dataId) 

979 if datastore_records: 

980 ref = self.get_datastore_records(ref) 

981 

982 return ref 

983 

984 @transactional 

985 def insertDatasets( 

986 self, 

987 datasetType: DatasetType | str, 

988 dataIds: Iterable[DataId], 

989 run: str | None = None, 

990 expand: bool = True, 

991 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE, 

992 ) -> list[DatasetRef]: 

993 """Insert one or more datasets into the `Registry`. 

994 

995 This always adds new datasets; to associate existing datasets with 

996 a new collection, use ``associate``. 

997 

998 Parameters 

999 ---------- 

1000 datasetType : `DatasetType` or `str` 

1001 A `DatasetType` or the name of one. 

1002 dataIds : `~collections.abc.Iterable` of `dict` or `DataCoordinate` 

1003 Dimension-based identifiers for the new datasets. 

1004 run : `str`, optional 

1005 The name of the run that produced the datasets. Defaults to 

1006 ``self.defaults.run``. 

1007 expand : `bool`, optional 

1008 If `True` (default), expand data IDs as they are inserted. This is 

1009 necessary in general to allow datastore to generate file templates, 

1010 but it may be disabled if the caller can guarantee this is 

1011 unnecessary. 

1012 idGenerationMode : `DatasetIdGenEnum`, optional 

1013 Specifies option for generating dataset IDs. By default unique IDs 

1014 are generated for each inserted dataset. 

1015 

1016 Returns 

1017 ------- 

1018 refs : `list` of `DatasetRef` 

1019 Resolved `DatasetRef` instances for all given data IDs (in the same 

1020 order). 

1021 

1022 Raises 

1023 ------ 

1024 lsst.daf.butler.registry.DatasetTypeError 

1025 Raised if ``datasetType`` is not known to registry. 

1026 lsst.daf.butler.registry.CollectionTypeError 

1027 Raised if ``run`` collection type is not `~CollectionType.RUN`. 

1028 lsst.daf.butler.registry.NoDefaultCollectionError 

1029 Raised if ``run`` is `None` and ``self.defaults.run`` is `None`. 

1030 lsst.daf.butler.registry.ConflictingDefinitionError 

1031 If a dataset with the same dataset type and data ID as one of those 

1032 given already exists in ``run``. 

1033 lsst.daf.butler.registry.MissingCollectionError 

1034 Raised if ``run`` does not exist in the registry. 
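
Examples
--------
An illustrative sketch; the dataset type, data ID, and run name are
hypothetical::

    (ref,) = registry.insertDatasets(
        "flat",
        dataIds=[{"instrument": "HSC", "detector": 42, "physical_filter": "HSC-I"}],
        run="HSC/calib/run1",
    )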

1035 """ 

1036 if isinstance(datasetType, DatasetType): 

1037 storage = self._managers.datasets.find(datasetType.name) 

1038 if storage is None: 

1039 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.") 

1040 else: 

1041 storage = self._managers.datasets.find(datasetType) 

1042 if storage is None: 

1043 raise DatasetTypeError(f"DatasetType with name '{datasetType}' has not been registered.") 

1044 if run is None: 

1045 if self.defaults.run is None: 

1046 raise NoDefaultCollectionError( 

1047 "No run provided to insertDatasets, and no default from registry construction." 

1048 ) 

1049 run = self.defaults.run 

1050 runRecord = self._managers.collections.find(run) 

1051 if runRecord.type is not CollectionType.RUN: 

1052 raise CollectionTypeError( 

1053 f"Given collection is of type {runRecord.type.name}; RUN collection required." 

1054 ) 

1055 assert isinstance(runRecord, RunRecord) 

1056 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG) 

1057 if expand: 

1058 expandedDataIds = [ 

1059 self.expandDataId(dataId, dimensions=storage.datasetType.dimensions) 

1060 for dataId in progress.wrap(dataIds, f"Expanding {storage.datasetType.name} data IDs") 

1061 ] 

1062 else: 

1063 expandedDataIds = [ 

1064 DataCoordinate.standardize(dataId, graph=storage.datasetType.dimensions) for dataId in dataIds 

1065 ] 

1066 try: 

1067 refs = list(storage.insert(runRecord, expandedDataIds, idGenerationMode)) 

1068 if self._managers.obscore: 

1069 context = queries.SqlQueryContext(self._db, self._managers.column_types) 

1070 self._managers.obscore.add_datasets(refs, context) 

1071 except sqlalchemy.exc.IntegrityError as err: 

1072 raise ConflictingDefinitionError( 

1073 "A database constraint failure was triggered by inserting " 

1074 f"one or more datasets of type {storage.datasetType} into " 

1075 f"collection '{run}'. " 

1076 "This probably means a dataset with the same data ID " 

1077 "and dataset type already exists, but it may also mean a " 

1078 "dimension row is missing." 

1079 ) from err 

1080 return refs 

1081 

1082 @transactional 

1083 def _importDatasets( 

1084 self, 

1085 datasets: Iterable[DatasetRef], 

1086 expand: bool = True, 

1087 ) -> list[DatasetRef]: 

1088 """Import one or more datasets into the `Registry`. 

1089 

1090 This method differs from `insertDatasets` in that it accepts

1091 `DatasetRef` instances which should already be resolved and have a

1092 dataset ID. If the registry supports globally-unique dataset IDs (e.g.

1093 `uuid.UUID`) then datasets which already exist in the registry will be

1094 ignored if imported again.

1095 

1096 Parameters 

1097 ---------- 

1098 datasets : `~collections.abc.Iterable` of `DatasetRef` 

1099 Datasets to be inserted. All `DatasetRef` instances must have 

1100 identical ``datasetType`` and ``run`` attributes. ``run`` 

1101 attribute can be `None` and defaults to ``self.defaults.run``. 

1102 Datasets can specify ``id`` attribute which will be used for 

1103 inserted datasets. All dataset IDs must have the same type 

1104 (`int` or `uuid.UUID`), if type of dataset IDs does not match 

1105 configured backend then IDs will be ignored and new IDs will be 

1106 generated by backend. 

1107 expand : `bool`, optional 

1108 If `True` (default), expand data IDs as they are inserted. This is 

1109 necessary in general, but it may be disabled if the caller can 

1110 guarantee this is unnecessary. 

1111 

1112 Returns 

1113 ------- 

1114 refs : `list` of `DatasetRef` 

1115 Resolved `DatasetRef` instances for all given data IDs (in the same 

1116 order). If any of ``datasets`` has an ID which already exists in 

1117 the database then it will not be inserted or updated, but a 

1118 resolved `DatasetRef` will be returned for it in any case. 

1119 

1120 Raises 

1121 ------ 

1122 lsst.daf.butler.registry.NoDefaultCollectionError 

1123 Raised if ``run`` is `None` and ``self.defaults.run`` is `None`. 

1124 lsst.daf.butler.registry.DatasetTypeError 

1125 Raised if datasets correspond to more than one dataset type or 

1126 dataset type is not known to registry. 

1127 lsst.daf.butler.registry.ConflictingDefinitionError 

1128 If a dataset with the same dataset type and data ID as one of those 

1129 given already exists in ``run``. 

1130 lsst.daf.butler.registry.MissingCollectionError 

1131 Raised if ``run`` does not exist in the registry. 

1132 

1133 Notes 

1134 ----- 

1135 This method is considered package-private and internal to Butler 

1136 implementation. Clients outside daf_butler package should not use this 

1137 method. 

1138 """ 

1139 datasets = list(datasets) 

1140 if not datasets: 

1141 # nothing to do 

1142 return [] 

1143 

1144 # find dataset type 

1145 datasetTypes = {dataset.datasetType for dataset in datasets} 

1146 if len(datasetTypes) != 1: 

1147 raise DatasetTypeError(f"Multiple dataset types in input datasets: {datasetTypes}") 

1148 datasetType = datasetTypes.pop() 

1149 

1150 # get storage handler for this dataset type 

1151 storage = self._managers.datasets.find(datasetType.name) 

1152 if storage is None: 

1153 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.") 

1154 

1155 # find run name 

1156 runs = {dataset.run for dataset in datasets} 

1157 if len(runs) != 1: 

1158 raise ValueError(f"Multiple run names in input datasets: {runs}") 

1159 run = runs.pop() 

1160 

1161 runRecord = self._managers.collections.find(run) 

1162 if runRecord.type is not CollectionType.RUN: 

1163 raise CollectionTypeError( 

1164 f"Given collection '{runRecord.name}' is of type {runRecord.type.name};" 

1165 " RUN collection required." 

1166 ) 

1167 assert isinstance(runRecord, RunRecord) 

1168 

1169 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG) 

1170 if expand: 

1171 expandedDatasets = [ 

1172 dataset.expanded(self.expandDataId(dataset.dataId, dimensions=storage.datasetType.dimensions)) 

1173 for dataset in progress.wrap(datasets, f"Expanding {storage.datasetType.name} data IDs") 

1174 ] 

1175 else: 

1176 expandedDatasets = [ 

1177 DatasetRef(datasetType, dataset.dataId, id=dataset.id, run=dataset.run, conform=True) 

1178 for dataset in datasets 

1179 ] 

1180 

1181 try: 

1182 refs = list(storage.import_(runRecord, expandedDatasets)) 

1183 if self._managers.obscore: 

1184 context = queries.SqlQueryContext(self._db, self._managers.column_types) 

1185 self._managers.obscore.add_datasets(refs, context) 

1186 except sqlalchemy.exc.IntegrityError as err: 

1187 raise ConflictingDefinitionError( 

1188 "A database constraint failure was triggered by inserting " 

1189 f"one or more datasets of type {storage.datasetType} into " 

1190 f"collection '{run}'. " 

1191 "This probably means a dataset with the same data ID " 

1192 "and dataset type already exists, but it may also mean a " 

1193 "dimension row is missing." 

1194 ) from err 

1195 # Check that imported dataset IDs match the input 

1196 for imported_ref, input_ref in zip(refs, datasets, strict=True): 

1197 if imported_ref.id != input_ref.id: 

1198 raise RegistryConsistencyError( 

1199 "Imported dataset ID differs from input dataset ID, " 

1200 f"input ref: {input_ref}, imported ref: {imported_ref}" 

1201 ) 

1202 return refs 

1203 

1204 def getDataset(self, id: DatasetId) -> DatasetRef | None: 

1205 """Retrieve a Dataset entry. 

1206 

1207 Parameters 

1208 ---------- 

1209 id : `DatasetId` 

1210 The unique identifier for the dataset. 

1211 

1212 Returns 

1213 ------- 

1214 ref : `DatasetRef` or `None` 

1215 A ref to the Dataset, or `None` if no matching Dataset 

1216 was found. 

1217 """ 

1218 return self._managers.datasets.getDatasetRef(id) 

1219 

1220 @transactional 

1221 def removeDatasets(self, refs: Iterable[DatasetRef]) -> None: 

1222 """Remove datasets from the Registry. 

1223 

1224 The datasets will be removed unconditionally from all collections, and 

1225 any `Quantum` that consumed this dataset will instead be marked as

1226 having a NULL input. `Datastore` records will *not* be deleted; the 

1227 caller is responsible for ensuring that the dataset has already been 

1228 removed from all Datastores. 

1229 

1230 Parameters 

1231 ---------- 

1232 refs : `~collections.abc.Iterable` [`DatasetRef`] 

1233 References to the datasets to be removed. Must include a valid 

1234 ``id`` attribute, and should be considered invalidated upon return. 

1235 

1236 Raises 

1237 ------ 

1238 lsst.daf.butler.AmbiguousDatasetError 

1239 Raised if any ``ref.id`` is `None`. 

1240 lsst.daf.butler.registry.OrphanedRecordError 

1241 Raised if any dataset is still present in any `Datastore`. 

1242 """ 

1243 progress = Progress("lsst.daf.butler.Registry.removeDatasets", level=logging.DEBUG) 

1244 for datasetType, refsForType in progress.iter_item_chunks( 

1245 DatasetRef.iter_by_type(refs), desc="Removing datasets by type" 

1246 ): 

1247 storage = self._managers.datasets[datasetType.name] 

1248 try: 

1249 storage.delete(refsForType) 

1250 except sqlalchemy.exc.IntegrityError as err: 

1251 raise OrphanedRecordError( 

1252 "One or more datasets is still present in one or more Datastores." 

1253 ) from err 

1254 

1255 @transactional 

1256 def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

1257 """Add existing datasets to a `~CollectionType.TAGGED` collection. 

1258 

1259 If a `DatasetRef` with exactly the same ID is already in the collection,

1260 nothing is changed. If a `DatasetRef` with the same `DatasetType` and

1261 data ID but with different ID exists in the collection, 

1262 `~lsst.daf.butler.registry.ConflictingDefinitionError` is raised. 

1263 

1264 Parameters 

1265 ---------- 

1266 collection : `str` 

1267 Indicates the collection the datasets should be associated with. 

1268 refs : `~collections.abc.Iterable` [ `DatasetRef` ] 

1269 An iterable of resolved `DatasetRef` instances that already exist 

1270 in this `Registry`. 

1271 

1272 Raises 

1273 ------ 

1274 lsst.daf.butler.registry.ConflictingDefinitionError 

1275 If a Dataset with the given `DatasetRef` already exists in the 

1276 given collection. 

1277 lsst.daf.butler.registry.MissingCollectionError 

1278 Raised if ``collection`` does not exist in the registry. 

1279 lsst.daf.butler.registry.CollectionTypeError 

1280 Raised if adding new datasets to the given ``collection`` is not

1281 allowed.

1282 """ 

1283 progress = Progress("lsst.daf.butler.Registry.associate", level=logging.DEBUG) 

1284 collectionRecord = self._managers.collections.find(collection) 

1285 if collectionRecord.type is not CollectionType.TAGGED: 

1286 raise CollectionTypeError( 

1287 f"Collection '{collection}' has type {collectionRecord.type.name}, not TAGGED." 

1288 ) 

1289 for datasetType, refsForType in progress.iter_item_chunks( 

1290 DatasetRef.iter_by_type(refs), desc="Associating datasets by type" 

1291 ): 

1292 storage = self._managers.datasets[datasetType.name] 

1293 try: 

1294 storage.associate(collectionRecord, refsForType) 

1295 if self._managers.obscore: 

1296 # If a TAGGED collection is being monitored by ObsCore 

1297 # manager then we may need to save the dataset. 

1298 context = queries.SqlQueryContext(self._db, self._managers.column_types) 

1299 self._managers.obscore.associate(refsForType, collectionRecord, context) 

1300 except sqlalchemy.exc.IntegrityError as err: 

1301 raise ConflictingDefinitionError( 

1302 f"Constraint violation while associating dataset of type {datasetType.name} with " 

1303 f"collection {collection}. This probably means that one or more datasets with the same " 

1304 "dataset type and data ID already exist in the collection, but it may also indicate " 

1305 "that the datasets do not exist." 

1306 ) from err 

1307 

1308 @transactional 

1309 def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

1310 """Remove existing datasets from a `~CollectionType.TAGGED` collection. 

1311 

1312 ``collection`` and ``ref`` combinations that are not currently 

1313 associated are silently ignored. 

1314 

1315 Parameters 

1316 ---------- 

1317 collection : `str` 

1318 The collection the datasets should no longer be associated with. 

1319 refs : `~collections.abc.Iterable` [ `DatasetRef` ] 

1320 An iterable of resolved `DatasetRef` instances that already exist 

1321 in this `Registry`. 

1322 

1323 Raises 

1324 ------ 

1325 lsst.daf.butler.AmbiguousDatasetError 

1326 Raised if any of the given dataset references is unresolved. 

1327 lsst.daf.butler.registry.MissingCollectionError 

1328 Raised if ``collection`` does not exist in the registry. 

1329 lsst.daf.butler.registry.CollectionTypeError 

1330 Raised if removing datasets from the given ``collection`` is not

1331 allowed.

1332 """ 

1333 progress = Progress("lsst.daf.butler.Registry.disassociate", level=logging.DEBUG) 

1334 collectionRecord = self._managers.collections.find(collection) 

1335 if collectionRecord.type is not CollectionType.TAGGED: 

1336 raise CollectionTypeError( 

1337 f"Collection '{collection}' has type {collectionRecord.type.name}; expected TAGGED." 

1338 ) 

1339 for datasetType, refsForType in progress.iter_item_chunks( 

1340 DatasetRef.iter_by_type(refs), desc="Disassociating datasets by type" 

1341 ): 

1342 storage = self._managers.datasets[datasetType.name] 

1343 storage.disassociate(collectionRecord, refsForType) 

1344 if self._managers.obscore: 

1345 self._managers.obscore.disassociate(refsForType, collectionRecord) 

1346 

1347 @transactional 

1348 def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None: 

1349 """Associate one or more datasets with a calibration collection and a 

1350 validity range within it. 

1351 

1352 Parameters 

1353 ---------- 

1354 collection : `str` 

1355 The name of an already-registered `~CollectionType.CALIBRATION` 

1356 collection. 

1357 refs : `~collections.abc.Iterable` [ `DatasetRef` ] 

1358 Datasets to be associated. 

1359 timespan : `Timespan` 

1360 The validity range for these datasets within the collection. 

1361 

1362 Raises 

1363 ------ 

1364 lsst.daf.butler.AmbiguousDatasetError 

1365 Raised if any of the given `DatasetRef` instances is unresolved. 

1366 lsst.daf.butler.registry.ConflictingDefinitionError 

1367 Raised if the collection already contains a different dataset with 

1368 the same `DatasetType` and data ID and an overlapping validity 

1369 range. 

1370 lsst.daf.butler.registry.CollectionTypeError 

1371 Raised if ``collection`` is not a `~CollectionType.CALIBRATION` 

1372 collection or if one or more datasets are of a dataset type for 

1373 which `DatasetType.isCalibration` returns `False`. 
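
Examples
--------
An illustrative sketch; ``refs`` is assumed to hold resolved calibration
`DatasetRef` instances and the collection name is hypothetical.  An
unbounded `Timespan` is used for brevity::

    registry.registerCollection("HSC/calib", CollectionType.CALIBRATION)
    registry.certify("HSC/calib", refs, Timespan(begin=None, end=None))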

1374 """ 

1375 progress = Progress("lsst.daf.butler.Registry.certify", level=logging.DEBUG) 

1376 collectionRecord = self._managers.collections.find(collection) 

1377 for datasetType, refsForType in progress.iter_item_chunks( 

1378 DatasetRef.iter_by_type(refs), desc="Certifying datasets by type" 

1379 ): 

1380 storage = self._managers.datasets[datasetType.name] 

1381 storage.certify( 

1382 collectionRecord, 

1383 refsForType, 

1384 timespan, 

1385 context=queries.SqlQueryContext(self._db, self._managers.column_types), 

1386 ) 

1387 

1388 @transactional 

1389 def decertify( 

1390 self, 

1391 collection: str, 

1392 datasetType: str | DatasetType, 

1393 timespan: Timespan, 

1394 *, 

1395 dataIds: Iterable[DataId] | None = None, 

1396 ) -> None: 

1397 """Remove or adjust datasets to clear a validity range within a 

1398 calibration collection. 

1399 

1400 Parameters 

1401 ---------- 

1402 collection : `str` 

1403 The name of an already-registered `~CollectionType.CALIBRATION` 

1404 collection. 

1405 datasetType : `str` or `DatasetType` 

1406 Name or `DatasetType` instance for the datasets to be decertified. 

1407 timespan : `Timespan`

1408 The validity range to remove datasets from within the collection. 

1409 Datasets that overlap this range but are not contained by it will 

1410 have their validity ranges adjusted to not overlap it, which may 

1411 split a single dataset validity range into two. 

1412 dataIds : iterable [`dict` or `DataCoordinate`], optional 

1413 Data IDs that should be decertified within the given validity range.

1414 If `None`, all data IDs for ``datasetType`` will be

1415 decertified.

1416 

1417 Raises 

1418 ------ 

1419 lsst.daf.butler.registry.CollectionTypeError 

1420 Raised if ``collection`` is not a `~CollectionType.CALIBRATION` 

1421 collection or if ``datasetType.isCalibration() is False``. 

1422 """ 

1423 collectionRecord = self._managers.collections.find(collection) 

1424 if isinstance(datasetType, str): 

1425 storage = self._managers.datasets[datasetType] 

1426 else: 

1427 storage = self._managers.datasets[datasetType.name] 

1428 standardizedDataIds = None 

1429 if dataIds is not None: 

1430 standardizedDataIds = [ 

1431 DataCoordinate.standardize(d, dimensions=storage.datasetType.dimensions) for d in dataIds 

1432 ] 

1433 storage.decertify( 

1434 collectionRecord, 

1435 timespan, 

1436 dataIds=standardizedDataIds, 

1437 context=queries.SqlQueryContext(self._db, self._managers.column_types), 

1438 ) 

1439 

1440 def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager: 

1441 """Return an object that allows a new `Datastore` instance to 

1442 communicate with this `Registry`. 

1443 

1444 Returns 

1445 ------- 

1446 manager : `~.interfaces.DatastoreRegistryBridgeManager` 

1447 Object that mediates communication between this `Registry` and its 

1448 associated datastores. 

1449 """ 

1450 return self._managers.datastores 

1451 

1452 def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]: 

1453 """Retrieve datastore locations for a given dataset. 

1454 

1455 Parameters 

1456 ---------- 

1457 ref : `DatasetRef` 

1458 A reference to the dataset for which to retrieve storage 

1459 information. 

1460 

1461 Returns 

1462 ------- 

1463 datastores : `~collections.abc.Iterable` [ `str` ] 

1464 All the matching datastores holding this dataset. 

1465 

1466 Raises 

1467 ------ 

1468 lsst.daf.butler.AmbiguousDatasetError 

1469 Raised if ``ref.id`` is `None`. 
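
Examples
--------
A short sketch; the repository path and collection name are
illustrative assumptions::

    from lsst.daf.butler import Butler

    registry = Butler("/path/to/repo").registry
    ref = next(iter(registry.queryDatasets("bias", collections="calib/bias/run1")))
    print(list(registry.getDatasetLocations(ref)))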

1470 """ 

1471 return self._managers.datastores.findDatastores(ref) 

1472 

1473 def expandDataId( 

1474 self, 

1475 dataId: DataId | None = None, 

1476 *, 

1477 dimensions: Iterable[str] | DimensionGroup | DimensionGraph | None = None, 

1478 graph: DimensionGraph | None = None, 

1479 records: NameLookupMapping[DimensionElement, DimensionRecord | None] | None = None, 

1480 withDefaults: bool = True, 

1481 **kwargs: Any, 

1482 ) -> DataCoordinate: 

1483 """Expand a dimension-based data ID to include additional information. 

1484 

1485 Parameters 

1486 ---------- 

1487 dataId : `DataCoordinate` or `dict`, optional 

1488 Data ID to be expanded; augmented and overridden by ``kwargs``. 

1489 dimensions : `~collections.abc.Iterable` [ `str` ], \ 

1490 `DimensionGroup`, or `DimensionGraph`, optional 

1491 The dimensions to be identified by the new `DataCoordinate`. 

1492 If not provided, will be inferred from the keys of ``dataId`` and

1493 ``**kwargs``; the registry's own dimension universe is used to

1494 interpret them.

1495 graph : `DimensionGraph`, optional 

1496 Like ``dimensions``, but as a ``DimensionGraph`` instance. Ignored 

1497 if ``dimensions`` is provided. Deprecated and will be removed 

1498 after v27. 

1499 records : `~collections.abc.Mapping` [`str`, `DimensionRecord`], \ 

1500 optional 

1501 Dimension record data to use before querying the database for that 

1502 data, keyed by element name. 

1503 withDefaults : `bool`, optional 

1504 Utilize ``self.defaults.dataId`` to fill in missing governor 

1505 dimension key-value pairs. Defaults to `True` (i.e. defaults are 

1506 used). 

1507 **kwargs 

1508 Additional keywords are treated like additional key-value pairs for 

1509 ``dataId``, extending and overriding. 

1510 

1511 Returns 

1512 ------- 

1513 expanded : `DataCoordinate` 

1514 A data ID that includes full metadata for all of the dimensions it 

1515 identifies, i.e. guarantees that ``expanded.hasRecords()`` and 

1516 ``expanded.hasFull()`` both return `True`. 

1517 

1518 Raises 

1519 ------ 

1520 lsst.daf.butler.registry.DataIdError 

1521 Raised when ``dataId`` or keyword arguments specify unknown 

1522 dimensions or values, or when a resulting data ID contains 

1523 contradictory key-value pairs, according to dimension 

1524 relationships. 

1525 

1526 Notes 

1527 ----- 

1528 This method cannot be relied upon to reject invalid data ID values 

1529 for dimensions that do not actually have any record columns. For

1530 efficiency reasons the records for these dimensions (which have only 

1531 dimension key values that are given by the caller) may be constructed 

1532 directly rather than obtained from the registry database. 
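
Examples
--------
A sketch of expanding a minimal data ID into one that carries dimension
records; the instrument and visit values are illustrative assumptions::

    from lsst.daf.butler import Butler

    registry = Butler("/path/to/repo").registry
    data_id = registry.expandDataId(instrument="HSC", visit=903334)
    assert data_id.hasFull() and data_id.hasRecords()
    print(data_id.records["visit"].exposure_time)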

1533 """ 

1534 if not withDefaults: 

1535 defaults = None 

1536 else: 

1537 defaults = self.defaults.dataId 

1538 try: 

1539 standardized = DataCoordinate.standardize( 

1540 dataId, 

1541 graph=graph, 

1542 dimensions=dimensions, 

1543 universe=self.dimensions, 

1544 defaults=defaults, 

1545 **kwargs, 

1546 ) 

1547 except KeyError as exc: 

1548 # This means either kwargs contain an unrecognized name or a

1549 # required dimension is missing.

1550 raise DimensionNameError(str(exc)) from exc 

1551 if standardized.hasRecords(): 

1552 return standardized 

1553 if records is None: 

1554 records = {} 

1555 elif isinstance(records, NamedKeyMapping): 

1556 records = records.byName() 

1557 else: 

1558 records = dict(records) 

1559 if isinstance(dataId, DataCoordinate) and dataId.hasRecords(): 

1560 for element_name in dataId.dimensions.elements: 

1561 records[element_name] = dataId.records[element_name] 

1562 keys = dict(standardized.mapping) 

1563 for element_name in standardized.dimensions.lookup_order: 

1564 element = self.dimensions[element_name] 

1565 record = records.get(element_name, ...) # Use ... to mean not found; None might mean NULL 

1566 if record is ...: 

1567 if element_name in self.dimensions.dimensions.names and keys.get(element_name) is None: 

1568 if element_name in standardized.dimensions.required: 

1569 raise DimensionNameError( 

1570 f"No value or null value for required dimension {element_name}." 

1571 ) 

1572 keys[element_name] = None 

1573 record = None 

1574 else: 

1575 record = self._managers.dimensions.fetch_one( 

1576 element_name, 

1577 DataCoordinate.standardize(keys, dimensions=element.minimal_group), 

1578 self.dimension_record_cache, 

1579 ) 

1580 records[element_name] = record 

1581 if record is not None: 

1582 for d in element.implied: 

1583 value = getattr(record, d.name) 

1584 if keys.setdefault(d.name, value) != value: 

1585 raise InconsistentDataIdError( 

1586 f"Data ID {standardized} has {d.name}={keys[d.name]!r}, " 

1587 f"but {element_name} implies {d.name}={value!r}." 

1588 ) 

1589 else: 

1590 if element_name in standardized.dimensions.required: 

1591 raise DataIdValueError( 

1592 f"Could not fetch record for required dimension {element.name} via keys {keys}." 

1593 ) 

1594 if element.defines_relationships: 

1595 raise InconsistentDataIdError( 

1596 f"Could not fetch record for element {element_name} via keys {keys}, ", 

1597 "but it is marked as defining relationships; this means one or more dimensions are " 

1598 "have inconsistent values.", 

1599 ) 

1600 for d in element.implied: 

1601 keys.setdefault(d.name, None) 

1602 records.setdefault(d.name, None) 

1603 return DataCoordinate.standardize(keys, dimensions=standardized.dimensions).expanded(records=records) 

1604 

1605 def insertDimensionData( 

1606 self, 

1607 element: DimensionElement | str, 

1608 *data: Mapping[str, Any] | DimensionRecord, 

1609 conform: bool = True, 

1610 replace: bool = False, 

1611 skip_existing: bool = False, 

1612 ) -> None: 

1613 """Insert one or more dimension records into the database. 

1614 

1615 Parameters 

1616 ---------- 

1617 element : `DimensionElement` or `str` 

1618 The `DimensionElement` or name thereof that identifies the table 

1619 records will be inserted into. 

1620 *data : `dict` or `DimensionRecord` 

1621 One or more records to insert. 

1622 conform : `bool`, optional 

1623 If `False` (`True` is default) perform no checking or conversions, 

1624 and assume that ``element`` is a `DimensionElement` instance and 

1625 ``data`` is one or more `DimensionRecord` instances of the

1626 appropriate subclass. 

1627 replace : `bool`, optional 

1628 If `True` (`False` is default), replace existing records in the 

1629 database if there is a conflict. 

1630 skip_existing : `bool`, optional 

1631 If `True` (`False` is default), skip insertion if a record with 

1632 the same primary key values already exists. Unlike 

1633 `syncDimensionData`, this will not detect when the given record 

1634 differs from what is in the database, and should not be used when 

1635 this is a concern. 
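
Examples
--------
A sketch inserting a single ``instrument`` record; the field names are
those used by the default dimension configuration and the values are
illustrative assumptions::

    from lsst.daf.butler import Butler

    registry = Butler("/path/to/repo", writeable=True).registry
    registry.insertDimensionData(
        "instrument",
        {
            "name": "MyCam",
            "visit_max": 999999,
            "exposure_max": 999999,
            "detector_max": 99,
            "class_name": "lsst.obs.mycam.MyCam",
        },
    )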

1636 """ 

1637 if isinstance(element, str): 

1638 element = self.dimensions[element] 

1639 if conform: 

1640 records = [ 

1641 row if isinstance(row, DimensionRecord) else element.RecordClass(**row) for row in data 

1642 ] 

1643 else: 

1644 # Ignore typing since caller said to trust them with conform=False. 

1645 records = data # type: ignore 

1646 self._managers.dimensions.insert( 

1647 element, 

1648 *records, 

1649 cache=self.dimension_record_cache, 

1650 replace=replace, 

1651 skip_existing=skip_existing, 

1652 ) 

1653 

1654 def syncDimensionData( 

1655 self, 

1656 element: DimensionElement | str, 

1657 row: Mapping[str, Any] | DimensionRecord, 

1658 conform: bool = True, 

1659 update: bool = False, 

1660 ) -> bool | dict[str, Any]: 

1661 """Synchronize the given dimension record with the database, inserting 

1662 if it does not already exist and comparing values if it does. 

1663 

1664 Parameters 

1665 ---------- 

1666 element : `DimensionElement` or `str` 

1667 The `DimensionElement` or name thereof that identifies the table 

1668 records will be inserted into. 

1669 row : `dict` or `DimensionRecord` 

1670 The record to insert. 

1671 conform : `bool`, optional 

1672 If `False` (`True` is default) perform no checking or conversions, 

1673 and assume that ``element`` is a `DimensionElement` instance and 

1674 ``row`` is a `DimensionRecord` instance of the

1675 appropriate subclass. 

1676 update : `bool`, optional 

1677 If `True` (`False` is default), update the existing record in the 

1678 database if there is a conflict. 

1679 

1680 Returns 

1681 ------- 

1682 inserted_or_updated : `bool` or `dict` 

1683 `True` if a new row was inserted, `False` if no changes were 

1684 needed, or a `dict` mapping updated column names to their old 

1685 values if an update was performed (only possible if 

1686 ``update=True``). 

1687 

1688 Raises 

1689 ------ 

1690 lsst.daf.butler.registry.ConflictingDefinitionError 

1691 Raised if the record exists in the database (according to primary 

1692 key lookup) but is inconsistent with the given one. 
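
Examples
--------
A sketch that inserts a ``detector`` record only if it is not already
present; the values are illustrative assumptions::

    from lsst.daf.butler import Butler

    registry = Butler("/path/to/repo", writeable=True).registry
    inserted = registry.syncDimensionData(
        "detector",
        {"instrument": "MyCam", "id": 0, "full_name": "S0-0"},
    )
    print("inserted new row" if inserted is True else "row already present")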

1693 """ 

1694 if conform: 

1695 if isinstance(element, str): 

1696 element = self.dimensions[element] 

1697 record = row if isinstance(row, DimensionRecord) else element.RecordClass(**row) 

1698 else: 

1699 # Ignore typing since caller said to trust them with conform=False. 

1700 record = row # type: ignore 

1701 return self._managers.dimensions.sync(record, self.dimension_record_cache, update=update) 

1702 

1703 def queryDatasetTypes( 

1704 self, 

1705 expression: Any = ..., 

1706 *, 

1707 components: bool | _Marker = _DefaultMarker, 

1708 missing: list[str] | None = None, 

1709 ) -> Iterable[DatasetType]: 

1710 """Iterate over the dataset types whose names match an expression. 

1711 

1712 Parameters 

1713 ---------- 

1714 expression : dataset type expression, optional 

1715 An expression that fully or partially identifies the dataset types 

1716 to return, such as a `str`, `re.Pattern`, or iterable thereof. 

1717 ``...`` can be used to return all dataset types, and is the 

1718 default. See :ref:`daf_butler_dataset_type_expressions` for more 

1719 information. 

1720 components : `bool`, optional 

1721 Must be `False`. Provided only for backwards compatibility. After 

1722 v27 this argument will be removed entirely. 

1723 missing : `list` of `str`, optional 

1724 String dataset type names that were explicitly given (i.e. not 

1725 regular expression patterns) but not found will be appended to this 

1726 list, if it is provided. 

1727 

1728 Returns 

1729 ------- 

1730 dataset_types : `~collections.abc.Iterable` [ `DatasetType`] 

1731 An `~collections.abc.Iterable` of `DatasetType` instances whose 

1732 names match ``expression``. 

1733 

1734 Raises 

1735 ------ 

1736 lsst.daf.butler.registry.DatasetTypeExpressionError 

1737 Raised when ``expression`` is invalid. 
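
Examples
--------
A sketch matching dataset types by name and by glob, while collecting
explicitly named types that are not registered; the names are
illustrative assumptions::

    from lsst.daf.butler import Butler

    registry = Butler("/path/to/repo").registry
    missing: list[str] = []
    for dataset_type in registry.queryDatasetTypes(["calexp", "deepCoadd*"], missing=missing):
        print(dataset_type.name, dataset_type.dimensions)
    print("not registered:", missing)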

1738 """ 

1739 if components is not _DefaultMarker: 

1740 if components is not False: 

1741 raise DatasetTypeError( 

1742 "Dataset component queries are no longer supported by Registry. Use " 

1743 "DatasetType methods to obtain components from parent dataset types instead." 

1744 ) 

1745 else: 

1746 warnings.warn( 

1747 "The components parameter is ignored. It will be removed after v27.", 

1748 category=FutureWarning, 

1749 stacklevel=find_outside_stacklevel("lsst.daf.butler"), 

1750 ) 

1751 wildcard = DatasetTypeWildcard.from_expression(expression) 

1752 return self._managers.datasets.resolve_wildcard(wildcard, missing=missing) 

1753 

1754 def queryCollections( 

1755 self, 

1756 expression: Any = ..., 

1757 datasetType: DatasetType | None = None, 

1758 collectionTypes: Iterable[CollectionType] | CollectionType = CollectionType.all(), 

1759 flattenChains: bool = False, 

1760 includeChains: bool | None = None, 

1761 ) -> Sequence[str]: 

1762 """Iterate over the collections whose names match an expression. 

1763 

1764 Parameters 

1765 ---------- 

1766 expression : collection expression, optional 

1767 An expression that identifies the collections to return, such as 

1768 a `str` (for full matches or partial matches via globs), 

1769 `re.Pattern` (for partial matches), or iterable thereof. ``...`` 

1770 can be used to return all collections, and is the default. 

1771 See :ref:`daf_butler_collection_expressions` for more information. 

1772 datasetType : `DatasetType`, optional 

1773 If provided, only yield collections that may contain datasets of 

1774 this type. This is a conservative approximation in general; it may 

1775 yield collections that do not have any such datasets. 

1776 collectionTypes : `~collections.abc.Set` [`CollectionType`] or \ 

1777 `CollectionType`, optional 

1778 If provided, only yield collections of these types. 

1779 flattenChains : `bool`, optional 

1780 If `True` (`False` is default), recursively yield the child 

1781 collections of matching `~CollectionType.CHAINED` collections. 

1782 includeChains : `bool`, optional 

1783 If `True`, yield matching `~CollectionType.CHAINED`

1784 collections. Default is the opposite of ``flattenChains``: include 

1785 either CHAINED collections or their children, but not both. 

1786 

1787 Returns 

1788 ------- 

1789 collections : `~collections.abc.Sequence` [ `str` ] 

1790 The names of collections that match ``expression``. 

1791 

1792 Raises 

1793 ------ 

1794 lsst.daf.butler.registry.CollectionExpressionError 

1795 Raised when ``expression`` is invalid. 

1796 

1797 Notes 

1798 ----- 

1799 The order in which collections are returned is unspecified, except that 

1800 the children of a `~CollectionType.CHAINED` collection are guaranteed 

1801 to be in the order in which they are searched. When multiple parent 

1802 `~CollectionType.CHAINED` collections match the same criteria, the 

1803 order in which the two lists appear is unspecified, and the lists of 

1804 children may be incomplete if a child has multiple parents. 
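
Examples
--------
A sketch listing `~CollectionType.RUN` collections whose names match a
glob; the pattern is an illustrative assumption::

    from lsst.daf.butler import Butler, CollectionType

    registry = Butler("/path/to/repo").registry
    for name in registry.queryCollections("HSC/runs/*", collectionTypes=CollectionType.RUN):
        print(name)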

1805 """ 

1806 # Right now the datasetType argument is completely ignored, but that

1807 # is consistent with its [lack of] guarantees. DM-24939 or a follow-up 

1808 # ticket will take care of that. 

1809 try: 

1810 wildcard = CollectionWildcard.from_expression(expression) 

1811 except TypeError as exc: 

1812 raise CollectionExpressionError(f"Invalid collection expression '{expression}'") from exc 

1813 collectionTypes = ensure_iterable(collectionTypes) 

1814 return [ 

1815 record.name 

1816 for record in self._managers.collections.resolve_wildcard( 

1817 wildcard, 

1818 collection_types=frozenset(collectionTypes), 

1819 flatten_chains=flattenChains, 

1820 include_chains=includeChains, 

1821 ) 

1822 ] 

1823 

1824 def _makeQueryBuilder( 

1825 self, 

1826 summary: queries.QuerySummary, 

1827 doomed_by: Iterable[str] = (), 

1828 ) -> queries.QueryBuilder: 

1829 """Return a `QueryBuilder` instance capable of constructing and 

1830 managing more complex queries than those obtainable via `Registry` 

1831 interfaces. 

1832 

1833 This is an advanced interface; downstream code should prefer 

1834 `Registry.queryDataIds` and `Registry.queryDatasets` whenever those 

1835 are sufficient. 

1836 

1837 Parameters 

1838 ---------- 

1839 summary : `queries.QuerySummary` 

1840 Object describing and categorizing the full set of dimensions that 

1841 will be included in the query. 

1842 doomed_by : `~collections.abc.Iterable` of `str`, optional 

1843 A list of diagnostic messages that indicate why the query is going 

1844 to yield no results and should not even be executed. If an empty 

1845 container (default) the query will be executed unless other code 

1846 determines that it is doomed. 

1847 

1848 Returns 

1849 ------- 

1850 builder : `queries.QueryBuilder` 

1851 Object that can be used to construct and perform advanced queries. 

1852 """ 

1853 doomed_by = list(doomed_by) 

1854 backend = queries.SqlQueryBackend(self._db, self._managers, self.dimension_record_cache) 

1855 context = backend.context() 

1856 relation: Relation | None = None 

1857 if doomed_by: 

1858 relation = LeafRelation.make_doomed(context.sql_engine, set(), doomed_by) 

1859 return queries.QueryBuilder( 

1860 summary, 

1861 backend=backend, 

1862 context=context, 

1863 relation=relation, 

1864 ) 

1865 

1866 def _standardize_query_data_id_args( 

1867 self, data_id: DataId | None, *, doomed_by: list[str], **kwargs: Any 

1868 ) -> DataCoordinate: 

1869 """Preprocess the data ID arguments passed to query* methods. 

1870 

1871 Parameters 

1872 ---------- 

1873 data_id : `DataId` or `None` 

1874 Data ID that constrains the query results. 

1875 doomed_by : `list` [ `str` ] 

1876 List to append messages indicating why the query is doomed to 

1877 yield no results. 

1878 **kwargs 

1879 Additional data ID key-value pairs, extending and overriding 

1880 ``data_id``. 

1881 

1882 Returns 

1883 ------- 

1884 data_id : `DataCoordinate` 

1885 Standardized data ID. Will be fully expanded unless expansion 

1886 fails, in which case a message will be appended to ``doomed_by`` 

1887 on return. 

1888 """ 

1889 try: 

1890 return self.expandDataId(data_id, **kwargs) 

1891 except DataIdValueError as err: 

1892 doomed_by.append(str(err)) 

1893 return DataCoordinate.standardize( 

1894 data_id, **kwargs, universe=self.dimensions, defaults=self.defaults.dataId 

1895 ) 

1896 

1897 def _standardize_query_dataset_args( 

1898 self, 

1899 datasets: Any, 

1900 collections: CollectionArgType | None, 

1901 mode: Literal["find_first"] | Literal["find_all"] | Literal["constrain"] = "constrain", 

1902 *, 

1903 doomed_by: list[str], 

1904 ) -> tuple[list[DatasetType], CollectionWildcard | None]: 

1905 """Preprocess dataset arguments passed to query* methods. 

1906 

1907 Parameters 

1908 ---------- 

1909 datasets : `DatasetType`, `str`, `re.Pattern`, or iterable of these 

1910 Expression identifying dataset types. See `queryDatasetTypes` for 

1911 details. 

1912 collections : `str`, `re.Pattern`, or iterable of these 

1913 Expression identifying collections to be searched. See 

1914 `queryCollections` for details. 

1915 mode : `str`, optional 

1916 The way in which datasets are being used in this query; one of: 

1917 

1918 - "find_first": this is a query for the first dataset in an 

1919 ordered list of collections. Prohibits collection wildcards, 

1920 but permits dataset type wildcards. 

1921 

1922 - "find_all": this is a query for all datasets in all matched 

1923 collections. Permits collection and dataset type wildcards. 

1924 

1925 - "constrain": this is a query for something other than datasets, 

1926 with results constrained by dataset existence. Permits 

1927 collection wildcards and prohibits ``...`` as a dataset type 

1928 wildcard. 

1929 doomed_by : `list` [ `str` ] 

1930 List to append messages indicating why the query is doomed to 

1931 yield no results. 

1932 

1933 Returns 

1934 ------- 

1935 dataset_types : `list` [ `DatasetType` ] 

1936 List of matched dataset types. 

1937 collections : `CollectionWildcard` or `None`

1938 Processed collection expression. 

1939 """ 

1940 dataset_types: list[DatasetType] = [] 

1941 collection_wildcard: CollectionWildcard | None = None 

1942 if datasets is not None: 

1943 if collections is None: 

1944 if not self.defaults.collections: 

1945 raise NoDefaultCollectionError("No collections, and no registry default collections.") 

1946 collection_wildcard = CollectionWildcard.from_expression(self.defaults.collections) 

1947 else: 

1948 collection_wildcard = CollectionWildcard.from_expression(collections) 

1949 if mode == "find_first" and collection_wildcard.patterns: 

1950 raise TypeError( 

1951 f"Collection pattern(s) {collection_wildcard.patterns} not allowed in this context." 

1952 ) 

1953 missing: list[str] = [] 

1954 dataset_types = self._managers.datasets.resolve_wildcard( 

1955 datasets, missing=missing, explicit_only=(mode == "constrain") 

1956 ) 

1957 if missing and mode == "constrain": 

1958 raise MissingDatasetTypeError( 

1959 f"Dataset type(s) {missing} are not registered.", 

1960 ) 

1961 doomed_by.extend(f"Dataset type {name} is not registered." for name in missing) 

1962 elif collections: 

1963 # I think this check should actually be `collections is not None`, 

1964 # but it looks like some CLI scripts use empty tuple as default. 

1965 raise ArgumentError(f"Cannot pass 'collections' (='{collections}') without 'datasets'.") 

1966 return dataset_types, collection_wildcard 

1967 

1968 def queryDatasets( 

1969 self, 

1970 datasetType: Any, 

1971 *, 

1972 collections: CollectionArgType | None = None, 

1973 dimensions: Iterable[Dimension | str] | None = None, 

1974 dataId: DataId | None = None, 

1975 where: str = "", 

1976 findFirst: bool = False, 

1977 components: bool | _Marker = _DefaultMarker, 

1978 bind: Mapping[str, Any] | None = None, 

1979 check: bool = True, 

1980 **kwargs: Any, 

1981 ) -> queries.DatasetQueryResults: 

1982 """Query for and iterate over dataset references matching user-provided 

1983 criteria. 

1984 

1985 Parameters 

1986 ---------- 

1987 datasetType : dataset type expression 

1988 An expression that fully or partially identifies the dataset types 

1989 to be queried. Allowed types include `DatasetType`, `str`, 

1990 `re.Pattern`, and iterables thereof. The special value ``...`` can 

1991 be used to query all dataset types. See 

1992 :ref:`daf_butler_dataset_type_expressions` for more information. 

1993 collections : collection expression, optional 

1994 An expression that identifies the collections to search, such as a 

1995 `str` (for full matches or partial matches via globs), `re.Pattern` 

1996 (for partial matches), or iterable thereof. ``...`` can be used to 

1997 search all collections (actually just all `~CollectionType.RUN` 

1998 collections, because this will still find all datasets). 

1999 If not provided, ``self.defaults.collections`` is used. See

2000 :ref:`daf_butler_collection_expressions` for more information. 

2001 dimensions : `~collections.abc.Iterable` of `Dimension` or `str` 

2002 Dimensions to include in the query (in addition to those used 

2003 to identify the queried dataset type(s)), either to constrain 

2004 the resulting datasets to those for which a matching dimension 

2005 exists, or to relate the dataset type's dimensions to dimensions 

2006 referenced by the ``dataId`` or ``where`` arguments. 

2007 dataId : `dict` or `DataCoordinate`, optional 

2008 A data ID whose key-value pairs are used as equality constraints 

2009 in the query. 

2010 where : `str`, optional 

2011 A string expression similar to a SQL WHERE clause. May involve 

2012 any column of a dimension table or (as a shortcut for the primary 

2013 key column of a dimension table) dimension name. See 

2014 :ref:`daf_butler_dimension_expressions` for more information. 

2015 findFirst : `bool`, optional 

2016 If `True` (`False` is default), for each result data ID, only 

2017 yield one `DatasetRef` of each `DatasetType`, from the first 

2018 collection in which a dataset of that dataset type appears 

2019 (according to the order of ``collections`` passed in). If `True`, 

2020 ``collections`` must not contain regular expressions and may not 

2021 be ``...``. 

2022 components : `bool`, optional 

2023 Must be `False`. Provided only for backwards compatibility. After 

2024 v27 this argument will be removed entirely. 

2025 bind : `~collections.abc.Mapping`, optional 

2026 Mapping containing literal values that should be injected into the 

2027 ``where`` expression, keyed by the identifiers they replace. 

2028 Values of collection type can be expanded in some cases; see 

2029 :ref:`daf_butler_dimension_expressions_identifiers` for more 

2030 information. 

2031 check : `bool`, optional 

2032 If `True` (default) check the query for consistency before 

2033 executing it. This may reject some valid queries that resemble 

2034 common mistakes (e.g. queries for visits without specifying an 

2035 instrument). 

2036 **kwargs 

2037 Additional keyword arguments are forwarded to 

2038 `DataCoordinate.standardize` when processing the ``dataId`` 

2039 argument (and may be used to provide a constraining data ID even 

2040 when the ``dataId`` argument is `None`). 

2041 

2042 Returns 

2043 ------- 

2044 refs : `.queries.DatasetQueryResults` 

2045 Dataset references matching the given query criteria. Nested data 

2046 IDs are guaranteed to include values for all implied dimensions 

2047 (i.e. `DataCoordinate.hasFull` will return `True`), but will not 

2048 include dimension records (`DataCoordinate.hasRecords` will be 

2049 `False`) unless `~.queries.DatasetQueryResults.expanded` is 

2050 called on the result object (which returns a new one). 

2051 

2052 Raises 

2053 ------ 

2054 lsst.daf.butler.registry.DatasetTypeExpressionError 

2055 Raised when ``datasetType`` expression is invalid. 

2056 TypeError 

2057 Raised when the arguments are incompatible, such as when a 

2058 collection wildcard is passed when ``findFirst`` is `True`, or 

2059 when ``collections`` is `None` and ``self.defaults.collections`` is 

2060 also `None`. 

2061 lsst.daf.butler.registry.DataIdError 

2062 Raised when ``dataId`` or keyword arguments specify unknown 

2063 dimensions or values, or when they contain inconsistent values. 

2064 lsst.daf.butler.registry.UserExpressionError 

2065 Raised when ``where`` expression is invalid. 

2066 

2067 Notes 

2068 ----- 

2069 When multiple dataset types are queried in a single call, the 

2070 results of this operation are equivalent to querying for each dataset 

2071 type separately in turn, and no information about the relationships 

2072 between datasets of different types is included. In contexts where 

2073 that kind of information is important, the recommended pattern is to 

2074 use `queryDataIds` to first obtain data IDs (possibly with the 

2075 desired dataset types and collections passed as constraints to the 

2076 query), and then use multiple (generally much simpler) calls to 

2077 `queryDatasets` with the returned data IDs passed as constraints. 
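
Examples
--------
A sketch of a find-first query for a single dataset type; the
collection, dataset type, and ``where`` values are illustrative
assumptions::

    from lsst.daf.butler import Butler

    registry = Butler("/path/to/repo").registry
    refs = registry.queryDatasets(
        "calexp",
        collections="HSC/runs/RC2",
        where="instrument = 'HSC' AND visit = 903334",
        findFirst=True,
    )
    for ref in refs:
        print(ref.dataId, ref.run)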

2078 """ 

2079 if components is not _DefaultMarker: 

2080 if components is not False: 

2081 raise DatasetTypeError( 

2082 "Dataset component queries are no longer supported by Registry. Use " 

2083 "DatasetType methods to obtain components from parent dataset types instead." 

2084 ) 

2085 else: 

2086 warnings.warn( 

2087 "The components parameter is ignored. It will be removed after v27.", 

2088 category=FutureWarning, 

2089 stacklevel=find_outside_stacklevel("lsst.daf.butler"), 

2090 ) 

2091 doomed_by: list[str] = [] 

2092 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs) 

2093 resolved_dataset_types, collection_wildcard = self._standardize_query_dataset_args( 

2094 datasetType, 

2095 collections, 

2096 mode="find_first" if findFirst else "find_all", 

2097 doomed_by=doomed_by, 

2098 ) 

2099 if collection_wildcard is not None and collection_wildcard.empty(): 

2100 doomed_by.append("No datasets can be found because collection list is empty.") 

2101 return queries.ChainedDatasetQueryResults([], doomed_by=doomed_by) 

2102 parent_results: list[queries.ParentDatasetQueryResults] = [] 

2103 for resolved_dataset_type in resolved_dataset_types: 

2104 # The full set of dimensions in the query is the combination of 

2105 # those needed for the DatasetType and those explicitly requested, 

2106 # if any. 

2107 dimension_names = set(resolved_dataset_type.dimensions.names) 

2108 if dimensions is not None: 

2109 dimension_names.update(self.dimensions.conform(dimensions).names) 

2110 # Construct the summary structure needed to construct a 

2111 # QueryBuilder. 

2112 summary = queries.QuerySummary( 

2113 requested=self.dimensions.conform(dimension_names), 

2114 column_types=self._managers.column_types, 

2115 data_id=data_id, 

2116 expression=where, 

2117 bind=bind, 

2118 defaults=self.defaults.dataId, 

2119 check=check, 

2120 datasets=[resolved_dataset_type], 

2121 ) 

2122 builder = self._makeQueryBuilder(summary) 

2123 # Add the dataset subquery to the query, telling the QueryBuilder 

2124 # to include the rank of the selected collection in the results 

2125 # only if we need to findFirst. Note that if any of the 

2126 # collections are actually wildcard expressions, and 

2127 # findFirst=True, this will raise TypeError for us. 

2128 builder.joinDataset( 

2129 resolved_dataset_type, collection_wildcard, isResult=True, findFirst=findFirst 

2130 ) 

2131 query = builder.finish() 

2132 parent_results.append( 

2133 queries.ParentDatasetQueryResults(query, resolved_dataset_type, components=[None]) 

2134 ) 

2135 if not parent_results: 

2136 doomed_by.extend( 

2137 f"No registered dataset type matching {t!r} found, so no matching datasets can " 

2138 "exist in any collection." 

2139 for t in ensure_iterable(datasetType) 

2140 ) 

2141 return queries.ChainedDatasetQueryResults([], doomed_by=doomed_by) 

2142 elif len(parent_results) == 1: 

2143 return parent_results[0] 

2144 else: 

2145 return queries.ChainedDatasetQueryResults(parent_results) 

2146 

2147 def queryDataIds( 

2148 self, 

2149 # TODO: Drop Dimension support on DM-41326. 

2150 dimensions: DimensionGroup | Iterable[Dimension | str] | Dimension | str, 

2151 *, 

2152 dataId: DataId | None = None, 

2153 datasets: Any = None, 

2154 collections: CollectionArgType | None = None, 

2155 where: str = "", 

2156 components: bool | _Marker = _DefaultMarker, 

2157 bind: Mapping[str, Any] | None = None, 

2158 check: bool = True, 

2159 **kwargs: Any, 

2160 ) -> queries.DataCoordinateQueryResults: 

2161 """Query for data IDs matching user-provided criteria. 

2162 

2163 Parameters 

2164 ---------- 

2165 dimensions : `DimensionGroup`, `Dimension`, or `str`, or \ 

2166 `~collections.abc.Iterable` [ `Dimension` or `str` ] 

2167 The dimensions of the data IDs to yield, as either `Dimension` 

2168 instances or `str`. Will be automatically expanded to a complete 

2169 `DimensionGroup`. Support for `Dimension` instances is deprecated 

2170 and will not be supported after v27. 

2171 dataId : `dict` or `DataCoordinate`, optional 

2172 A data ID whose key-value pairs are used as equality constraints 

2173 in the query. 

2174 datasets : dataset type expression, optional 

2175 An expression that fully or partially identifies dataset types 

2176 that should constrain the yielded data IDs. For example, including 

2177 "raw" here would constrain the yielded ``instrument``, 

2178 ``exposure``, ``detector``, and ``physical_filter`` values to only 

2179 those for which at least one "raw" dataset exists in 

2180 ``collections``. Allowed types include `DatasetType`, `str`, 

2181 and iterables thereof. Regular expression objects (i.e. 

2182 `re.Pattern`) are deprecated and will be removed after the v26 

2183 release. See :ref:`daf_butler_dataset_type_expressions` for more 

2184 information. 

2185 collections : collection expression, optional 

2186 An expression that identifies the collections to search for 

2187 datasets, such as a `str` (for full matches or partial matches 

2188 via globs), `re.Pattern` (for partial matches), or iterable 

2189 thereof. ``...`` can be used to search all collections (actually 

2190 just all `~CollectionType.RUN` collections, because this will 

2191 still find all datasets). If not provided, 

2192 ``self.defaults.collections`` is used. Ignored unless ``datasets``

2193 is also passed. See :ref:`daf_butler_collection_expressions` for 

2194 more information. 

2195 where : `str`, optional 

2196 A string expression similar to a SQL WHERE clause. May involve 

2197 any column of a dimension table or (as a shortcut for the primary 

2198 key column of a dimension table) dimension name. See 

2199 :ref:`daf_butler_dimension_expressions` for more information. 

2200 components : `bool`, optional 

2201 Must be `False`. Provided only for backwards compatibility. After 

2202 v27 this argument will be removed entirely. 

2203 bind : `~collections.abc.Mapping`, optional 

2204 Mapping containing literal values that should be injected into the 

2205 ``where`` expression, keyed by the identifiers they replace. 

2206 Values of collection type can be expanded in some cases; see 

2207 :ref:`daf_butler_dimension_expressions_identifiers` for more 

2208 information. 

2209 check : `bool`, optional 

2210 If `True` (default) check the query for consistency before 

2211 executing it. This may reject some valid queries that resemble 

2212 common mistakes (e.g. queries for visits without specifying an 

2213 instrument). 

2214 **kwargs 

2215 Additional keyword arguments are forwarded to 

2216 `DataCoordinate.standardize` when processing the ``dataId`` 

2217 argument (and may be used to provide a constraining data ID even 

2218 when the ``dataId`` argument is `None`). 

2219 

2220 Returns 

2221 ------- 

2222 dataIds : `.queries.DataCoordinateQueryResults` 

2223 Data IDs matching the given query parameters. These are guaranteed 

2224 to identify all dimensions (`DataCoordinate.hasFull` returns 

2225 `True`), but will not contain `DimensionRecord` objects 

2226 (`DataCoordinate.hasRecords` returns `False`). Call 

2227 `~.queries.DataCoordinateQueryResults.expanded` on the 

2228 returned object to fetch those (and consider using 

2229 `~.queries.DataCoordinateQueryResults.materialize` on the 

2230 returned object first if the expected number of rows is very 

2231 large). See documentation for those methods for additional 

2232 information. 

2233 

2234 Raises 

2235 ------ 

2236 lsst.daf.butler.registry.NoDefaultCollectionError 

2237 Raised if ``collections`` is `None` and 

2238 ``self.defaults.collections`` is `None`. 

2239 lsst.daf.butler.registry.CollectionExpressionError 

2240 Raised when ``collections`` expression is invalid. 

2241 lsst.daf.butler.registry.DataIdError 

2242 Raised when ``dataId`` or keyword arguments specify unknown 

2243 dimensions or values, or when they contain inconsistent values. 

2244 lsst.daf.butler.registry.DatasetTypeExpressionError 

2245 Raised when ``datasetType`` expression is invalid. 

2246 lsst.daf.butler.registry.UserExpressionError 

2247 Raised when ``where`` expression is invalid. 
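
Examples
--------
A sketch constraining data IDs by the existence of "raw" datasets; the
collection and instrument names are illustrative assumptions::

    from lsst.daf.butler import Butler

    registry = Butler("/path/to/repo").registry
    data_ids = registry.queryDataIds(
        ["exposure", "detector"],
        datasets="raw",
        collections="HSC/raw/all",
        instrument="HSC",
    )
    for data_id in data_ids.expanded():
        print(data_id["exposure"], data_id.records["exposure"].observation_type)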

2248 """ 

2249 if components is not _DefaultMarker: 

2250 if components is not False: 

2251 raise DatasetTypeError( 

2252 "Dataset component queries are no longer supported by Registry. Use " 

2253 "DatasetType methods to obtain components from parent dataset types instead." 

2254 ) 

2255 else: 

2256 warnings.warn( 

2257 "The components parameter is ignored. It will be removed after v27.", 

2258 category=FutureWarning, 

2259 stacklevel=find_outside_stacklevel("lsst.daf.butler"), 

2260 ) 

2261 requested_dimensions = self.dimensions.conform(dimensions) 

2262 doomed_by: list[str] = [] 

2263 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs) 

2264 resolved_dataset_types, collection_wildcard = self._standardize_query_dataset_args( 

2265 datasets, collections, doomed_by=doomed_by 

2266 ) 

2267 if collection_wildcard is not None and collection_wildcard.empty(): 

2268 doomed_by.append("No data coordinates can be found because collection list is empty.") 

2269 summary = queries.QuerySummary( 

2270 requested=requested_dimensions, 

2271 column_types=self._managers.column_types, 

2272 data_id=data_id, 

2273 expression=where, 

2274 bind=bind, 

2275 defaults=self.defaults.dataId, 

2276 check=check, 

2277 datasets=resolved_dataset_types, 

2278 ) 

2279 builder = self._makeQueryBuilder(summary, doomed_by=doomed_by) 

2280 for datasetType in resolved_dataset_types: 

2281 builder.joinDataset(datasetType, collection_wildcard, isResult=False) 

2282 query = builder.finish() 

2283 

2284 return queries.DataCoordinateQueryResults(query) 

2285 

2286 def queryDimensionRecords( 

2287 self, 

2288 element: DimensionElement | str, 

2289 *, 

2290 dataId: DataId | None = None, 

2291 datasets: Any = None, 

2292 collections: CollectionArgType | None = None, 

2293 where: str = "", 

2294 components: bool | _Marker = _DefaultMarker, 

2295 bind: Mapping[str, Any] | None = None, 

2296 check: bool = True, 

2297 **kwargs: Any, 

2298 ) -> queries.DimensionRecordQueryResults: 

2299 """Query for dimension information matching user-provided criteria. 

2300 

2301 Parameters 

2302 ---------- 

2303 element : `DimensionElement` or `str` 

2304 The dimension element to obtain records for. 

2305 dataId : `dict` or `DataCoordinate`, optional 

2306 A data ID whose key-value pairs are used as equality constraints 

2307 in the query. 

2308 datasets : dataset type expression, optional 

2309 An expression that fully or partially identifies dataset types 

2310 that should constrain the yielded records. See `queryDataIds` and 

2311 :ref:`daf_butler_dataset_type_expressions` for more information. 

2312 collections : collection expression, optional 

2313 An expression that identifies the collections to search for 

2314 datasets, such as a `str` (for full matches or partial matches 

2315 via globs), `re.Pattern` (for partial matches), or iterable 

2316 thereof. ``...`` can be used to search all collections (actually 

2317 just all `~CollectionType.RUN` collections, because this will 

2318 still find all datasets). If not provided, 

2319 ``self.defaults.collections`` is used. Ignored unless ``datasets``

2320 is also passed. See :ref:`daf_butler_collection_expressions` for 

2321 more information. 

2322 where : `str`, optional 

2323 A string expression similar to a SQL WHERE clause. See 

2324 `queryDataIds` and :ref:`daf_butler_dimension_expressions` for more 

2325 information. 

2326 components : `bool`, optional 

2330 Must be `False`. Provided only for backwards compatibility. After

2331 v27 this argument will be removed entirely.

2332 bind : `~collections.abc.Mapping`, optional 

2333 Mapping containing literal values that should be injected into the 

2334 ``where`` expression, keyed by the identifiers they replace. 

2335 Values of collection type can be expanded in some cases; see 

2336 :ref:`daf_butler_dimension_expressions_identifiers` for more 

2337 information. 

2338 check : `bool`, optional 

2339 If `True` (default) check the query for consistency before 

2340 executing it. This may reject some valid queries that resemble 

2341 common mistakes (e.g. queries for visits without specifying an 

2342 instrument). 

2343 **kwargs 

2344 Additional keyword arguments are forwarded to 

2345 `DataCoordinate.standardize` when processing the ``dataId`` 

2346 argument (and may be used to provide a constraining data ID even 

2347 when the ``dataId`` argument is `None`). 

2348 

2349 Returns 

2350 ------- 

2351 dataIds : `.queries.DimensionRecordQueryResults` 

2352 Data IDs matching the given query parameters. 

2353 

2354 Raises 

2355 ------ 

2356 lsst.daf.butler.registry.NoDefaultCollectionError 

2357 Raised if ``collections`` is `None` and 

2358 ``self.defaults.collections`` is `None`. 

2359 lsst.daf.butler.registry.CollectionExpressionError 

2360 Raised when ``collections`` expression is invalid. 

2361 lsst.daf.butler.registry.DataIdError 

2362 Raised when ``dataId`` or keyword arguments specify unknown 

2363 dimensions or values, or when they contain inconsistent values. 

2364 lsst.daf.butler.registry.DatasetTypeExpressionError 

2365 Raised when ``datasetType`` expression is invalid. 

2366 lsst.daf.butler.registry.UserExpressionError 

2367 Raised when ``where`` expression is invalid. 
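
Examples
--------
A sketch fetching detector records for one instrument; the instrument
name is an illustrative assumption::

    from lsst.daf.butler import Butler

    registry = Butler("/path/to/repo").registry
    for record in registry.queryDimensionRecords("detector", instrument="HSC"):
        print(record.id, record.full_name)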

2368 """ 

2369 if components is not _DefaultMarker: 

2370 if components is not False: 

2371 raise DatasetTypeError( 

2372 "Dataset component queries are no longer supported by Registry. Use " 

2373 "DatasetType methods to obtain components from parent dataset types instead." 

2374 ) 

2375 else: 

2376 warnings.warn( 

2377 "The components parameter is ignored. It will be removed after v27.", 

2378 category=FutureWarning, 

2379 stacklevel=find_outside_stacklevel("lsst.daf.butler"), 

2380 ) 

2381 if not isinstance(element, DimensionElement): 

2382 try: 

2383 element = self.dimensions[element] 

2384 except KeyError as e: 

2385 raise DimensionNameError( 

2386 f"No such dimension '{element}', available dimensions: " + str(self.dimensions.elements) 

2387 ) from e 

2388 doomed_by: list[str] = [] 

2389 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs) 

2390 resolved_dataset_types, collection_wildcard = self._standardize_query_dataset_args( 

2391 datasets, collections, doomed_by=doomed_by 

2392 ) 

2393 if collection_wildcard is not None and collection_wildcard.empty(): 

2394 doomed_by.append("No dimension records can be found because collection list is empty.") 

2395 summary = queries.QuerySummary( 

2396 requested=element.minimal_group, 

2397 column_types=self._managers.column_types, 

2398 data_id=data_id, 

2399 expression=where, 

2400 bind=bind, 

2401 defaults=self.defaults.dataId, 

2402 check=check, 

2403 datasets=resolved_dataset_types, 

2404 ) 

2405 builder = self._makeQueryBuilder(summary, doomed_by=doomed_by) 

2406 for datasetType in resolved_dataset_types: 

2407 builder.joinDataset(datasetType, collection_wildcard, isResult=False) 

2408 query = builder.finish().with_record_columns(element.name) 

2409 return queries.DatabaseDimensionRecordQueryResults(query, element) 

2410 

2411 def queryDatasetAssociations( 

2412 self, 

2413 datasetType: str | DatasetType, 

2414 collections: CollectionArgType | None = ..., 

2415 *, 

2416 collectionTypes: Iterable[CollectionType] = CollectionType.all(), 

2417 flattenChains: bool = False, 

2418 ) -> Iterator[DatasetAssociation]: 

2419 """Iterate over dataset-collection combinations where the dataset is in 

2420 the collection. 

2421 

2422 This method is a temporary placeholder for better support for 

2423 association results in `queryDatasets`. It will probably be 

2424 removed in the future, and should be avoided in production code 

2425 whenever possible. 

2426 

2427 Parameters 

2428 ---------- 

2429 datasetType : `DatasetType` or `str` 

2430 A dataset type object or the name of one. 

2431 collections : collection expression, optional 

2432 An expression that identifies the collections to search for 

2433 datasets, such as a `str` (for full matches or partial matches 

2434 via globs), `re.Pattern` (for partial matches), or iterable 

2435 thereof. ``...`` can be used to search all collections (actually 

2436 just all `~CollectionType.RUN` collections, because this will still 

2437 find all datasets). If not provided, ``self.defaults.collections``

2438 is used. See :ref:`daf_butler_collection_expressions` for more 

2439 information. 

2440 collectionTypes : `~collections.abc.Set` [ `CollectionType` ], optional 

2441 If provided, only yield associations from collections of these 

2442 types. 

2443 flattenChains : `bool`, optional 

2444 If `True`, search in the children of `~CollectionType.CHAINED` 

2445 collections. If `False`, ``CHAINED`` collections are ignored. 

2446 

2447 Yields 

2448 ------ 

2449 association : `.DatasetAssociation` 

2450 Object representing the relationship between a single dataset and 

2451 a single collection. 

2452 

2453 Raises 

2454 ------ 

2455 lsst.daf.butler.registry.NoDefaultCollectionError 

2456 Raised if ``collections`` is `None` and 

2457 ``self.defaults.collections`` is `None`. 

2458 lsst.daf.butler.registry.CollectionExpressionError 

2459 Raised when ``collections`` expression is invalid. 
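
Examples
--------
A sketch listing which collections hold each "bias" dataset, including
validity ranges for CALIBRATION collections; the names are illustrative
assumptions::

    from lsst.daf.butler import Butler

    registry = Butler("/path/to/repo").registry
    for assoc in registry.queryDatasetAssociations(
        "bias", collections="calib/*", flattenChains=True
    ):
        print(assoc.collection, assoc.ref.dataId, assoc.timespan)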

2460 """ 

2461 if collections is None: 

2462 if not self.defaults.collections: 

2463 raise NoDefaultCollectionError( 

2464 "No collections provided to queryDatasetAssociations, " 

2465 "and no defaults from registry construction." 

2466 ) 

2467 collections = self.defaults.collections 

2468 collection_wildcard = CollectionWildcard.from_expression(collections) 

2469 backend = queries.SqlQueryBackend(self._db, self._managers, self.dimension_record_cache) 

2470 parent_dataset_type = backend.resolve_single_dataset_type_wildcard(datasetType) 

2471 timespan_tag = DatasetColumnTag(parent_dataset_type.name, "timespan") 

2472 collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection") 

2473 for parent_collection_record in backend.resolve_collection_wildcard( 

2474 collection_wildcard, 

2475 collection_types=frozenset(collectionTypes), 

2476 flatten_chains=flattenChains, 

2477 ): 

2478 # Resolve this possibly-chained collection into a list of 

2479 # non-CHAINED collections that actually hold datasets of this 

2480 # type. 

2481 candidate_collection_records = backend.resolve_dataset_collections( 

2482 parent_dataset_type, 

2483 CollectionWildcard.from_names([parent_collection_record.name]), 

2484 allow_calibration_collections=True, 

2485 governor_constraints={}, 

2486 ) 

2487 if not candidate_collection_records: 

2488 continue 

2489 with backend.context() as context: 

2490 relation = backend.make_dataset_query_relation( 

2491 parent_dataset_type, 

2492 candidate_collection_records, 

2493 columns={"dataset_id", "run", "timespan", "collection"}, 

2494 context=context, 

2495 ) 

2496 reader = queries.DatasetRefReader( 

2497 parent_dataset_type, 

2498 translate_collection=lambda k: self._managers.collections[k].name, 

2499 full=False, 

2500 ) 

2501 for row in context.fetch_iterable(relation): 

2502 ref = reader.read(row) 

2503 collection_record = self._managers.collections[row[collection_tag]] 

2504 if collection_record.type is CollectionType.CALIBRATION: 

2505 timespan = row[timespan_tag] 

2506 else: 

2507 # For backwards compatibility and (possibly?) user 

2508 # convenience we continue to define the timespan of a 

2509 # DatasetAssociation row for a non-CALIBRATION 

2510 # collection to be None rather than a fully unbounded 

2511 # timespan. 

2512 timespan = None 

2513 yield DatasetAssociation(ref=ref, collection=collection_record.name, timespan=timespan) 

2514 

2515 def get_datastore_records(self, ref: DatasetRef) -> DatasetRef: 

2516 """Retrieve datastore records for given ref. 

2517 

2518 Parameters 

2519 ---------- 

2520 ref : `DatasetRef` 

2521 Dataset reference for which to retrieve its corresponding datastore 

2522 records. 

2523 

2524 Returns 

2525 ------- 

2526 updated_ref : `DatasetRef` 

2527 Dataset reference with filled datastore records. 

2528 

2529 Notes 

2530 ----- 

2531 If this method is called with a dataset ref that is not known to the

2532 registry, then a reference with an empty set of records is returned.

2533 """ 

2534 datastore_records: dict[str, list[StoredDatastoreItemInfo]] = {} 

2535 for opaque, record_class in self._datastore_record_classes.items(): 

2536 records = self.fetchOpaqueData(opaque, dataset_id=ref.id) 

2537 datastore_records[opaque] = [record_class.from_record(record) for record in records] 

2538 return ref.replace(datastore_records=datastore_records) 

2539 

2540 def store_datastore_records(self, refs: Mapping[str, DatasetRef]) -> None: 

2541 """Store datastore records for given refs. 

2542 

2543 Parameters 

2544 ---------- 

2545 refs : `~collections.abc.Mapping` [`str`, `DatasetRef`] 

2546 Mapping of datastore name to a dataset reference stored in that

2547 datastore; the reference must include datastore records.

2548 """ 

2549 for datastore_name, ref in refs.items(): 

2550 # Store ref IDs in the bridge table. 

2551 bridge = self._managers.datastores.register(datastore_name) 

2552 bridge.insert([ref]) 

2553 

2554 # store records in opaque tables 

2555 assert ref._datastore_records is not None, "Dataset ref must have datastore records" 

2556 for table_name, records in ref._datastore_records.items(): 

2557 opaque_table = self._managers.opaque.get(table_name) 

2558 assert opaque_table is not None, f"Unexpected opaque table name {table_name}" 

2559 opaque_table.insert(*(record.to_record(dataset_id=ref.id) for record in records)) 

2560 

2561 def make_datastore_tables(self, tables: Mapping[str, DatastoreOpaqueTable]) -> None: 

2562 """Create opaque tables used by datastores. 

2563 

2564 Parameters 

2565 ---------- 

2566 tables : `~collections.abc.Mapping` 

2567 Maps opaque table name to its definition. 

2568 

2569 Notes 

2570 ----- 

2571 This method should disappear in the future when opaque table 

2572 definitions are provided during `Registry` construction.

2573 """ 

2574 datastore_record_classes = {} 

2575 for table_name, table_def in tables.items(): 

2576 datastore_record_classes[table_name] = table_def.record_class 

2577 try: 

2578 self._managers.opaque.register(table_name, table_def.table_spec) 

2579 except ReadOnlyDatabaseError: 

2580 # If the database is read only and we just tried and failed to 

2581 # create a table, it means someone is trying to create a 

2582 # read-only butler client for an empty repo. That should be 

2583 # okay, as long as they then try to get any datasets before 

2584 # some other client creates the table. Chances are they're 

2585 # just validating configuration. 

2586 pass 

2587 self._datastore_record_classes = datastore_record_classes 

2588 

2589 @property 

2590 def obsCoreTableManager(self) -> ObsCoreTableManager | None: 

2591 """The ObsCore manager instance for this registry 

2592 (`~.interfaces.ObsCoreTableManager` 

2593 or `None`). 

2594 

2595 The ObsCore manager may not be implemented for all registry backends,

2596 and may not be enabled for a given repository.

2597 """ 

2598 return self._managers.obscore 

2599 

2600 storageClasses: StorageClassFactory 

2601 """All storage classes known to the registry (`StorageClassFactory`). 

2602 """ 

2603 

2604 _defaults: RegistryDefaults 

2605 """Default collections used for registry queries (`RegistryDefaults`)."""