Coverage for python/lsst/daf/butler/registry/sql_registry.py: 18%

575 statements  

coverage.py v7.3.2, created at 2023-12-05 11:07 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30from .. import ddl 

31 

32__all__ = ("SqlRegistry",) 

33 

34import contextlib 

35import logging 

36import warnings 

37from collections.abc import Iterable, Iterator, Mapping, Sequence 

38from typing import TYPE_CHECKING, Any, Literal, cast 

39 

40import sqlalchemy 

41from lsst.daf.relation import LeafRelation, Relation 

42from lsst.resources import ResourcePathExpression 

43from lsst.utils.introspection import find_outside_stacklevel 

44from lsst.utils.iteration import ensure_iterable 

45 

46from .._column_tags import DatasetColumnTag 

47from .._config import Config 

48from .._dataset_association import DatasetAssociation 

49from .._dataset_ref import DatasetId, DatasetIdGenEnum, DatasetRef 

50from .._dataset_type import DatasetType 

51from .._named import NamedKeyMapping, NameLookupMapping 

52from .._storage_class import StorageClassFactory 

53from .._timespan import Timespan 

54from ..dimensions import ( 

55 DataCoordinate, 

56 DataId, 

57 Dimension, 

58 DimensionConfig, 

59 DimensionElement, 

60 DimensionGraph, 

61 DimensionGroup, 

62 DimensionRecord, 

63 DimensionUniverse, 

64) 

65from ..progress import Progress 

66from ..registry import ( 

67 ArgumentError, 

68 CollectionExpressionError, 

69 CollectionSummary, 

70 CollectionType, 

71 CollectionTypeError, 

72 ConflictingDefinitionError, 

73 DataIdValueError, 

74 DatasetTypeError, 

75 DimensionNameError, 

76 InconsistentDataIdError, 

77 NoDefaultCollectionError, 

78 OrphanedRecordError, 

79 RegistryConfig, 

80 RegistryConsistencyError, 

81 RegistryDefaults, 

82 queries, 

83) 

84from ..registry.interfaces import ChainedCollectionRecord, ReadOnlyDatabaseError, RunRecord 

85from ..registry.managers import RegistryManagerInstances, RegistryManagerTypes 

86from ..registry.wildcards import CollectionWildcard, DatasetTypeWildcard 

87from ..utils import transactional 

88 

89if TYPE_CHECKING: 

90 from .._butler_config import ButlerConfig 

91 from ..datastore._datastore import DatastoreOpaqueTable 

92 from ..datastore.stored_file_info import StoredDatastoreItemInfo 

93 from ..registry._registry import CollectionArgType 

94 from ..registry.interfaces import ( 

95 CollectionRecord, 

96 Database, 

97 DatastoreRegistryBridgeManager, 

98 ObsCoreTableManager, 

99 ) 

100 

101 

102_LOG = logging.getLogger(__name__) 

103 

104 

105class SqlRegistry: 

106 """Butler Registry implementation that uses SQL database as backend. 

107 

108 Parameters 

109 ---------- 

110 database : `Database` 

111 Database instance to store Registry. 

112 defaults : `RegistryDefaults` 

113 Default collection search path and/or output `~CollectionType.RUN` 

114 collection. 

115 managers : `RegistryManagerInstances` 

116 All the managers required for this registry. 

117 """ 

118 

119 defaultConfigFile: str | None = None 

120 """Path to configuration defaults. Accessed within the ``configs`` resource 

121 or relative to a search path. Can be `None` if no defaults are specified.

122 """ 

123 

124 @classmethod 

125 def forceRegistryConfig( 

126 cls, config: ButlerConfig | RegistryConfig | Config | str | None 

127 ) -> RegistryConfig: 

128 """Force the supplied config to a `RegistryConfig`. 

129 

130 Parameters 

131 ---------- 

132 config : `RegistryConfig`, `Config`, `str`, or `None`

133 Registry configuration; if missing, the default configuration will

134 be loaded from ``registry.yaml``.

135 

136 Returns 

137 ------- 

138 registry_config : `RegistryConfig` 

139 A registry config. 

140 """ 

141 if not isinstance(config, RegistryConfig): 

142 if isinstance(config, str | Config) or config is None: 

143 config = RegistryConfig(config) 

144 else: 

145 raise ValueError(f"Incompatible Registry configuration: {config}") 

146 return config 

147 

148 @classmethod 

149 def createFromConfig( 

150 cls, 

151 config: RegistryConfig | str | None = None, 

152 dimensionConfig: DimensionConfig | str | None = None, 

153 butlerRoot: ResourcePathExpression | None = None, 

154 ) -> SqlRegistry: 

155 """Create registry database and return `SqlRegistry` instance. 

156 

157 This method initializes database contents; the database must be empty

158 prior to calling this method. 

159 

160 Parameters 

161 ---------- 

162 config : `RegistryConfig` or `str`, optional 

163 Registry configuration; if missing, the default configuration will

164 be loaded from ``registry.yaml``.

165 dimensionConfig : `DimensionConfig` or `str`, optional 

166 Dimensions configuration; if missing, the default configuration

167 will be loaded from ``dimensions.yaml``.

168 butlerRoot : convertible to `lsst.resources.ResourcePath`, optional 

169 Path to the repository root this `SqlRegistry` will manage. 

170 

171 Returns 

172 ------- 

173 registry : `SqlRegistry` 

174 A new `SqlRegistry` instance. 

175 """ 

176 config = cls.forceRegistryConfig(config) 

177 config.replaceRoot(butlerRoot) 

178 

179 if isinstance(dimensionConfig, str): 

180 dimensionConfig = DimensionConfig(dimensionConfig) 

181 elif dimensionConfig is None: 

182 dimensionConfig = DimensionConfig() 

183 elif not isinstance(dimensionConfig, DimensionConfig): 

184 raise TypeError(f"Incompatible Dimension configuration type: {type(dimensionConfig)}") 

185 

186 DatabaseClass = config.getDatabaseClass() 

187 database = DatabaseClass.fromUri( 

188 config.connectionString, origin=config.get("origin", 0), namespace=config.get("namespace") 

189 ) 

190 managerTypes = RegistryManagerTypes.fromConfig(config) 

191 managers = managerTypes.makeRepo(database, dimensionConfig) 

192 return cls(database, RegistryDefaults(), managers) 

193 

194 @classmethod 

195 def fromConfig( 

196 cls, 

197 config: ButlerConfig | RegistryConfig | Config | str, 

198 butlerRoot: ResourcePathExpression | None = None, 

199 writeable: bool = True, 

200 defaults: RegistryDefaults | None = None, 

201 ) -> SqlRegistry: 

202 """Create `Registry` subclass instance from `config`. 

203 

204 Registry database must be initialized prior to calling this method. 

205 

206 Parameters 

207 ---------- 

208 config : `ButlerConfig`, `RegistryConfig`, `Config` or `str` 

209 Registry configuration.

210 butlerRoot : `lsst.resources.ResourcePathExpression`, optional 

211 Path to the repository root this `Registry` will manage. 

212 writeable : `bool`, optional 

213 If `True` (default) create a read-write connection to the database. 

214 defaults : `RegistryDefaults`, optional 

215 Default collection search path and/or output `~CollectionType.RUN` 

216 collection. 

217 

218 Returns 

219 ------- 

220 registry : `SqlRegistry` 

221 A new `SqlRegistry` subclass instance. 

222 """ 

223 config = cls.forceRegistryConfig(config) 

224 config.replaceRoot(butlerRoot) 

225 DatabaseClass = config.getDatabaseClass() 

226 database = DatabaseClass.fromUri( 

227 config.connectionString, 

228 origin=config.get("origin", 0), 

229 namespace=config.get("namespace"), 

230 writeable=writeable, 

231 ) 

232 managerTypes = RegistryManagerTypes.fromConfig(config) 

233 with database.session(): 

234 managers = managerTypes.loadRepo(database) 

235 if defaults is None: 

236 defaults = RegistryDefaults() 

237 return cls(database, defaults, managers) 

238 
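A minimal usage sketch of the two factory methods above. The configuration and repository root are placeholders; only the method names and signatures come from this module.

    from lsst.daf.butler.registry import RegistryConfig
    from lsst.daf.butler.registry.sql_registry import SqlRegistry

    # createFromConfig initializes an empty database; fromConfig re-opens an
    # already-initialized one.
    config = RegistryConfig()  # default configuration loaded from registry.yaml
    registry = SqlRegistry.createFromConfig(config, butlerRoot="/tmp/example_repo")
    registry = SqlRegistry.fromConfig(config, butlerRoot="/tmp/example_repo", writeable=False)
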

239 def __init__(self, database: Database, defaults: RegistryDefaults, managers: RegistryManagerInstances): 

240 self._db = database 

241 self._managers = managers 

242 self.storageClasses = StorageClassFactory() 

243 # Intentionally invoke property setter to initialize defaults. This 

244 # can only be done after most of the rest of Registry has already been 

245 # initialized, and must be done before the property getter is used. 

246 self.defaults = defaults 

247 

248 # TODO: This is currently initialized by `make_datastore_tables`, 

249 # eventually we'll need to do it during construction. 

250 # The mapping is indexed by the opaque table name. 

251 self._datastore_record_classes: Mapping[str, type[StoredDatastoreItemInfo]] = {} 

252 

253 def __str__(self) -> str: 

254 return str(self._db) 

255 

256 def __repr__(self) -> str: 

257 return f"SqlRegistry({self._db!r}, {self.dimensions!r})" 

258 

259 def isWriteable(self) -> bool: 

260 """Return `True` if this registry allows write operations, and `False` 

261 otherwise. 

262 """ 

263 return self._db.isWriteable() 

264 

265 def copy(self, defaults: RegistryDefaults | None = None) -> SqlRegistry: 

266 """Create a new `SqlRegistry` backed by the same data repository 

267 and connection as this one, but independent defaults. 

268 

269 Parameters 

270 ---------- 

271 defaults : `~lsst.daf.butler.registry.RegistryDefaults`, optional 

272 Default collections and data ID values for the new registry. If 

273 not provided, ``self.defaults`` will be used (but future changes 

274 to either registry's defaults will not affect the other). 

275 

276 Returns 

277 ------- 

278 copy : `SqlRegistry` 

279 A new `SqlRegistry` instance with its own defaults. 

280 

281 Notes 

282 ----- 

283 Because the new registry shares a connection with the original, they 

284 also share transaction state (despite the fact that their `transaction` 

285 context manager methods do not reflect this), and must be used with 

286 care. 

287 """ 

288 if defaults is None: 

289 # No need to copy, because `RegistryDefaults` is immutable; we 

290 # effectively copy on write. 

291 defaults = self.defaults 

292 return type(self)(self._db, defaults, self._managers) 

293 

294 @property 

295 def dimensions(self) -> DimensionUniverse: 

296 """Definitions of all dimensions recognized by this `Registry` 

297 (`DimensionUniverse`). 

298 """ 

299 return self._managers.dimensions.universe 

300 

301 @property 

302 def defaults(self) -> RegistryDefaults: 

303 """Default collection search path and/or output `~CollectionType.RUN` 

304 collection (`~lsst.daf.butler.registry.RegistryDefaults`). 

305 

306 This is an immutable struct whose components may not be set 

307 individually, but the entire struct can be set by assigning to this 

308 property. 

309 """ 

310 return self._defaults 

311 

312 @defaults.setter 

313 def defaults(self, value: RegistryDefaults) -> None: 

314 if value.run is not None: 

315 self.registerRun(value.run) 

316 value.finish(self) 

317 self._defaults = value 

318 

319 def refresh(self) -> None: 

320 """Refresh all in-memory state by querying the database. 

321 

322 This may be necessary to enable querying for entities added by other 

323 registry instances after this one was constructed. 

324 """ 

325 with self._db.transaction(): 

326 self._managers.refresh() 

327 

328 @contextlib.contextmanager 

329 def caching_context(self) -> Iterator[None]: 

330 """Context manager that enables caching.""" 

331 self._managers.caching_context.enable() 

332 yield 

333 self._managers.caching_context.disable() 

334 

335 @contextlib.contextmanager 

336 def transaction(self, *, savepoint: bool = False) -> Iterator[None]: 

337 """Return a context manager that represents a transaction.""" 

338 try: 

339 with self._db.transaction(savepoint=savepoint): 

340 yield 

341 except BaseException: 

342 # TODO: this clears the caches sometimes when we wouldn't actually 

343 # need to. Can we avoid that? 

344 self._managers.dimensions.clearCaches() 

345 raise 

346 
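An illustrative use of the transaction context manager, assuming ``registry`` is a writeable `SqlRegistry` and the named collections already exist; the names are placeholders.

    # Both operations commit together; an exception inside the block rolls
    # them back (and, per the TODO above, clears the dimension caches).
    with registry.transaction(savepoint=True):
        registry.setCollectionDocumentation("u/example/chain", "Nightly default chain.")
        registry.setCollectionChain("u/example/chain", ["u/example/run"])
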

347 def resetConnectionPool(self) -> None: 

348 """Reset SQLAlchemy connection pool for `SqlRegistry` database. 

349 

350 This operation is useful when using the registry with fork-based

351 multiprocessing. To use the registry across a fork boundary, one has to

352 make sure that there are no currently active connections (no session or

353 transaction in progress) and that the connection pool is reset using this

354 method. It should be called by the child process immediately

355 after the fork.

356 """ 

357 self._db._engine.dispose() 

358 

359 def registerOpaqueTable(self, tableName: str, spec: ddl.TableSpec) -> None: 

360 """Add an opaque (to the `Registry`) table for use by a `Datastore` or 

361 other data repository client. 

362 

363 Opaque table records can be added via `insertOpaqueData`, retrieved via 

364 `fetchOpaqueData`, and removed via `deleteOpaqueData`. 

365 

366 Parameters 

367 ---------- 

368 tableName : `str` 

369 Logical name of the opaque table. This may differ from the 

370 actual name used in the database by a prefix and/or suffix. 

371 spec : `ddl.TableSpec` 

372 Specification for the table to be added. 

373 """ 

374 self._managers.opaque.register(tableName, spec) 

375 

376 @transactional 

377 def insertOpaqueData(self, tableName: str, *data: dict) -> None: 

378 """Insert records into an opaque table. 

379 

380 Parameters 

381 ---------- 

382 tableName : `str` 

383 Logical name of the opaque table. Must match the name used in a 

384 previous call to `registerOpaqueTable`. 

385 data 

386 Each additional positional argument is a dictionary that represents 

387 a single row to be added. 

388 """ 

389 self._managers.opaque[tableName].insert(*data) 

390 

391 def fetchOpaqueData(self, tableName: str, **where: Any) -> Iterator[Mapping[str, Any]]: 

392 """Retrieve records from an opaque table. 

393 

394 Parameters 

395 ---------- 

396 tableName : `str` 

397 Logical name of the opaque table. Must match the name used in a 

398 previous call to `registerOpaqueTable`. 

399 where 

400 Additional keyword arguments are interpreted as equality 

401 constraints that restrict the returned rows (combined with AND); 

402 keyword arguments are column names and values are the values they 

403 must have. 

404 

405 Yields 

406 ------ 

407 row : `dict` 

408 A dictionary representing a single result row. 

409 """ 

410 yield from self._managers.opaque[tableName].fetch(**where) 

411 

412 @transactional 

413 def deleteOpaqueData(self, tableName: str, **where: Any) -> None: 

414 """Remove records from an opaque table. 

415 

416 Parameters 

417 ---------- 

418 tableName : `str` 

419 Logical name of the opaque table. Must match the name used in a 

420 previous call to `registerOpaqueTable`. 

421 where 

422 Additional keyword arguments are interpreted as equality 

423 constraints that restrict the deleted rows (combined with AND); 

424 keyword arguments are column names and values are the values they 

425 must have. 

426 """ 

427 self._managers.opaque[tableName].delete(where.keys(), where) 

428 
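A sketch of the opaque-table round trip provided by the four methods above. The table name, field specification, and row contents are invented for illustration; the `ddl.TableSpec`/`ddl.FieldSpec` construction is an assumption about the ``ddl`` helpers, not something defined in this file.

    import uuid

    import sqlalchemy
    from lsst.daf.butler import ddl

    spec = ddl.TableSpec(
        fields=[
            ddl.FieldSpec(name="dataset_id", dtype=ddl.GUID, primaryKey=True),
            ddl.FieldSpec(name="path", dtype=sqlalchemy.String, length=256),
        ]
    )
    registry.registerOpaqueTable("example_datastore_records", spec)

    record_id = uuid.uuid4()
    registry.insertOpaqueData("example_datastore_records", {"dataset_id": record_id, "path": "a/b.fits"})
    rows = list(registry.fetchOpaqueData("example_datastore_records", dataset_id=record_id))
    registry.deleteOpaqueData("example_datastore_records", dataset_id=record_id)
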

429 def registerCollection( 

430 self, name: str, type: CollectionType = CollectionType.TAGGED, doc: str | None = None 

431 ) -> bool: 

432 """Add a new collection if one with the given name does not exist. 

433 

434 Parameters 

435 ---------- 

436 name : `str` 

437 The name of the collection to create. 

438 type : `CollectionType` 

439 Enum value indicating the type of collection to create. 

440 doc : `str`, optional 

441 Documentation string for the collection. 

442 

443 Returns 

444 ------- 

445 registered : `bool` 

446 Boolean indicating whether the collection was registered by this

447 call (`True`) or already existed (`False`).

448 

449 Notes 

450 ----- 

451 This method cannot be called within transactions, as it needs to be 

452 able to perform its own transaction to be concurrent. 

453 """ 

454 _, registered = self._managers.collections.register(name, type, doc=doc) 

455 return registered 

456 

457 def getCollectionType(self, name: str) -> CollectionType: 

458 """Return an enumeration value indicating the type of the given 

459 collection. 

460 

461 Parameters 

462 ---------- 

463 name : `str` 

464 The name of the collection. 

465 

466 Returns 

467 ------- 

468 type : `CollectionType` 

469 Enum value indicating the type of this collection. 

470 

471 Raises 

472 ------ 

473 lsst.daf.butler.registry.MissingCollectionError 

474 Raised if no collection with the given name exists. 

475 """ 

476 return self._managers.collections.find(name).type 

477 

478 def _get_collection_record(self, name: str) -> CollectionRecord: 

479 """Return the record for this collection. 

480 

481 Parameters 

482 ---------- 

483 name : `str` 

484 Name of the collection for which the record is to be retrieved. 

485 

486 Returns 

487 ------- 

488 record : `CollectionRecord` 

489 The record for this collection. 

490 """ 

491 return self._managers.collections.find(name) 

492 

493 def registerRun(self, name: str, doc: str | None = None) -> bool: 

494 """Add a new run if one with the given name does not exist. 

495 

496 Parameters 

497 ---------- 

498 name : `str` 

499 The name of the run to create. 

500 doc : `str`, optional 

501 Documentation string for the collection. 

502 

503 Returns 

504 ------- 

505 registered : `bool` 

506 Boolean indicating whether a new run was registered. `False` 

507 if it already existed. 

508 

509 Notes 

510 ----- 

511 This method cannot be called within transactions, as it needs to be 

512 able to perform its own transaction to be concurrent. 

513 """ 

514 _, registered = self._managers.collections.register(name, CollectionType.RUN, doc=doc) 

515 return registered 

516 
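For orientation, a short sketch of registering collections of the two most common types; the names are placeholders and ``registry`` is assumed to be writeable.

    from lsst.daf.butler.registry import CollectionType

    # Both calls are idempotent and report whether anything new was created.
    # Neither may be called inside an open transaction.
    new_run = registry.registerRun("u/example/run", doc="Example output run.")
    new_tag = registry.registerCollection("u/example/tagged", CollectionType.TAGGED)
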

517 @transactional 

518 def removeCollection(self, name: str) -> None: 

519 """Remove the given collection from the registry. 

520 

521 Parameters 

522 ---------- 

523 name : `str` 

524 The name of the collection to remove. 

525 

526 Raises 

527 ------ 

528 lsst.daf.butler.registry.MissingCollectionError 

529 Raised if no collection with the given name exists. 

530 sqlalchemy.exc.IntegrityError 

531 Raised if the database rows associated with the collection are 

532 still referenced by some other table, such as a dataset in a 

533 datastore (for `~CollectionType.RUN` collections only) or a 

534 `~CollectionType.CHAINED` collection of which this collection is 

535 a child. 

536 

537 Notes 

538 ----- 

539 If this is a `~CollectionType.RUN` collection, all datasets and quanta 

540 in it will be removed from the `Registry` database. This requires that

541 those datasets be removed (or at least trashed) from any datastores 

542 that hold them first. 

543 

544 A collection may not be deleted as long as it is referenced by a 

545 `~CollectionType.CHAINED` collection; the ``CHAINED`` collection must 

546 be deleted or redefined first. 

547 """ 

548 self._managers.collections.remove(name) 

549 

550 def getCollectionChain(self, parent: str) -> tuple[str, ...]: 

551 """Return the child collections in a `~CollectionType.CHAINED` 

552 collection. 

553 

554 Parameters 

555 ---------- 

556 parent : `str` 

557 Name of the chained collection. Must have already been added via 

558 a call to `Registry.registerCollection`. 

559 

560 Returns 

561 ------- 

562 children : `~collections.abc.Sequence` [ `str` ] 

563 An ordered sequence of collection names that are searched when the 

564 given chained collection is searched. 

565 

566 Raises 

567 ------ 

568 lsst.daf.butler.registry.MissingCollectionError 

569 Raised if ``parent`` does not exist in the `Registry`. 

570 lsst.daf.butler.registry.CollectionTypeError 

571 Raised if ``parent`` does not correspond to a 

572 `~CollectionType.CHAINED` collection. 

573 """ 

574 record = self._managers.collections.find(parent) 

575 if record.type is not CollectionType.CHAINED: 

576 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.") 

577 assert isinstance(record, ChainedCollectionRecord) 

578 return record.children 

579 

580 @transactional 

581 def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None: 

582 """Define or redefine a `~CollectionType.CHAINED` collection. 

583 

584 Parameters 

585 ---------- 

586 parent : `str` 

587 Name of the chained collection. Must have already been added via 

588 a call to `Registry.registerCollection`. 

589 children : collection expression 

590 An expression defining an ordered search of child collections, 

591 generally an iterable of `str`; see 

592 :ref:`daf_butler_collection_expressions` for more information. 

593 flatten : `bool`, optional 

594 If `True` (`False` is default), recursively flatten out any nested 

595 `~CollectionType.CHAINED` collections in ``children`` first. 

596 

597 Raises 

598 ------ 

599 lsst.daf.butler.registry.MissingCollectionError 

600 Raised when any of the given collections do not exist in the 

601 `Registry`. 

602 lsst.daf.butler.registry.CollectionTypeError 

603 Raised if ``parent`` does not correspond to a 

604 `~CollectionType.CHAINED` collection. 

605 ValueError 

606 Raised if the given collections contain a cycle.

607 """ 

608 record = self._managers.collections.find(parent) 

609 if record.type is not CollectionType.CHAINED: 

610 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.") 

611 assert isinstance(record, ChainedCollectionRecord) 

612 children = CollectionWildcard.from_expression(children).require_ordered() 

613 if children != record.children or flatten: 

614 self._managers.collections.update_chain(record, children, flatten=flatten) 

615 
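A sketch tying the chain-related methods together, assuming the child collections from the earlier sketches already exist; all names are placeholders.

    from lsst.daf.butler.registry import CollectionType

    registry.registerCollection("u/example/chain", CollectionType.CHAINED)
    registry.setCollectionChain("u/example/chain", ["u/example/run", "u/example/tagged"])
    assert registry.getCollectionChain("u/example/chain") == ("u/example/run", "u/example/tagged")
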

616 def getCollectionParentChains(self, collection: str) -> set[str]: 

617 """Return the CHAINED collections that directly contain the given one. 

618 

619 Parameters 

620 ---------- 

621 collection : `str`

622 Name of the collection. 

623 

624 Returns 

625 ------- 

626 chains : `set` of `str` 

627 Set of `~CollectionType.CHAINED` collection names. 

628 """ 

629 return self._managers.collections.getParentChains(self._managers.collections.find(collection).key) 

630 

631 def getCollectionDocumentation(self, collection: str) -> str | None: 

632 """Retrieve the documentation string for a collection. 

633 

634 Parameters 

635 ---------- 

636 collection : `str`

637 Name of the collection. 

638 

639 Returns 

640 ------- 

641 docs : `str` or `None` 

642 Docstring for the collection with the given name. 

643 """ 

644 return self._managers.collections.getDocumentation(self._managers.collections.find(collection).key) 

645 

646 def setCollectionDocumentation(self, collection: str, doc: str | None) -> None: 

647 """Set the documentation string for a collection. 

648 

649 Parameters 

650 ---------- 

651 collection : `str`

652 Name of the collection.

653 doc : `str` or `None`

654 Docstring for the collection with the given name; will replace any 

655 existing docstring. Passing `None` will remove any existing 

656 docstring. 

657 """ 

658 self._managers.collections.setDocumentation(self._managers.collections.find(collection).key, doc) 

659 

660 def getCollectionSummary(self, collection: str) -> CollectionSummary: 

661 """Return a summary for the given collection. 

662 

663 Parameters 

664 ---------- 

665 collection : `str` 

666 Name of the collection for which a summary is to be retrieved. 

667 

668 Returns 

669 ------- 

670 summary : `~lsst.daf.butler.registry.CollectionSummary` 

671 Summary of the dataset types and governor dimension values in 

672 this collection. 

673 """ 

674 record = self._managers.collections.find(collection) 

675 return self._managers.datasets.getCollectionSummary(record) 

676 

677 def registerDatasetType(self, datasetType: DatasetType) -> bool: 

678 """Add a new `DatasetType` to the Registry. 

679 

680 It is not an error to register the same `DatasetType` twice. 

681 

682 Parameters 

683 ---------- 

684 datasetType : `DatasetType` 

685 The `DatasetType` to be added. 

686 

687 Returns 

688 ------- 

689 inserted : `bool` 

690 `True` if ``datasetType`` was inserted, `False` if an identical 

691 existing `DatasetType` was found. Note that in either case the 

692 DatasetType is guaranteed to be defined in the Registry 

693 consistently with the given definition. 

694 

695 Raises 

696 ------ 

697 ValueError 

698 Raised if the dimensions or storage class are invalid. 

699 lsst.daf.butler.registry.ConflictingDefinitionError 

700 Raised if this `DatasetType` is already registered with a different 

701 definition. 

702 

703 Notes 

704 ----- 

705 This method cannot be called within transactions, as it needs to be 

706 able to perform its own transaction to be concurrent. 

707 """ 

708 return self._managers.datasets.register(datasetType) 

709 
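A sketch of defining and registering a dataset type. The name, dimensions, and storage class are illustrative values; the `DatasetType` constructor arguments are an assumption about that class, not something specified here.

    from lsst.daf.butler import DatasetType

    flat_type = DatasetType(
        "flat",
        dimensions=("instrument", "detector", "physical_filter"),
        storageClass="ExposureF",
        universe=registry.dimensions,
        isCalibration=True,
    )
    registry.registerDatasetType(flat_type)  # True on first call, False if already defined
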

710 def removeDatasetType(self, name: str | tuple[str, ...]) -> None: 

711 """Remove the named `DatasetType` from the registry. 

712 

713 .. warning:: 

714 

715 Registry implementations can cache the dataset type definitions. 

716 This means that deleting the dataset type definition may result in 

717 unexpected behavior from other butler processes that are active 

718 and have not yet seen the deletion.

719 

720 Parameters 

721 ---------- 

722 name : `str` or `tuple` [`str`] 

723 Name of the type to be removed, or a tuple of type

724 names to be removed. Wildcards are allowed. 

725 

726 Raises 

727 ------ 

728 lsst.daf.butler.registry.OrphanedRecordError 

729 Raised if an attempt is made to remove the dataset type definition 

730 when there are already datasets associated with it. 

731 

732 Notes 

733 ----- 

734 If the dataset type is not registered the method will return without 

735 action. 

736 """ 

737 for datasetTypeExpression in ensure_iterable(name): 

738 # Catch any warnings from the caller specifying a component 

739 # dataset type. This will result in an error later but the 

740 # warning could be confusing when the caller is not querying 

741 # anything. 

742 with warnings.catch_warnings(): 

743 warnings.simplefilter("ignore", category=FutureWarning) 

744 datasetTypes = list(self.queryDatasetTypes(datasetTypeExpression)) 

745 if not datasetTypes: 

746 _LOG.info("Dataset type %r not defined", datasetTypeExpression) 

747 else: 

748 for datasetType in datasetTypes: 

749 self._managers.datasets.remove(datasetType.name) 

750 _LOG.info("Removed dataset type %r", datasetType.name) 

751 

752 def getDatasetType(self, name: str) -> DatasetType: 

753 """Get the `DatasetType`. 

754 

755 Parameters 

756 ---------- 

757 name : `str` 

758 Name of the type. 

759 

760 Returns 

761 ------- 

762 type : `DatasetType` 

763 The `DatasetType` associated with the given name. 

764 

765 Raises 

766 ------ 

767 lsst.daf.butler.registry.MissingDatasetTypeError 

768 Raised if the requested dataset type has not been registered. 

769 

770 Notes 

771 ----- 

772 This method handles component dataset types automatically, though most 

773 other registry operations do not. 

774 """ 

775 parent_name, component = DatasetType.splitDatasetTypeName(name) 

776 storage = self._managers.datasets[parent_name] 

777 if component is None: 

778 return storage.datasetType 

779 else: 

780 return storage.datasetType.makeComponentDatasetType(component) 

781 

782 def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool: 

783 """Test whether the given dataset ID generation mode is supported by 

784 `insertDatasets`. 

785 

786 Parameters 

787 ---------- 

788 mode : `DatasetIdGenEnum` 

789 Enum value for the mode to test. 

790 

791 Returns 

792 ------- 

793 supported : `bool` 

794 Whether the given mode is supported. 

795 """ 

796 return self._managers.datasets.supportsIdGenerationMode(mode) 

797 

798 def findDataset( 

799 self, 

800 datasetType: DatasetType | str, 

801 dataId: DataId | None = None, 

802 *, 

803 collections: CollectionArgType | None = None, 

804 timespan: Timespan | None = None, 

805 datastore_records: bool = False, 

806 **kwargs: Any, 

807 ) -> DatasetRef | None: 

808 """Find a dataset given its `DatasetType` and data ID. 

809 

810 This can be used to obtain a `DatasetRef` that permits the dataset to 

811 be read from a `Datastore`. If the dataset is a component and cannot

812 be found using the provided dataset type, a dataset ref for the parent 

813 will be returned instead but with the correct dataset type. 

814 

815 Parameters 

816 ---------- 

817 datasetType : `DatasetType` or `str` 

818 A `DatasetType` or the name of one. If this is a `DatasetType` 

819 instance, its storage class will be respected and propagated to 

820 the output, even if it differs from the dataset type definition 

821 in the registry, as long as the storage classes are convertible. 

822 dataId : `dict` or `DataCoordinate`, optional 

823 A `dict`-like object containing the `Dimension` links that identify 

824 the dataset within a collection. 

825 collections : collection expression, optional 

826 An expression that fully or partially identifies the collections to 

827 search for the dataset; see 

828 :ref:`daf_butler_collection_expressions` for more information. 

829 Defaults to ``self.defaults.collections``. 

830 timespan : `Timespan`, optional 

831 A timespan that the validity range of the dataset must overlap. 

832 If not provided, any `~CollectionType.CALIBRATION` collections 

833 matched by the ``collections`` argument will not be searched. 

834 **kwargs 

835 Additional keyword arguments passed to 

836 `DataCoordinate.standardize` to convert ``dataId`` to a true 

837 `DataCoordinate` or augment an existing one. 

838 

839 Returns 

840 ------- 

841 ref : `DatasetRef` 

842 A reference to the dataset, or `None` if no matching Dataset 

843 was found. 

844 

845 Raises 

846 ------ 

847 lsst.daf.butler.registry.NoDefaultCollectionError 

848 Raised if ``collections`` is `None` and 

849 ``self.defaults.collections`` is `None`. 

850 LookupError 

851 Raised if one or more data ID keys are missing. 

852 lsst.daf.butler.registry.MissingDatasetTypeError 

853 Raised if the dataset type does not exist. 

854 lsst.daf.butler.registry.MissingCollectionError 

855 Raised if any of ``collections`` does not exist in the registry. 

856 

857 Notes 

858 ----- 

859 This method simply returns `None` and does not raise an exception even 

860 when the set of collections searched is intrinsically incompatible with 

861 the dataset type, e.g. if ``datasetType.isCalibration() is False``, but 

862 only `~CollectionType.CALIBRATION` collections are being searched. 

863 This may make it harder to debug some lookup failures, but the behavior 

864 is intentional; we consider it more important that failed searches are 

865 reported consistently, regardless of the reason, and that adding 

866 additional collections that do not contain a match to the search path 

867 never changes the behavior. 

868 

869 This method handles component dataset types automatically, though most 

870 other registry operations do not. 

871 """ 

872 if collections is None: 

873 if not self.defaults.collections: 

874 raise NoDefaultCollectionError( 

875 "No collections provided to findDataset, and no defaults from registry construction." 

876 ) 

877 collections = self.defaults.collections 

878 backend = queries.SqlQueryBackend(self._db, self._managers) 

879 collection_wildcard = CollectionWildcard.from_expression(collections, require_ordered=True) 

880 if collection_wildcard.empty(): 

881 return None 

882 matched_collections = backend.resolve_collection_wildcard(collection_wildcard) 

883 parent_dataset_type, components = backend.resolve_single_dataset_type_wildcard( 

884 datasetType, components_deprecated=False 

885 ) 

886 if len(components) > 1: 

887 raise DatasetTypeError( 

888 f"findDataset requires exactly one dataset type; got multiple components {components} " 

889 f"for parent dataset type {parent_dataset_type.name}." 

890 ) 

891 component = components[0] 

892 dataId = DataCoordinate.standardize( 

893 dataId, 

894 dimensions=parent_dataset_type.dimensions, 

895 universe=self.dimensions, 

896 defaults=self.defaults.dataId, 

897 **kwargs, 

898 ) 

899 governor_constraints = {name: {cast(str, dataId[name])} for name in dataId.dimensions.governors} 

900 (filtered_collections,) = backend.filter_dataset_collections( 

901 [parent_dataset_type], 

902 matched_collections, 

903 governor_constraints=governor_constraints, 

904 ).values() 

905 if not filtered_collections: 

906 return None 

907 if timespan is None: 

908 filtered_collections = [ 

909 collection_record 

910 for collection_record in filtered_collections 

911 if collection_record.type is not CollectionType.CALIBRATION 

912 ] 

913 if filtered_collections: 

914 requested_columns = {"dataset_id", "run", "collection"} 

915 with backend.context() as context: 

916 predicate = context.make_data_coordinate_predicate( 

917 dataId.subset(parent_dataset_type.dimensions), full=False 

918 ) 

919 if timespan is not None: 

920 requested_columns.add("timespan") 

921 predicate = predicate.logical_and( 

922 context.make_timespan_overlap_predicate( 

923 DatasetColumnTag(parent_dataset_type.name, "timespan"), timespan 

924 ) 

925 ) 

926 relation = backend.make_dataset_query_relation( 

927 parent_dataset_type, filtered_collections, requested_columns, context 

928 ).with_rows_satisfying(predicate) 

929 rows = list(context.fetch_iterable(relation)) 

930 else: 

931 rows = [] 

932 if not rows: 

933 return None 

934 elif len(rows) == 1: 

935 best_row = rows[0] 

936 else: 

937 rank_by_collection_key = {record.key: n for n, record in enumerate(filtered_collections)} 

938 collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection") 

939 row_iter = iter(rows) 

940 best_row = next(row_iter) 

941 best_rank = rank_by_collection_key[best_row[collection_tag]] 

942 have_tie = False 

943 for row in row_iter: 

944 if (rank := rank_by_collection_key[row[collection_tag]]) < best_rank: 

945 best_row = row 

946 best_rank = rank 

947 have_tie = False 

948 elif rank == best_rank: 

949 have_tie = True 

950 assert timespan is not None, "Rank ties should be impossible given DB constraints." 

951 if have_tie: 

952 raise LookupError( 

953 f"Ambiguous calibration lookup for {parent_dataset_type.name} in collections " 

954 f"{collection_wildcard.strings} with timespan {timespan}." 

955 ) 

956 reader = queries.DatasetRefReader( 

957 parent_dataset_type, 

958 translate_collection=lambda k: self._managers.collections[k].name, 

959 ) 

960 ref = reader.read(best_row, data_id=dataId) 

961 if component is not None: 

962 ref = ref.makeComponentRef(component) 

963 if datastore_records: 

964 ref = self.get_datastore_records(ref) 

965 

966 return ref 

967 
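A sketch of a `findDataset` call matching the docstring above; the dataset type, data ID values, collection name, and timespan are illustrative, and the `Timespan`/astropy construction is an assumption.

    import astropy.time
    from lsst.daf.butler import Timespan

    window = Timespan(
        begin=astropy.time.Time("2023-06-01", scale="tai"),
        end=astropy.time.Time("2023-06-02", scale="tai"),
    )
    ref = registry.findDataset(
        "flat",
        instrument="ExampleCam",
        detector=12,
        physical_filter="r",
        collections=["u/example/calib"],
        timespan=window,  # needed for CALIBRATION collections to be searched
    )
    if ref is None:
        print("no matching flat found")
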

968 @transactional 

969 def insertDatasets( 

970 self, 

971 datasetType: DatasetType | str, 

972 dataIds: Iterable[DataId], 

973 run: str | None = None, 

974 expand: bool = True, 

975 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE, 

976 ) -> list[DatasetRef]: 

977 """Insert one or more datasets into the `Registry`. 

978 

979 This always adds new datasets; to associate existing datasets with 

980 a new collection, use ``associate``. 

981 

982 Parameters 

983 ---------- 

984 datasetType : `DatasetType` or `str` 

985 A `DatasetType` or the name of one. 

986 dataIds : `~collections.abc.Iterable` of `dict` or `DataCoordinate` 

987 Dimension-based identifiers for the new datasets. 

988 run : `str`, optional 

989 The name of the run that produced the datasets. Defaults to 

990 ``self.defaults.run``. 

991 expand : `bool`, optional 

992 If `True` (default), expand data IDs as they are inserted. This is 

993 necessary in general to allow the datastore to generate file templates,

994 but it may be disabled if the caller can guarantee this is 

995 unnecessary. 

996 idGenerationMode : `DatasetIdGenEnum`, optional 

997 Specifies option for generating dataset IDs. By default unique IDs 

998 are generated for each inserted dataset. 

999 

1000 Returns 

1001 ------- 

1002 refs : `list` of `DatasetRef` 

1003 Resolved `DatasetRef` instances for all given data IDs (in the same 

1004 order). 

1005 

1006 Raises 

1007 ------ 

1008 lsst.daf.butler.registry.DatasetTypeError 

1009 Raised if ``datasetType`` is not known to registry. 

1010 lsst.daf.butler.registry.CollectionTypeError 

1011 Raised if ``run`` collection type is not `~CollectionType.RUN`. 

1012 lsst.daf.butler.registry.NoDefaultCollectionError 

1013 Raised if ``run`` is `None` and ``self.defaults.run`` is `None`. 

1014 lsst.daf.butler.registry.ConflictingDefinitionError 

1015 If a dataset with the same dataset type and data ID as one of those 

1016 given already exists in ``run``. 

1017 lsst.daf.butler.registry.MissingCollectionError 

1018 Raised if ``run`` does not exist in the registry. 

1019 """ 

1020 if isinstance(datasetType, DatasetType): 

1021 storage = self._managers.datasets.find(datasetType.name) 

1022 if storage is None: 

1023 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.") 

1024 else: 

1025 storage = self._managers.datasets.find(datasetType) 

1026 if storage is None: 

1027 raise DatasetTypeError(f"DatasetType with name '{datasetType}' has not been registered.") 

1028 if run is None: 

1029 if self.defaults.run is None: 

1030 raise NoDefaultCollectionError( 

1031 "No run provided to insertDatasets, and no default from registry construction." 

1032 ) 

1033 run = self.defaults.run 

1034 runRecord = self._managers.collections.find(run) 

1035 if runRecord.type is not CollectionType.RUN: 

1036 raise CollectionTypeError( 

1037 f"Given collection is of type {runRecord.type.name}; RUN collection required." 

1038 ) 

1039 assert isinstance(runRecord, RunRecord) 

1040 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG) 

1041 if expand: 

1042 expandedDataIds = [ 

1043 self.expandDataId(dataId, dimensions=storage.datasetType.dimensions) 

1044 for dataId in progress.wrap(dataIds, f"Expanding {storage.datasetType.name} data IDs") 

1045 ] 

1046 else: 

1047 expandedDataIds = [ 

1048 DataCoordinate.standardize(dataId, graph=storage.datasetType.dimensions) for dataId in dataIds 

1049 ] 

1050 try: 

1051 refs = list(storage.insert(runRecord, expandedDataIds, idGenerationMode)) 

1052 if self._managers.obscore: 

1053 context = queries.SqlQueryContext(self._db, self._managers.column_types) 

1054 self._managers.obscore.add_datasets(refs, context) 

1055 except sqlalchemy.exc.IntegrityError as err: 

1056 raise ConflictingDefinitionError( 

1057 "A database constraint failure was triggered by inserting " 

1058 f"one or more datasets of type {storage.datasetType} into " 

1059 f"collection '{run}'. " 

1060 "This probably means a dataset with the same data ID " 

1061 "and dataset type already exists, but it may also mean a " 

1062 "dimension row is missing." 

1063 ) from err 

1064 return refs 

1065 
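A sketch of inserting new datasets into a run, reusing the dataset type and run names from the earlier sketches; the data ID values are placeholders.

    refs = registry.insertDatasets(
        "flat",
        dataIds=[
            {"instrument": "ExampleCam", "detector": 12, "physical_filter": "r"},
            {"instrument": "ExampleCam", "detector": 13, "physical_filter": "r"},
        ],
        run="u/example/run",
    )
    # Each returned DatasetRef is resolved, i.e. it carries a dataset ID.
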

1066 @transactional 

1067 def _importDatasets( 

1068 self, 

1069 datasets: Iterable[DatasetRef], 

1070 expand: bool = True, 

1071 ) -> list[DatasetRef]: 

1072 """Import one or more datasets into the `Registry`. 

1073 

1074 The difference from the `insertDatasets` method is that this method

1075 accepts `DatasetRef` instances which should already be resolved and have

1076 a dataset ID. If the registry supports globally-unique dataset IDs (e.g.

1077 `uuid.UUID`) then datasets which already exist in the registry will be 

1078 ignored if imported again. 

1079 

1080 Parameters 

1081 ---------- 

1082 datasets : `~collections.abc.Iterable` of `DatasetRef` 

1083 Datasets to be inserted. All `DatasetRef` instances must have 

1084 identical ``datasetType`` and ``run`` attributes. ``run`` 

1085 attribute can be `None` and defaults to ``self.defaults.run``. 

1086 Datasets can specify ``id`` attribute which will be used for 

1087 inserted datasets. All dataset IDs must have the same type 

1088 (`int` or `uuid.UUID`), if type of dataset IDs does not match 

1089 configured backend then IDs will be ignored and new IDs will be 

1090 generated by backend. 

1091 expand : `bool`, optional 

1092 If `True` (default), expand data IDs as they are inserted. This is 

1093 necessary in general, but it may be disabled if the caller can 

1094 guarantee this is unnecessary. 

1095 

1096 Returns 

1097 ------- 

1098 refs : `list` of `DatasetRef` 

1099 Resolved `DatasetRef` instances for all given data IDs (in the same 

1100 order). If any of ``datasets`` has an ID which already exists in 

1101 the database then it will not be inserted or updated, but a 

1102 resolved `DatasetRef` will be returned for it in any case. 

1103 

1104 Raises 

1105 ------ 

1106 lsst.daf.butler.registry.NoDefaultCollectionError 

1107 Raised if ``run`` is `None` and ``self.defaults.run`` is `None`. 

1108 lsst.daf.butler.registry.DatasetTypeError 

1109 Raised if datasets correspond to more than one dataset type or 

1110 the dataset type is not known to the registry.

1111 lsst.daf.butler.registry.ConflictingDefinitionError 

1112 If a dataset with the same dataset type and data ID as one of those 

1113 given already exists in ``run``. 

1114 lsst.daf.butler.registry.MissingCollectionError 

1115 Raised if ``run`` does not exist in the registry. 

1116 

1117 Notes 

1118 ----- 

1119 This method is considered package-private and internal to Butler 

1120 implementation. Clients outside daf_butler package should not use this 

1121 method. 

1122 """ 

1123 datasets = list(datasets) 

1124 if not datasets: 

1125 # nothing to do 

1126 return [] 

1127 

1128 # find dataset type 

1129 datasetTypes = {dataset.datasetType for dataset in datasets} 

1130 if len(datasetTypes) != 1: 

1131 raise DatasetTypeError(f"Multiple dataset types in input datasets: {datasetTypes}") 

1132 datasetType = datasetTypes.pop() 

1133 

1134 # get storage handler for this dataset type 

1135 storage = self._managers.datasets.find(datasetType.name) 

1136 if storage is None: 

1137 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.") 

1138 

1139 # find run name 

1140 runs = {dataset.run for dataset in datasets} 

1141 if len(runs) != 1: 

1142 raise ValueError(f"Multiple run names in input datasets: {runs}") 

1143 run = runs.pop() 

1144 

1145 runRecord = self._managers.collections.find(run) 

1146 if runRecord.type is not CollectionType.RUN: 

1147 raise CollectionTypeError( 

1148 f"Given collection '{runRecord.name}' is of type {runRecord.type.name};" 

1149 " RUN collection required." 

1150 ) 

1151 assert isinstance(runRecord, RunRecord) 

1152 

1153 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG) 

1154 if expand: 

1155 expandedDatasets = [ 

1156 dataset.expanded(self.expandDataId(dataset.dataId, dimensions=storage.datasetType.dimensions)) 

1157 for dataset in progress.wrap(datasets, f"Expanding {storage.datasetType.name} data IDs") 

1158 ] 

1159 else: 

1160 expandedDatasets = [ 

1161 DatasetRef(datasetType, dataset.dataId, id=dataset.id, run=dataset.run, conform=True) 

1162 for dataset in datasets 

1163 ] 

1164 

1165 try: 

1166 refs = list(storage.import_(runRecord, expandedDatasets)) 

1167 if self._managers.obscore: 

1168 context = queries.SqlQueryContext(self._db, self._managers.column_types) 

1169 self._managers.obscore.add_datasets(refs, context) 

1170 except sqlalchemy.exc.IntegrityError as err: 

1171 raise ConflictingDefinitionError( 

1172 "A database constraint failure was triggered by inserting " 

1173 f"one or more datasets of type {storage.datasetType} into " 

1174 f"collection '{run}'. " 

1175 "This probably means a dataset with the same data ID " 

1176 "and dataset type already exists, but it may also mean a " 

1177 "dimension row is missing." 

1178 ) from err 

1179 # Check that imported dataset IDs match the input 

1180 for imported_ref, input_ref in zip(refs, datasets, strict=True): 

1181 if imported_ref.id != input_ref.id: 

1182 raise RegistryConsistencyError( 

1183 "Imported dataset ID differs from input dataset ID, " 

1184 f"input ref: {input_ref}, imported ref: {imported_ref}" 

1185 ) 

1186 return refs 

1187 

1188 def getDataset(self, id: DatasetId) -> DatasetRef | None: 

1189 """Retrieve a Dataset entry. 

1190 

1191 Parameters 

1192 ---------- 

1193 id : `DatasetId` 

1194 The unique identifier for the dataset. 

1195 

1196 Returns 

1197 ------- 

1198 ref : `DatasetRef` or `None` 

1199 A ref to the Dataset, or `None` if no matching Dataset 

1200 was found. 

1201 """ 

1202 return self._managers.datasets.getDatasetRef(id) 

1203 

1204 @transactional 

1205 def removeDatasets(self, refs: Iterable[DatasetRef]) -> None: 

1206 """Remove datasets from the Registry. 

1207 

1208 The datasets will be removed unconditionally from all collections, and 

1209 any `Quantum` that consumed this dataset will instead be marked as

1210 having a NULL input. `Datastore` records will *not* be deleted; the 

1211 caller is responsible for ensuring that the dataset has already been 

1212 removed from all Datastores. 

1213 

1214 Parameters 

1215 ---------- 

1216 refs : `~collections.abc.Iterable` [`DatasetRef`] 

1217 References to the datasets to be removed. Must include a valid 

1218 ``id`` attribute, and should be considered invalidated upon return. 

1219 

1220 Raises 

1221 ------ 

1222 lsst.daf.butler.AmbiguousDatasetError 

1223 Raised if any ``ref.id`` is `None`. 

1224 lsst.daf.butler.registry.OrphanedRecordError 

1225 Raised if any dataset is still present in any `Datastore`. 

1226 """ 

1227 progress = Progress("lsst.daf.butler.Registry.removeDatasets", level=logging.DEBUG) 

1228 for datasetType, refsForType in progress.iter_item_chunks( 

1229 DatasetRef.iter_by_type(refs), desc="Removing datasets by type" 

1230 ): 

1231 storage = self._managers.datasets[datasetType.name] 

1232 try: 

1233 storage.delete(refsForType) 

1234 except sqlalchemy.exc.IntegrityError as err: 

1235 raise OrphanedRecordError( 

1236 "One or more datasets is still present in one or more Datastores." 

1237 ) from err 

1238 

1239 @transactional 

1240 def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

1241 """Add existing datasets to a `~CollectionType.TAGGED` collection. 

1242 

1243 If a DatasetRef with the same exact ID is already in a collection 

1244 nothing is changed. If a `DatasetRef` with the same `DatasetType` and 

1245 data ID but with different ID exists in the collection, 

1246 `~lsst.daf.butler.registry.ConflictingDefinitionError` is raised. 

1247 

1248 Parameters 

1249 ---------- 

1250 collection : `str` 

1251 Indicates the collection the datasets should be associated with. 

1252 refs : `~collections.abc.Iterable` [ `DatasetRef` ] 

1253 An iterable of resolved `DatasetRef` instances that already exist 

1254 in this `Registry`. 

1255 

1256 Raises 

1257 ------ 

1258 lsst.daf.butler.registry.ConflictingDefinitionError 

1259 If a Dataset with the given `DatasetRef` already exists in the 

1260 given collection. 

1261 lsst.daf.butler.registry.MissingCollectionError 

1262 Raised if ``collection`` does not exist in the registry. 

1263 lsst.daf.butler.registry.CollectionTypeError 

1264 Raised if adding new datasets to the given ``collection`` is not

1265 allowed. 

1266 """ 

1267 progress = Progress("lsst.daf.butler.Registry.associate", level=logging.DEBUG) 

1268 collectionRecord = self._managers.collections.find(collection) 

1269 if collectionRecord.type is not CollectionType.TAGGED: 

1270 raise CollectionTypeError( 

1271 f"Collection '{collection}' has type {collectionRecord.type.name}, not TAGGED." 

1272 ) 

1273 for datasetType, refsForType in progress.iter_item_chunks( 

1274 DatasetRef.iter_by_type(refs), desc="Associating datasets by type" 

1275 ): 

1276 storage = self._managers.datasets[datasetType.name] 

1277 try: 

1278 storage.associate(collectionRecord, refsForType) 

1279 if self._managers.obscore: 

1280 # If a TAGGED collection is being monitored by ObsCore 

1281 # manager then we may need to save the dataset. 

1282 context = queries.SqlQueryContext(self._db, self._managers.column_types) 

1283 self._managers.obscore.associate(refsForType, collectionRecord, context) 

1284 except sqlalchemy.exc.IntegrityError as err: 

1285 raise ConflictingDefinitionError( 

1286 f"Constraint violation while associating dataset of type {datasetType.name} with " 

1287 f"collection {collection}. This probably means that one or more datasets with the same " 

1288 "dataset type and data ID already exist in the collection, but it may also indicate " 

1289 "that the datasets do not exist." 

1290 ) from err 

1291 

1292 @transactional 

1293 def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

1294 """Remove existing datasets from a `~CollectionType.TAGGED` collection. 

1295 

1296 ``collection`` and ``ref`` combinations that are not currently 

1297 associated are silently ignored. 

1298 

1299 Parameters 

1300 ---------- 

1301 collection : `str` 

1302 The collection the datasets should no longer be associated with. 

1303 refs : `~collections.abc.Iterable` [ `DatasetRef` ] 

1304 An iterable of resolved `DatasetRef` instances that already exist 

1305 in this `Registry`. 

1306 

1307 Raises 

1308 ------ 

1309 lsst.daf.butler.AmbiguousDatasetError 

1310 Raised if any of the given dataset references is unresolved. 

1311 lsst.daf.butler.registry.MissingCollectionError 

1312 Raised if ``collection`` does not exist in the registry. 

1313 lsst.daf.butler.registry.CollectionTypeError 

1314 Raised if removing datasets from the given ``collection`` is not

1315 allowed. 

1316 """ 

1317 progress = Progress("lsst.daf.butler.Registry.disassociate", level=logging.DEBUG) 

1318 collectionRecord = self._managers.collections.find(collection) 

1319 if collectionRecord.type is not CollectionType.TAGGED: 

1320 raise CollectionTypeError( 

1321 f"Collection '{collection}' has type {collectionRecord.type.name}; expected TAGGED." 

1322 ) 

1323 for datasetType, refsForType in progress.iter_item_chunks( 

1324 DatasetRef.iter_by_type(refs), desc="Disassociating datasets by type" 

1325 ): 

1326 storage = self._managers.datasets[datasetType.name] 

1327 storage.disassociate(collectionRecord, refsForType) 

1328 if self._managers.obscore: 

1329 self._managers.obscore.disassociate(refsForType, collectionRecord) 

1330 
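A sketch of tagging and untagging existing datasets with the two methods above, reusing ``refs`` from the insertion sketch; the collection name is a placeholder.

    registry.associate("u/example/tagged", refs)
    # Later, to undo the tagging (pairs that are not associated are ignored):
    registry.disassociate("u/example/tagged", refs)
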

1331 @transactional 

1332 def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None: 

1333 """Associate one or more datasets with a calibration collection and a 

1334 validity range within it. 

1335 

1336 Parameters 

1337 ---------- 

1338 collection : `str` 

1339 The name of an already-registered `~CollectionType.CALIBRATION` 

1340 collection. 

1341 refs : `~collections.abc.Iterable` [ `DatasetRef` ] 

1342 Datasets to be associated. 

1343 timespan : `Timespan` 

1344 The validity range for these datasets within the collection. 

1345 

1346 Raises 

1347 ------ 

1348 lsst.daf.butler.AmbiguousDatasetError 

1349 Raised if any of the given `DatasetRef` instances is unresolved. 

1350 lsst.daf.butler.registry.ConflictingDefinitionError 

1351 Raised if the collection already contains a different dataset with 

1352 the same `DatasetType` and data ID and an overlapping validity 

1353 range. 

1354 lsst.daf.butler.registry.CollectionTypeError 

1355 Raised if ``collection`` is not a `~CollectionType.CALIBRATION` 

1356 collection or if one or more datasets are of a dataset type for 

1357 which `DatasetType.isCalibration` returns `False`. 

1358 """ 

1359 progress = Progress("lsst.daf.butler.Registry.certify", level=logging.DEBUG) 

1360 collectionRecord = self._managers.collections.find(collection) 

1361 for datasetType, refsForType in progress.iter_item_chunks( 

1362 DatasetRef.iter_by_type(refs), desc="Certifying datasets by type" 

1363 ): 

1364 storage = self._managers.datasets[datasetType.name] 

1365 storage.certify( 

1366 collectionRecord, 

1367 refsForType, 

1368 timespan, 

1369 context=queries.SqlQueryContext(self._db, self._managers.column_types), 

1370 ) 

1371 

1372 @transactional 

1373 def decertify( 

1374 self, 

1375 collection: str, 

1376 datasetType: str | DatasetType, 

1377 timespan: Timespan, 

1378 *, 

1379 dataIds: Iterable[DataId] | None = None, 

1380 ) -> None: 

1381 """Remove or adjust datasets to clear a validity range within a 

1382 calibration collection. 

1383 

1384 Parameters 

1385 ---------- 

1386 collection : `str` 

1387 The name of an already-registered `~CollectionType.CALIBRATION` 

1388 collection. 

1389 datasetType : `str` or `DatasetType` 

1390 Name or `DatasetType` instance for the datasets to be decertified. 

1391 timespan : `Timespan`

1392 The validity range to remove datasets from within the collection. 

1393 Datasets that overlap this range but are not contained by it will 

1394 have their validity ranges adjusted to not overlap it, which may 

1395 split a single dataset validity range into two. 

1396 dataIds : iterable [`dict` or `DataCoordinate`], optional 

1397 Data IDs that should be decertified within the given validity range 

1398 If `None`, all data IDs for ``self.datasetType`` will be 

1399 decertified. 

1400 

1401 Raises 

1402 ------ 

1403 lsst.daf.butler.registry.CollectionTypeError 

1404 Raised if ``collection`` is not a `~CollectionType.CALIBRATION` 

1405 collection or if ``datasetType.isCalibration() is False``. 

1406 """ 

1407 collectionRecord = self._managers.collections.find(collection) 

1408 if isinstance(datasetType, str): 

1409 storage = self._managers.datasets[datasetType] 

1410 else: 

1411 storage = self._managers.datasets[datasetType.name] 

1412 standardizedDataIds = None 

1413 if dataIds is not None: 

1414 standardizedDataIds = [ 

1415 DataCoordinate.standardize(d, dimensions=storage.datasetType.dimensions) for d in dataIds 

1416 ] 

1417 storage.decertify( 

1418 collectionRecord, 

1419 timespan, 

1420 dataIds=standardizedDataIds, 

1421 context=queries.SqlQueryContext(self._db, self._managers.column_types), 

1422 ) 

1423 

1424 def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager: 

1425 """Return an object that allows a new `Datastore` instance to 

1426 communicate with this `Registry`. 

1427 

1428 Returns 

1429 ------- 

1430 manager : `~.interfaces.DatastoreRegistryBridgeManager` 

1431 Object that mediates communication between this `Registry` and its 

1432 associated datastores. 

1433 """ 

1434 return self._managers.datastores 

1435 

1436 def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]: 

1437 """Retrieve datastore locations for a given dataset. 

1438 

1439 Parameters 

1440 ---------- 

1441 ref : `DatasetRef` 

1442 A reference to the dataset for which to retrieve storage 

1443 information. 

1444 

1445 Returns 

1446 ------- 

1447 datastores : `~collections.abc.Iterable` [ `str` ] 

1448 All the matching datastores holding this dataset. 

1449 

1450 Raises 

1451 ------ 

1452 lsst.daf.butler.AmbiguousDatasetError 

1453 Raised if ``ref.id`` is `None`. 

1454 """ 

1455 return self._managers.datastores.findDatastores(ref) 

1456 

1457 def expandDataId( 

1458 self, 

1459 dataId: DataId | None = None, 

1460 *, 

1461 dimensions: Iterable[str] | DimensionGroup | DimensionGraph | None = None, 

1462 graph: DimensionGraph | None = None, 

1463 records: NameLookupMapping[DimensionElement, DimensionRecord | None] | None = None, 

1464 withDefaults: bool = True, 

1465 **kwargs: Any, 

1466 ) -> DataCoordinate: 

1467 """Expand a dimension-based data ID to include additional information. 

1468 

1469 Parameters 

1470 ---------- 

1471 dataId : `DataCoordinate` or `dict`, optional 

1472 Data ID to be expanded; augmented and overridden by ``kwargs``. 

1473 dimensions : `~collections.abc.Iterable` [ `str` ], \ 

1474 `DimensionGroup`, or `DimensionGraph`, optional 

1475 The dimensions to be identified by the new `DataCoordinate`. 

1476 If not provided, the dimensions will be inferred from the keys of 

1477 ``dataId`` and ``**kwargs``; this registry's dimension universe is 

1478 always used to standardize them. 

1479 graph : `DimensionGraph`, optional 

1480 Like ``dimensions``, but as a ``DimensionGraph`` instance. Ignored 

1481 if ``dimensions`` is provided. Deprecated and will be removed 

1482 after v27. 

1483 records : `~collections.abc.Mapping` [`str`, `DimensionRecord`], \ 

1484 optional 

1485 Dimension record data to use before querying the database for that 

1486 data, keyed by element name. 

1487 withDefaults : `bool`, optional 

1488 Utilize ``self.defaults.dataId`` to fill in missing governor 

1489 dimension key-value pairs. Defaults to `True` (i.e. defaults are 

1490 used). 

1491 **kwargs 

1492 Additional keywords are treated like additional key-value pairs for 

1493 ``dataId``, extending and overriding 

1494 

1495 Returns 

1496 ------- 

1497 expanded : `DataCoordinate` 

1498 A data ID that includes full metadata for all of the dimensions it 

1499 identifies, i.e. guarantees that ``expanded.hasRecords()`` and 

1500 ``expanded.hasFull()`` both return `True`. 

1501 

1502 Raises 

1503 ------ 

1504 lsst.daf.butler.registry.DataIdError 

1505 Raised when ``dataId`` or keyword arguments specify unknown 

1506 dimensions or values, or when a resulting data ID contains 

1507 contradictory key-value pairs, according to dimension 

1508 relationships. 

1509 

1510 Notes 

1511 ----- 

1512 This method cannot be relied upon to reject invalid data ID values 

1513 for dimensions that do not actually have any record columns. For 

1514 efficiency reasons the records for these dimensions (which have only 

1515 dimension key values that are given by the caller) may be constructed 

1516 directly rather than obtained from the registry database. 

1517 """ 

1518 if not withDefaults: 

1519 defaults = None 

1520 else: 

1521 defaults = self.defaults.dataId 

1522 try: 

1523 standardized = DataCoordinate.standardize( 

1524 dataId, 

1525 graph=graph, 

1526 dimensions=dimensions, 

1527 universe=self.dimensions, 

1528 defaults=defaults, 

1529 **kwargs, 

1530 ) 

1531 except KeyError as exc: 

1532 # This means either kwargs have some odd name or required 

1533 # dimension is missing. 

1534 raise DimensionNameError(str(exc)) from exc 

1535 if standardized.hasRecords(): 

1536 return standardized 

1537 if records is None: 

1538 records = {} 

1539 elif isinstance(records, NamedKeyMapping): 

1540 records = records.byName() 

1541 else: 

1542 records = dict(records) 

1543 if isinstance(dataId, DataCoordinate) and dataId.hasRecords(): 

1544 for element_name in dataId.dimensions.elements: 

1545 records[element_name] = dataId.records[element_name] 

1546 keys = dict(standardized.mapping) 

1547 context = queries.SqlQueryContext(self._db, self._managers.column_types) 

1548 for element_name in standardized.dimensions.lookup_order: 

1549 element = self.dimensions[element_name] 

1550 record = records.get(element_name, ...) # Use ... to mean not found; None might mean NULL 

1551 if record is ...: 

1552 if element_name in self.dimensions.dimensions.names and keys.get(element_name) is None: 

1553 if element_name in standardized.dimensions.required: 

1554 raise DimensionNameError( 

1555 f"No value or null value for required dimension {element_name}." 

1556 ) 

1557 keys[element_name] = None 

1558 record = None 

1559 else: 

1560 storage = self._managers.dimensions[element_name] 

1561 record = storage.fetch_one( 

1562 DataCoordinate.standardize(keys, dimensions=element.minimal_group), context 

1563 ) 

1564 records[element_name] = record 

1565 if record is not None: 

1566 for d in element.implied: 

1567 value = getattr(record, d.name) 

1568 if keys.setdefault(d.name, value) != value: 

1569 raise InconsistentDataIdError( 

1570 f"Data ID {standardized} has {d.name}={keys[d.name]!r}, " 

1571 f"but {element_name} implies {d.name}={value!r}." 

1572 ) 

1573 else: 

1574 if element_name in standardized.dimensions.required: 

1575 raise DataIdValueError( 

1576 f"Could not fetch record for required dimension {element.name} via keys {keys}." 

1577 ) 

1578 if element.alwaysJoin: 

1579 raise InconsistentDataIdError( 

1580 f"Could not fetch record for element {element_name} via keys {keys}, ", 

1581 "but it is marked alwaysJoin=True; this means one or more dimensions are not " 

1582 "related.", 

1583 ) 

1584 for d in element.implied: 

1585 keys.setdefault(d.name, None) 

1586 records.setdefault(d.name, None) 

1587 return DataCoordinate.standardize(keys, dimensions=standardized.dimensions).expanded(records=records) 

1588 
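# A minimal usage sketch for expandDataId (instrument, exposure, and
# detector values are hypothetical; assumes matching dimension records
# exist in the repository):
#
#     data_id = registry.expandDataId(
#         {"instrument": "MyCam", "exposure": 12345, "detector": 42}
#     )
#     assert data_id.hasFull() and data_id.hasRecords()
#     print(data_id.records["exposure"].timespan)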

1589 def insertDimensionData( 

1590 self, 

1591 element: DimensionElement | str, 

1592 *data: Mapping[str, Any] | DimensionRecord, 

1593 conform: bool = True, 

1594 replace: bool = False, 

1595 skip_existing: bool = False, 

1596 ) -> None: 

1597 """Insert one or more dimension records into the database. 

1598 

1599 Parameters 

1600 ---------- 

1601 element : `DimensionElement` or `str` 

1602 The `DimensionElement` or name thereof that identifies the table 

1603 records will be inserted into. 

1604 *data : `dict` or `DimensionRecord` 

1605 One or more records to insert. 

1606 conform : `bool`, optional 

1607 If `False` (`True` is default) perform no checking or conversions, 

1608 and assume that ``element`` is a `DimensionElement` instance and 

1609 ``data`` contains one or more `DimensionRecord` instances of the 

1610 appropriate subclass. 

1611 replace : `bool`, optional 

1612 If `True` (`False` is default), replace existing records in the 

1613 database if there is a conflict. 

1614 skip_existing : `bool`, optional 

1615 If `True` (`False` is default), skip insertion if a record with 

1616 the same primary key values already exists. Unlike 

1617 `syncDimensionData`, this will not detect when the given record 

1618 differs from what is in the database, and should not be used when 

1619 this is a concern. 

1620 """ 

1621 if conform: 

1622 if isinstance(element, str): 

1623 element = self.dimensions[element] 

1624 records = [ 

1625 row if isinstance(row, DimensionRecord) else element.RecordClass(**row) for row in data 

1626 ] 

1627 else: 

1628 # Ignore typing since caller said to trust them with conform=False. 

1629 records = data # type: ignore 

1630 storage = self._managers.dimensions[element] 

1631 storage.insert(*records, replace=replace, skip_existing=skip_existing) 

1632 
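# A minimal usage sketch for insertDimensionData (the instrument record is
# hypothetical; field names follow the default dimension universe):
#
#     registry.insertDimensionData(
#         "instrument",
#         {
#             "name": "MyCam",
#             "visit_max": 999_999,
#             "exposure_max": 999_999,
#             "detector_max": 200,
#             "class_name": "lsst.obs.mycam.MyCam",
#         },
#     )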

1633 def syncDimensionData( 

1634 self, 

1635 element: DimensionElement | str, 

1636 row: Mapping[str, Any] | DimensionRecord, 

1637 conform: bool = True, 

1638 update: bool = False, 

1639 ) -> bool | dict[str, Any]: 

1640 """Synchronize the given dimension record with the database, inserting 

1641 if it does not already exist and comparing values if it does. 

1642 

1643 Parameters 

1644 ---------- 

1645 element : `DimensionElement` or `str` 

1646 The `DimensionElement` or name thereof that identifies the table 

1647 records will be inserted into. 

1648 row : `dict` or `DimensionRecord` 

1649 The record to insert. 

1650 conform : `bool`, optional 

1651 If `False` (`True` is default) perform no checking or conversions, 

1652 and assume that ``element`` is a `DimensionElement` instance and 

1653 ``row`` is a `DimensionRecord` instance of the 

1654 appropriate subclass. 

1655 update : `bool`, optional 

1656 If `True` (`False` is default), update the existing record in the 

1657 database if there is a conflict. 

1658 

1659 Returns 

1660 ------- 

1661 inserted_or_updated : `bool` or `dict` 

1662 `True` if a new row was inserted, `False` if no changes were 

1663 needed, or a `dict` mapping updated column names to their old 

1664 values if an update was performed (only possible if 

1665 ``update=True``). 

1666 

1667 Raises 

1668 ------ 

1669 lsst.daf.butler.registry.ConflictingDefinitionError 

1670 Raised if the record exists in the database (according to primary 

1671 key lookup) but is inconsistent with the given one. 

1672 """ 

1673 if conform: 

1674 if isinstance(element, str): 

1675 element = self.dimensions[element] 

1676 record = row if isinstance(row, DimensionRecord) else element.RecordClass(**row) 

1677 else: 

1678 # Ignore typing since caller said to trust them with conform=False. 

1679 record = row # type: ignore 

1680 storage = self._managers.dimensions[element] 

1681 return storage.sync(record, update=update) 

1682 
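# A minimal usage sketch for syncDimensionData: insert the record if it is
# missing, otherwise compare it with what is already stored (same
# hypothetical instrument record as the sketch above):
#
#     inserted = registry.syncDimensionData(
#         "instrument",
#         {"name": "MyCam", "visit_max": 999_999, "exposure_max": 999_999,
#          "detector_max": 200, "class_name": "lsst.obs.mycam.MyCam"},
#     )
#     # True on first insertion, False if an identical record exists, or a
#     # dict of overwritten values when update=True changes an existing row.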

1683 def queryDatasetTypes( 

1684 self, 

1685 expression: Any = ..., 

1686 *, 

1687 components: bool | None = False, 

1688 missing: list[str] | None = None, 

1689 ) -> Iterable[DatasetType]: 

1690 """Iterate over the dataset types whose names match an expression. 

1691 

1692 Parameters 

1693 ---------- 

1694 expression : dataset type expression, optional 

1695 An expression that fully or partially identifies the dataset types 

1696 to return, such as a `str`, `re.Pattern`, or iterable thereof. 

1697 ``...`` can be used to return all dataset types, and is the 

1698 default. See :ref:`daf_butler_dataset_type_expressions` for more 

1699 information. 

1700 components : `bool`, optional 

1701 If `True`, apply all expression patterns to component dataset type 

1702 names as well. If `False`, never apply patterns to components. 

1703 If `None`, apply patterns to components only if their 

1704 parent datasets were not matched by the expression. 

1705 Fully-specified component datasets (`str` or `DatasetType` 

1706 instances) are always included. 

1707 

1708 Values other than `False` are deprecated, and only `False` will be 

1709 supported after v26. After v27 this argument will be removed 

1710 entirely. 

1711 missing : `list` of `str`, optional 

1712 String dataset type names that were explicitly given (i.e. not 

1713 regular expression patterns) but not found will be appended to this 

1714 list, if it is provided. 

1715 

1716 Returns 

1717 ------- 

1718 dataset_types : `~collections.abc.Iterable` [ `DatasetType`] 

1719 An `~collections.abc.Iterable` of `DatasetType` instances whose 

1720 names match ``expression``. 

1721 

1722 Raises 

1723 ------ 

1724 lsst.daf.butler.registry.DatasetTypeExpressionError 

1725 Raised when ``expression`` is invalid. 

1726 """ 

1727 wildcard = DatasetTypeWildcard.from_expression(expression) 

1728 composition_dict = self._managers.datasets.resolve_wildcard( 

1729 wildcard, 

1730 components=components, 

1731 missing=missing, 

1732 ) 

1733 result: list[DatasetType] = [] 

1734 for parent_dataset_type, components_for_parent in composition_dict.items(): 

1735 result.extend( 

1736 parent_dataset_type.makeComponentDatasetType(c) if c is not None else parent_dataset_type 

1737 for c in components_for_parent 

1738 ) 

1739 return result 

1740 
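# A minimal usage sketch for queryDatasetTypes (dataset-type names and the
# pattern are hypothetical):
#
#     import re
#
#     everything = list(registry.queryDatasetTypes(...))
#     coadd_like = list(registry.queryDatasetTypes(re.compile("coadd_.*")))
#     missing: list[str] = []
#     registry.queryDatasetTypes(["raw", "no_such_type"], missing=missing)
#     # Names that are not registered (e.g. "no_such_type") end up in
#     # ``missing`` instead of raising.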

1741 def queryCollections( 

1742 self, 

1743 expression: Any = ..., 

1744 datasetType: DatasetType | None = None, 

1745 collectionTypes: Iterable[CollectionType] | CollectionType = CollectionType.all(), 

1746 flattenChains: bool = False, 

1747 includeChains: bool | None = None, 

1748 ) -> Sequence[str]: 

1749 """Iterate over the collections whose names match an expression. 

1750 

1751 Parameters 

1752 ---------- 

1753 expression : collection expression, optional 

1754 An expression that identifies the collections to return, such as 

1755 a `str` (for full matches or partial matches via globs), 

1756 `re.Pattern` (for partial matches), or iterable thereof. ``...`` 

1757 can be used to return all collections, and is the default. 

1758 See :ref:`daf_butler_collection_expressions` for more information. 

1759 datasetType : `DatasetType`, optional 

1760 If provided, only yield collections that may contain datasets of 

1761 this type. This is a conservative approximation in general; it may 

1762 yield collections that do not have any such datasets. 

1763 collectionTypes : `~collections.abc.Set` [`CollectionType`] or \ 

1764 `CollectionType`, optional 

1765 If provided, only yield collections of these types. 

1766 flattenChains : `bool`, optional 

1767 If `True` (`False` is default), recursively yield the child 

1768 collections of matching `~CollectionType.CHAINED` collections. 

1769 includeChains : `bool`, optional 

1770 If `True`, yield records for matching `~CollectionType.CHAINED` 

1771 collections. Default is the opposite of ``flattenChains``: include 

1772 either CHAINED collections or their children, but not both. 

1773 

1774 Returns 

1775 ------- 

1776 collections : `~collections.abc.Sequence` [ `str` ] 

1777 The names of collections that match ``expression``. 

1778 

1779 Raises 

1780 ------ 

1781 lsst.daf.butler.registry.CollectionExpressionError 

1782 Raised when ``expression`` is invalid. 

1783 

1784 Notes 

1785 ----- 

1786 The order in which collections are returned is unspecified, except that 

1787 the children of a `~CollectionType.CHAINED` collection are guaranteed 

1788 to be in the order in which they are searched. When multiple parent 

1789 `~CollectionType.CHAINED` collections match the same criteria, the 

1790 order in which their child lists appear is unspecified, and the lists of 

1791 children may be incomplete if a child has multiple parents. 

1792 """ 

1793 # Right now the datasetType argument is completely ignored, but that 

1794 # is consistent with its [lack of] guarantees. DM-24939 or a follow-up 

1795 # ticket will take care of that. 

1796 try: 

1797 wildcard = CollectionWildcard.from_expression(expression) 

1798 except TypeError as exc: 

1799 raise CollectionExpressionError(f"Invalid collection expression '{expression}'") from exc 

1800 collectionTypes = ensure_iterable(collectionTypes) 

1801 return [ 

1802 record.name 

1803 for record in self._managers.collections.resolve_wildcard( 

1804 wildcard, 

1805 collection_types=frozenset(collectionTypes), 

1806 flatten_chains=flattenChains, 

1807 include_chains=includeChains, 

1808 ) 

1809 ] 

1810 
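# A minimal usage sketch for queryCollections (collection names and the
# glob pattern are hypothetical):
#
#     from lsst.daf.butler import CollectionType
#
#     run_names = registry.queryCollections(
#         "MyCam/runs/*", collectionTypes=CollectionType.RUN
#     )
#     # Expand a CHAINED collection into its (ordered) children.
#     children = registry.queryCollections("MyCam/defaults", flattenChains=True)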

1811 def _makeQueryBuilder( 

1812 self, 

1813 summary: queries.QuerySummary, 

1814 doomed_by: Iterable[str] = (), 

1815 ) -> queries.QueryBuilder: 

1816 """Return a `QueryBuilder` instance capable of constructing and 

1817 managing more complex queries than those obtainable via `Registry` 

1818 interfaces. 

1819 

1820 This is an advanced interface; downstream code should prefer 

1821 `Registry.queryDataIds` and `Registry.queryDatasets` whenever those 

1822 are sufficient. 

1823 

1824 Parameters 

1825 ---------- 

1826 summary : `queries.QuerySummary` 

1827 Object describing and categorizing the full set of dimensions that 

1828 will be included in the query. 

1829 doomed_by : `~collections.abc.Iterable` of `str`, optional 

1830 A list of diagnostic messages that indicate why the query is going 

1831 to yield no results and should not even be executed. If an empty 

1832 container (default) the query will be executed unless other code 

1833 determines that it is doomed. 

1834 

1835 Returns 

1836 ------- 

1837 builder : `queries.QueryBuilder` 

1838 Object that can be used to construct and perform advanced queries. 

1839 """ 

1840 doomed_by = list(doomed_by) 

1841 backend = queries.SqlQueryBackend(self._db, self._managers) 

1842 context = backend.context() 

1843 relation: Relation | None = None 

1844 if doomed_by: 

1845 relation = LeafRelation.make_doomed(context.sql_engine, set(), doomed_by) 

1846 return queries.QueryBuilder( 

1847 summary, 

1848 backend=backend, 

1849 context=context, 

1850 relation=relation, 

1851 ) 

1852 

1853 def _standardize_query_data_id_args( 

1854 self, data_id: DataId | None, *, doomed_by: list[str], **kwargs: Any 

1855 ) -> DataCoordinate: 

1856 """Preprocess the data ID arguments passed to query* methods. 

1857 

1858 Parameters 

1859 ---------- 

1860 data_id : `DataId` or `None` 

1861 Data ID that constrains the query results. 

1862 doomed_by : `list` [ `str` ] 

1863 List to append messages indicating why the query is doomed to 

1864 yield no results. 

1865 **kwargs 

1866 Additional data ID key-value pairs, extending and overriding 

1867 ``data_id``. 

1868 

1869 Returns 

1870 ------- 

1871 data_id : `DataCoordinate` 

1872 Standardized data ID. Will be fully expanded unless expansion 

1873 fails, in which case a message will be appended to ``doomed_by`` 

1874 on return. 

1875 """ 

1876 try: 

1877 return self.expandDataId(data_id, **kwargs) 

1878 except DataIdValueError as err: 

1879 doomed_by.append(str(err)) 

1880 return DataCoordinate.standardize( 

1881 data_id, **kwargs, universe=self.dimensions, defaults=self.defaults.dataId 

1882 ) 

1883 

1884 def _standardize_query_dataset_args( 

1885 self, 

1886 datasets: Any, 

1887 collections: CollectionArgType | None, 

1888 components: bool | None, 

1889 mode: Literal["find_first"] | Literal["find_all"] | Literal["constrain"] = "constrain", 

1890 *, 

1891 doomed_by: list[str], 

1892 ) -> tuple[dict[DatasetType, list[str | None]], CollectionWildcard | None]: 

1893 """Preprocess dataset arguments passed to query* methods. 

1894 

1895 Parameters 

1896 ---------- 

1897 datasets : `DatasetType`, `str`, `re.Pattern`, or iterable of these 

1898 Expression identifying dataset types. See `queryDatasetTypes` for 

1899 details. 

1900 collections : `str`, `re.Pattern`, or iterable of these 

1901 Expression identifying collections to be searched. See 

1902 `queryCollections` for details. 

1903 components : `bool`, optional 

1904 If `True`, apply all expression patterns to component dataset type 

1905 names as well. If `False`, never apply patterns to components. 

1906 If `None` (default), apply patterns to components only if their 

1907 parent datasets were not matched by the expression. 

1908 Fully-specified component datasets (`str` or `DatasetType` 

1909 instances) are always included. 

1910 

1911 Values other than `False` are deprecated, and only `False` will be 

1912 supported after v26. After v27 this argument will be removed 

1913 entirely. 

1914 mode : `str`, optional 

1915 The way in which datasets are being used in this query; one of: 

1916 

1917 - "find_first": this is a query for the first dataset in an 

1918 ordered list of collections. Prohibits collection wildcards, 

1919 but permits dataset type wildcards. 

1920 

1921 - "find_all": this is a query for all datasets in all matched 

1922 collections. Permits collection and dataset type wildcards. 

1923 

1924 - "constrain": this is a query for something other than datasets, 

1925 with results constrained by dataset existence. Permits 

1926 collection wildcards and prohibits ``...`` as a dataset type 

1927 wildcard. 

1928 doomed_by : `list` [ `str` ] 

1929 List to append messages indicating why the query is doomed to 

1930 yield no results. 

1931 

1932 Returns 

1933 ------- 

1934 composition : `dict` [ `DatasetType`, `list` [ `str` or `None` ] ] 

1935 Dictionary mapping parent dataset type to `list` of components 

1936 matched for that dataset type (or `None` for the parent itself). 

1937 collections : `CollectionWildcard` or `None` 

1938 Processed collection expression, or `None` if ``datasets`` is `None`. 

1939 """ 

1940 composition: dict[DatasetType, list[str | None]] = {} 

1941 collection_wildcard: CollectionWildcard | None = None 

1942 if datasets is not None: 

1943 if collections is None: 

1944 if not self.defaults.collections: 

1945 raise NoDefaultCollectionError("No collections, and no registry default collections.") 

1946 collection_wildcard = CollectionWildcard.from_expression(self.defaults.collections) 

1947 else: 

1948 collection_wildcard = CollectionWildcard.from_expression(collections) 

1949 if mode == "find_first" and collection_wildcard.patterns: 

1950 raise TypeError( 

1951 f"Collection pattern(s) {collection_wildcard.patterns} not allowed in this context." 

1952 ) 

1953 missing: list[str] = [] 

1954 composition = self._managers.datasets.resolve_wildcard( 

1955 datasets, components=components, missing=missing, explicit_only=(mode == "constrain") 

1956 ) 

1957 if missing and mode == "constrain": 

1958 # After v26 this should raise MissingDatasetTypeError, to be 

1959 # implemented on DM-36303. 

1960 warnings.warn( 

1961 f"Dataset type(s) {missing} are not registered; this will be an error after v26.", 

1962 FutureWarning, 

1963 stacklevel=find_outside_stacklevel("lsst.daf.butler"), 

1964 ) 

1965 doomed_by.extend(f"Dataset type {name} is not registered." for name in missing) 

1966 elif collections: 

1967 # I think this check should actually be `collections is not None`, 

1968 # but it looks like some CLI scripts use empty tuple as default. 

1969 raise ArgumentError(f"Cannot pass 'collections' (='{collections}') without 'datasets'.") 

1970 return composition, collection_wildcard 

1971 

1972 def queryDatasets( 

1973 self, 

1974 datasetType: Any, 

1975 *, 

1976 collections: CollectionArgType | None = None, 

1977 dimensions: Iterable[Dimension | str] | None = None, 

1978 dataId: DataId | None = None, 

1979 where: str = "", 

1980 findFirst: bool = False, 

1981 components: bool | None = False, 

1982 bind: Mapping[str, Any] | None = None, 

1983 check: bool = True, 

1984 **kwargs: Any, 

1985 ) -> queries.DatasetQueryResults: 

1986 """Query for and iterate over dataset references matching user-provided 

1987 criteria. 

1988 

1989 Parameters 

1990 ---------- 

1991 datasetType : dataset type expression 

1992 An expression that fully or partially identifies the dataset types 

1993 to be queried. Allowed types include `DatasetType`, `str`, 

1994 `re.Pattern`, and iterables thereof. The special value ``...`` can 

1995 be used to query all dataset types. See 

1996 :ref:`daf_butler_dataset_type_expressions` for more information. 

1997 collections : collection expression, optional 

1998 An expression that identifies the collections to search, such as a 

1999 `str` (for full matches or partial matches via globs), `re.Pattern` 

2000 (for partial matches), or iterable thereof. ``...`` can be used to 

2001 search all collections (actually just all `~CollectionType.RUN` 

2002 collections, because this will still find all datasets). 

2003 If not provided, ``self.defaults.collections`` is used. See 

2004 :ref:`daf_butler_collection_expressions` for more information. 

2005 dimensions : `~collections.abc.Iterable` of `Dimension` or `str` 

2006 Dimensions to include in the query (in addition to those used 

2007 to identify the queried dataset type(s)), either to constrain 

2008 the resulting datasets to those for which a matching dimension 

2009 exists, or to relate the dataset type's dimensions to dimensions 

2010 referenced by the ``dataId`` or ``where`` arguments. 

2011 dataId : `dict` or `DataCoordinate`, optional 

2012 A data ID whose key-value pairs are used as equality constraints 

2013 in the query. 

2014 where : `str`, optional 

2015 A string expression similar to a SQL WHERE clause. May involve 

2016 any column of a dimension table or (as a shortcut for the primary 

2017 key column of a dimension table) dimension name. See 

2018 :ref:`daf_butler_dimension_expressions` for more information. 

2019 findFirst : `bool`, optional 

2020 If `True` (`False` is default), for each result data ID, only 

2021 yield one `DatasetRef` of each `DatasetType`, from the first 

2022 collection in which a dataset of that dataset type appears 

2023 (according to the order of ``collections`` passed in). If `True`, 

2024 ``collections`` must not contain regular expressions and may not 

2025 be ``...``. 

2026 components : `bool`, optional 

2027 If `True`, apply all dataset expression patterns to component 

2028 dataset type names as well. If `False`, never apply patterns to 

2029 components. If `None`, apply patterns to components only 

2030 if their parent datasets were not matched by the expression. 

2031 Fully-specified component datasets (`str` or `DatasetType` 

2032 instances) are always included. 

2033 

2034 Values other than `False` are deprecated, and only `False` will be 

2035 supported after v26. After v27 this argument will be removed 

2036 entirely. 

2037 bind : `~collections.abc.Mapping`, optional 

2038 Mapping containing literal values that should be injected into the 

2039 ``where`` expression, keyed by the identifiers they replace. 

2040 Values of collection type can be expanded in some cases; see 

2041 :ref:`daf_butler_dimension_expressions_identifiers` for more 

2042 information. 

2043 check : `bool`, optional 

2044 If `True` (default) check the query for consistency before 

2045 executing it. This may reject some valid queries that resemble 

2046 common mistakes (e.g. queries for visits without specifying an 

2047 instrument). 

2048 **kwargs 

2049 Additional keyword arguments are forwarded to 

2050 `DataCoordinate.standardize` when processing the ``dataId`` 

2051 argument (and may be used to provide a constraining data ID even 

2052 when the ``dataId`` argument is `None`). 

2053 

2054 Returns 

2055 ------- 

2056 refs : `.queries.DatasetQueryResults` 

2057 Dataset references matching the given query criteria. Nested data 

2058 IDs are guaranteed to include values for all implied dimensions 

2059 (i.e. `DataCoordinate.hasFull` will return `True`), but will not 

2060 include dimension records (`DataCoordinate.hasRecords` will be 

2061 `False`) unless `~.queries.DatasetQueryResults.expanded` is 

2062 called on the result object (which returns a new one). 

2063 

2064 Raises 

2065 ------ 

2066 lsst.daf.butler.registry.DatasetTypeExpressionError 

2067 Raised when ``datasetType`` expression is invalid. 

2068 TypeError 

2069 Raised when the arguments are incompatible, such as when a 

2070 collection wildcard is passed when ``findFirst`` is `True`, or 

2071 when ``collections`` is `None` and ``self.defaults.collections`` is 

2072 also `None`. 

2073 lsst.daf.butler.registry.DataIdError 

2074 Raised when ``dataId`` or keyword arguments specify unknown 

2075 dimensions or values, or when they contain inconsistent values. 

2076 lsst.daf.butler.registry.UserExpressionError 

2077 Raised when ``where`` expression is invalid. 

2078 

2079 Notes 

2080 ----- 

2081 When multiple dataset types are queried in a single call, the 

2082 results of this operation are equivalent to querying for each dataset 

2083 type separately in turn, and no information about the relationships 

2084 between datasets of different types is included. In contexts where 

2085 that kind of information is important, the recommended pattern is to 

2086 use `queryDataIds` to first obtain data IDs (possibly with the 

2087 desired dataset types and collections passed as constraints to the 

2088 query), and then use multiple (generally much simpler) calls to 

2089 `queryDatasets` with the returned data IDs passed as constraints. 

2090 """ 

2091 doomed_by: list[str] = [] 

2092 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs) 

2093 dataset_composition, collection_wildcard = self._standardize_query_dataset_args( 

2094 datasetType, 

2095 collections, 

2096 components, 

2097 mode="find_first" if findFirst else "find_all", 

2098 doomed_by=doomed_by, 

2099 ) 

2100 if collection_wildcard is not None and collection_wildcard.empty(): 

2101 doomed_by.append("No datasets can be found because collection list is empty.") 

2102 return queries.ChainedDatasetQueryResults([], doomed_by=doomed_by) 

2103 parent_results: list[queries.ParentDatasetQueryResults] = [] 

2104 for parent_dataset_type, components_for_parent in dataset_composition.items(): 

2105 # The full set of dimensions in the query is the combination of 

2106 # those needed for the DatasetType and those explicitly requested, 

2107 # if any. 

2108 dimension_names = set(parent_dataset_type.dimensions.names) 

2109 if dimensions is not None: 

2110 dimension_names.update(self.dimensions.conform(dimensions).names) 

2111 # Construct the summary structure needed to construct a 

2112 # QueryBuilder. 

2113 summary = queries.QuerySummary( 

2114 requested=self.dimensions.conform(dimension_names), 

2115 column_types=self._managers.column_types, 

2116 data_id=data_id, 

2117 expression=where, 

2118 bind=bind, 

2119 defaults=self.defaults.dataId, 

2120 check=check, 

2121 datasets=[parent_dataset_type], 

2122 ) 

2123 builder = self._makeQueryBuilder(summary) 

2124 # Add the dataset subquery to the query, telling the QueryBuilder 

2125 # to include the rank of the selected collection in the results 

2126 # only if we need to findFirst. Note that if any of the 

2127 # collections are actually wildcard expressions, and 

2128 # findFirst=True, this will raise TypeError for us. 

2129 builder.joinDataset(parent_dataset_type, collection_wildcard, isResult=True, findFirst=findFirst) 

2130 query = builder.finish() 

2131 parent_results.append( 

2132 queries.ParentDatasetQueryResults( 

2133 query, parent_dataset_type, components=components_for_parent 

2134 ) 

2135 ) 

2136 if not parent_results: 

2137 doomed_by.extend( 

2138 f"No registered dataset type matching {t!r} found, so no matching datasets can " 

2139 "exist in any collection." 

2140 for t in ensure_iterable(datasetType) 

2141 ) 

2142 return queries.ChainedDatasetQueryResults([], doomed_by=doomed_by) 

2143 elif len(parent_results) == 1: 

2144 return parent_results[0] 

2145 else: 

2146 return queries.ChainedDatasetQueryResults(parent_results) 

2147 
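# A minimal usage sketch for queryDatasets (the dataset type, collection,
# and data-ID values are hypothetical):
#
#     refs = registry.queryDatasets(
#         "calexp",
#         collections=["MyCam/runs/week1"],
#         where="visit IN (903334..903338) AND detector != 0",
#         instrument="MyCam",
#         findFirst=True,
#     )
#     for ref in refs:
#         print(ref.dataId, ref.run)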

2148 def queryDataIds( 

2149 self, 

2150 # TODO: Drop Dimension support on DM-41326. 

2151 dimensions: DimensionGroup | Iterable[Dimension | str] | Dimension | str, 

2152 *, 

2153 dataId: DataId | None = None, 

2154 datasets: Any = None, 

2155 collections: CollectionArgType | None = None, 

2156 where: str = "", 

2157 components: bool | None = None, 

2158 bind: Mapping[str, Any] | None = None, 

2159 check: bool = True, 

2160 **kwargs: Any, 

2161 ) -> queries.DataCoordinateQueryResults: 

2162 """Query for data IDs matching user-provided criteria. 

2163 

2164 Parameters 

2165 ---------- 

2166 dimensions : `DimensionGroup`, `Dimension`, or `str`, or \ 

2167 `~collections.abc.Iterable` [ `Dimension` or `str` ] 

2168 The dimensions of the data IDs to yield, as either `Dimension` 

2169 instances or `str`. Will be automatically expanded to a complete 

2170 `DimensionGroup`. Support for `Dimension` instances is deprecated 

2171 and will not be supported after v27. 

2172 dataId : `dict` or `DataCoordinate`, optional 

2173 A data ID whose key-value pairs are used as equality constraints 

2174 in the query. 

2175 datasets : dataset type expression, optional 

2176 An expression that fully or partially identifies dataset types 

2177 that should constrain the yielded data IDs. For example, including 

2178 "raw" here would constrain the yielded ``instrument``, 

2179 ``exposure``, ``detector``, and ``physical_filter`` values to only 

2180 those for which at least one "raw" dataset exists in 

2181 ``collections``. Allowed types include `DatasetType`, `str`, 

2182 and iterables thereof. Regular expression objects (i.e. 

2183 `re.Pattern`) are deprecated and will be removed after the v26 

2184 release. See :ref:`daf_butler_dataset_type_expressions` for more 

2185 information. 

2186 collections : collection expression, optional 

2187 An expression that identifies the collections to search for 

2188 datasets, such as a `str` (for full matches or partial matches 

2189 via globs), `re.Pattern` (for partial matches), or iterable 

2190 thereof. ``...`` can be used to search all collections (actually 

2191 just all `~CollectionType.RUN` collections, because this will 

2192 still find all datasets). If not provided, 

2193 ``self.defaults.collections`` is used. Ignored unless ``datasets`` 

2194 is also passed. See :ref:`daf_butler_collection_expressions` for 

2195 more information. 

2196 where : `str`, optional 

2197 A string expression similar to a SQL WHERE clause. May involve 

2198 any column of a dimension table or (as a shortcut for the primary 

2199 key column of a dimension table) dimension name. See 

2200 :ref:`daf_butler_dimension_expressions` for more information. 

2201 components : `bool`, optional 

2202 If `True`, apply all dataset expression patterns to component 

2203 dataset type names as well. If `False`, never apply patterns to 

2204 components. If `None`, apply patterns to components only 

2205 if their parent datasets were not matched by the expression. 

2206 Fully-specified component datasets (`str` or `DatasetType` 

2207 instances) are always included. 

2208 

2209 Values other than `False` are deprecated, and only `False` will be 

2210 supported after v26. After v27 this argument will be removed 

2211 entirely. 

2212 bind : `~collections.abc.Mapping`, optional 

2213 Mapping containing literal values that should be injected into the 

2214 ``where`` expression, keyed by the identifiers they replace. 

2215 Values of collection type can be expanded in some cases; see 

2216 :ref:`daf_butler_dimension_expressions_identifiers` for more 

2217 information. 

2218 check : `bool`, optional 

2219 If `True` (default) check the query for consistency before 

2220 executing it. This may reject some valid queries that resemble 

2221 common mistakes (e.g. queries for visits without specifying an 

2222 instrument). 

2223 **kwargs 

2224 Additional keyword arguments are forwarded to 

2225 `DataCoordinate.standardize` when processing the ``dataId`` 

2226 argument (and may be used to provide a constraining data ID even 

2227 when the ``dataId`` argument is `None`). 

2228 

2229 Returns 

2230 ------- 

2231 dataIds : `.queries.DataCoordinateQueryResults` 

2232 Data IDs matching the given query parameters. These are guaranteed 

2233 to identify all dimensions (`DataCoordinate.hasFull` returns 

2234 `True`), but will not contain `DimensionRecord` objects 

2235 (`DataCoordinate.hasRecords` returns `False`). Call 

2236 `~.queries.DataCoordinateQueryResults.expanded` on the 

2237 returned object to fetch those (and consider using 

2238 `~.queries.DataCoordinateQueryResults.materialize` on the 

2239 returned object first if the expected number of rows is very 

2240 large). See documentation for those methods for additional 

2241 information. 

2242 

2243 Raises 

2244 ------ 

2245 lsst.daf.butler.registry.NoDefaultCollectionError 

2246 Raised if ``collections`` is `None` and 

2247 ``self.defaults.collections`` is `None`. 

2248 lsst.daf.butler.registry.CollectionExpressionError 

2249 Raised when ``collections`` expression is invalid. 

2250 lsst.daf.butler.registry.DataIdError 

2251 Raised when ``dataId`` or keyword arguments specify unknown 

2252 dimensions or values, or when they contain inconsistent values. 

2253 lsst.daf.butler.registry.DatasetTypeExpressionError 

2254 Raised when ``datasetType`` expression is invalid. 

2255 lsst.daf.butler.registry.UserExpressionError 

2256 Raised when ``where`` expression is invalid. 

2257 """ 

2258 requested_dimensions = self.dimensions.conform(dimensions) 

2259 doomed_by: list[str] = [] 

2260 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs) 

2261 dataset_composition, collection_wildcard = self._standardize_query_dataset_args( 

2262 datasets, collections, components, doomed_by=doomed_by 

2263 ) 

2264 if collection_wildcard is not None and collection_wildcard.empty(): 

2265 doomed_by.append("No data coordinates can be found because collection list is empty.") 

2266 summary = queries.QuerySummary( 

2267 requested=requested_dimensions, 

2268 column_types=self._managers.column_types, 

2269 data_id=data_id, 

2270 expression=where, 

2271 bind=bind, 

2272 defaults=self.defaults.dataId, 

2273 check=check, 

2274 datasets=dataset_composition.keys(), 

2275 ) 

2276 builder = self._makeQueryBuilder(summary, doomed_by=doomed_by) 

2277 for datasetType in dataset_composition: 

2278 builder.joinDataset(datasetType, collection_wildcard, isResult=False) 

2279 query = builder.finish() 

2280 

2281 return queries.DataCoordinateQueryResults(query) 

2282 
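# A minimal usage sketch for queryDataIds (constraint values are
# hypothetical):
#
#     data_ids = registry.queryDataIds(
#         ["visit", "detector"],
#         datasets="raw",
#         collections="MyCam/raw/all",
#         where="visit > 900000",
#         instrument="MyCam",
#     )
#     for data_id in data_ids.expanded():
#         print(data_id["visit"], data_id["detector"])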

2283 def queryDimensionRecords( 

2284 self, 

2285 element: DimensionElement | str, 

2286 *, 

2287 dataId: DataId | None = None, 

2288 datasets: Any = None, 

2289 collections: CollectionArgType | None = None, 

2290 where: str = "", 

2291 components: bool | None = None, 

2292 bind: Mapping[str, Any] | None = None, 

2293 check: bool = True, 

2294 **kwargs: Any, 

2295 ) -> queries.DimensionRecordQueryResults: 

2296 """Query for dimension information matching user-provided criteria. 

2297 

2298 Parameters 

2299 ---------- 

2300 element : `DimensionElement` or `str` 

2301 The dimension element to obtain records for. 

2302 dataId : `dict` or `DataCoordinate`, optional 

2303 A data ID whose key-value pairs are used as equality constraints 

2304 in the query. 

2305 datasets : dataset type expression, optional 

2306 An expression that fully or partially identifies dataset types 

2307 that should constrain the yielded records. See `queryDataIds` and 

2308 :ref:`daf_butler_dataset_type_expressions` for more information. 

2309 collections : collection expression, optional 

2310 An expression that identifies the collections to search for 

2311 datasets, such as a `str` (for full matches or partial matches 

2312 via globs), `re.Pattern` (for partial matches), or iterable 

2313 thereof. ``...`` can be used to search all collections (actually 

2314 just all `~CollectionType.RUN` collections, because this will 

2315 still find all datasets). If not provided, 

2316 ``self.defaults.collections`` is used. Ignored unless ``datasets`` 

2317 is also passed. See :ref:`daf_butler_collection_expressions` for 

2318 more information. 

2319 where : `str`, optional 

2320 A string expression similar to a SQL WHERE clause. See 

2321 `queryDataIds` and :ref:`daf_butler_dimension_expressions` for more 

2322 information. 

2323 components : `bool`, optional 

2324 Whether to apply dataset expressions to components as well. 

2325 See `queryDataIds` for more information. 

2326 

2327 Values other than `False` are deprecated, and only `False` will be 

2328 supported after v26. After v27 this argument will be removed 

2329 entirely. 

2330 bind : `~collections.abc.Mapping`, optional 

2331 Mapping containing literal values that should be injected into the 

2332 ``where`` expression, keyed by the identifiers they replace. 

2333 Values of collection type can be expanded in some cases; see 

2334 :ref:`daf_butler_dimension_expressions_identifiers` for more 

2335 information. 

2336 check : `bool`, optional 

2337 If `True` (default) check the query for consistency before 

2338 executing it. This may reject some valid queries that resemble 

2339 common mistakes (e.g. queries for visits without specifying an 

2340 instrument). 

2341 **kwargs 

2342 Additional keyword arguments are forwarded to 

2343 `DataCoordinate.standardize` when processing the ``dataId`` 

2344 argument (and may be used to provide a constraining data ID even 

2345 when the ``dataId`` argument is `None`). 

2346 

2347 Returns 

2348 ------- 

2349 records : `.queries.DimensionRecordQueryResults` 

2350 Dimension records matching the given query parameters. 

2351 

2352 Raises 

2353 ------ 

2354 lsst.daf.butler.registry.NoDefaultCollectionError 

2355 Raised if ``collections`` is `None` and 

2356 ``self.defaults.collections`` is `None`. 

2357 lsst.daf.butler.registry.CollectionExpressionError 

2358 Raised when ``collections`` expression is invalid. 

2359 lsst.daf.butler.registry.DataIdError 

2360 Raised when ``dataId`` or keyword arguments specify unknown 

2361 dimensions or values, or when they contain inconsistent values. 

2362 lsst.daf.butler.registry.DatasetTypeExpressionError 

2363 Raised when ``datasetType`` expression is invalid. 

2364 lsst.daf.butler.registry.UserExpressionError 

2365 Raised when ``where`` expression is invalid. 

2366 """ 

2367 if not isinstance(element, DimensionElement): 

2368 try: 

2369 element = self.dimensions[element] 

2370 except KeyError as e: 

2371 raise DimensionNameError( 

2372 f"No such dimension '{element}', available dimensions: " + str(self.dimensions.elements) 

2373 ) from e 

2374 doomed_by: list[str] = [] 

2375 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs) 

2376 dataset_composition, collection_wildcard = self._standardize_query_dataset_args( 

2377 datasets, collections, components, doomed_by=doomed_by 

2378 ) 

2379 if collection_wildcard is not None and collection_wildcard.empty(): 

2380 doomed_by.append("No dimension records can be found because collection list is empty.") 

2381 summary = queries.QuerySummary( 

2382 requested=element.minimal_group, 

2383 column_types=self._managers.column_types, 

2384 data_id=data_id, 

2385 expression=where, 

2386 bind=bind, 

2387 defaults=self.defaults.dataId, 

2388 check=check, 

2389 datasets=dataset_composition.keys(), 

2390 ) 

2391 builder = self._makeQueryBuilder(summary, doomed_by=doomed_by) 

2392 for datasetType in dataset_composition: 

2393 builder.joinDataset(datasetType, collection_wildcard, isResult=False) 

2394 query = builder.finish().with_record_columns(element.name) 

2395 return queries.DatabaseDimensionRecordQueryResults(query, element) 

2396 
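# A minimal usage sketch for queryDimensionRecords (constraint values are
# hypothetical):
#
#     records = registry.queryDimensionRecords(
#         "exposure",
#         where="exposure.observation_type = 'science'",
#         instrument="MyCam",
#     )
#     for record in records:
#         print(record.id, record.timespan)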

2397 def queryDatasetAssociations( 

2398 self, 

2399 datasetType: str | DatasetType, 

2400 collections: CollectionArgType | None = ..., 

2401 *, 

2402 collectionTypes: Iterable[CollectionType] = CollectionType.all(), 

2403 flattenChains: bool = False, 

2404 ) -> Iterator[DatasetAssociation]: 

2405 """Iterate over dataset-collection combinations where the dataset is in 

2406 the collection. 

2407 

2408 This method is a temporary placeholder for better support for 

2409 association results in `queryDatasets`. It will probably be 

2410 removed in the future, and should be avoided in production code 

2411 whenever possible. 

2412 

2413 Parameters 

2414 ---------- 

2415 datasetType : `DatasetType` or `str` 

2416 A dataset type object or the name of one. 

2417 collections : collection expression, optional 

2418 An expression that identifies the collections to search for 

2419 datasets, such as a `str` (for full matches or partial matches 

2420 via globs), `re.Pattern` (for partial matches), or iterable 

2421 thereof. ``...`` can be used to search all collections (actually 

2422 just all `~CollectionType.RUN` collections, because this will still 

2423 find all datasets). If not provided, ``self.defaults.collections`` 

2424 is used. See :ref:`daf_butler_collection_expressions` for more 

2425 information. 

2426 collectionTypes : `~collections.abc.Set` [ `CollectionType` ], optional 

2427 If provided, only yield associations from collections of these 

2428 types. 

2429 flattenChains : `bool`, optional 

2430 If `True`, search in the children of `~CollectionType.CHAINED` 

2431 collections. If `False`, ``CHAINED`` collections are ignored. 

2432 

2433 Yields 

2434 ------ 

2435 association : `.DatasetAssociation` 

2436 Object representing the relationship between a single dataset and 

2437 a single collection. 

2438 

2439 Raises 

2440 ------ 

2441 lsst.daf.butler.registry.NoDefaultCollectionError 

2442 Raised if ``collections`` is `None` and 

2443 ``self.defaults.collections`` is `None`. 

2444 lsst.daf.butler.registry.CollectionExpressionError 

2445 Raised when ``collections`` expression is invalid. 

2446 """ 

2447 if collections is None: 

2448 if not self.defaults.collections: 

2449 raise NoDefaultCollectionError( 

2450 "No collections provided to queryDatasetAssociations, " 

2451 "and no defaults from registry construction." 

2452 ) 

2453 collections = self.defaults.collections 

2454 collection_wildcard = CollectionWildcard.from_expression(collections) 

2455 backend = queries.SqlQueryBackend(self._db, self._managers) 

2456 parent_dataset_type, _ = backend.resolve_single_dataset_type_wildcard(datasetType, components=False) 

2457 timespan_tag = DatasetColumnTag(parent_dataset_type.name, "timespan") 

2458 collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection") 

2459 for parent_collection_record in backend.resolve_collection_wildcard( 

2460 collection_wildcard, 

2461 collection_types=frozenset(collectionTypes), 

2462 flatten_chains=flattenChains, 

2463 ): 

2464 # Resolve this possibly-chained collection into a list of 

2465 # non-CHAINED collections that actually hold datasets of this 

2466 # type. 

2467 candidate_collection_records = backend.resolve_dataset_collections( 

2468 parent_dataset_type, 

2469 CollectionWildcard.from_names([parent_collection_record.name]), 

2470 allow_calibration_collections=True, 

2471 governor_constraints={}, 

2472 ) 

2473 if not candidate_collection_records: 

2474 continue 

2475 with backend.context() as context: 

2476 relation = backend.make_dataset_query_relation( 

2477 parent_dataset_type, 

2478 candidate_collection_records, 

2479 columns={"dataset_id", "run", "timespan", "collection"}, 

2480 context=context, 

2481 ) 

2482 reader = queries.DatasetRefReader( 

2483 parent_dataset_type, 

2484 translate_collection=lambda k: self._managers.collections[k].name, 

2485 full=False, 

2486 ) 

2487 for row in context.fetch_iterable(relation): 

2488 ref = reader.read(row) 

2489 collection_record = self._managers.collections[row[collection_tag]] 

2490 if collection_record.type is CollectionType.CALIBRATION: 

2491 timespan = row[timespan_tag] 

2492 else: 

2493 # For backwards compatibility and (possibly?) user 

2494 # convenience we continue to define the timespan of a 

2495 # DatasetAssociation row for a non-CALIBRATION 

2496 # collection to be None rather than a fully unbounded 

2497 # timespan. 

2498 timespan = None 

2499 yield DatasetAssociation(ref=ref, collection=collection_record.name, timespan=timespan) 

2500 
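# A minimal usage sketch for queryDatasetAssociations (the dataset type and
# collection pattern are hypothetical):
#
#     from lsst.daf.butler import CollectionType
#
#     for assoc in registry.queryDatasetAssociations(
#         "bias",
#         collections="MyCam/calib*",
#         collectionTypes={CollectionType.CALIBRATION},
#     ):
#         print(assoc.collection, assoc.ref.dataId, assoc.timespan)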

2501 def get_datastore_records(self, ref: DatasetRef) -> DatasetRef: 

2502 """Retrieve datastore records for given ref. 

2503 

2504 Parameters 

2505 ---------- 

2506 ref : `DatasetRef` 

2507 Dataset reference for which to retrieve its corresponding datastore 

2508 records. 

2509 

2510 Returns 

2511 ------- 

2512 updated_ref : `DatasetRef` 

2513 Dataset reference with filled datastore records. 

2514 

2515 Notes 

2516 ----- 

2517 If this method is called with a dataset ref that is not known to the 

2518 registry, a reference with an empty set of records is returned. 

2519 """ 

2520 datastore_records: dict[str, list[StoredDatastoreItemInfo]] = {} 

2521 for opaque, record_class in self._datastore_record_classes.items(): 

2522 records = self.fetchOpaqueData(opaque, dataset_id=ref.id) 

2523 datastore_records[opaque] = [record_class.from_record(record) for record in records] 

2524 return ref.replace(datastore_records=datastore_records) 

2525 

2526 def store_datastore_records(self, refs: Mapping[str, DatasetRef]) -> None: 

2527 """Store datastore records for given refs. 

2528 

2529 Parameters 

2530 ---------- 

2531 refs : `~collections.abc.Mapping` [`str`, `DatasetRef`] 

2532 Mapping of datastore name to a dataset reference stored in that 

2533 datastore; each reference must include datastore records. 

2534 """ 

2535 for datastore_name, ref in refs.items(): 

2536 # Store ref IDs in the bridge table. 

2537 bridge = self._managers.datastores.register(datastore_name) 

2538 bridge.insert([ref]) 

2539 

2540 # store records in opaque tables 

2541 assert ref._datastore_records is not None, "Dataset ref must have datastore records" 

2542 for table_name, records in ref._datastore_records.items(): 

2543 opaque_table = self._managers.opaque.get(table_name) 

2544 assert opaque_table is not None, f"Unexpected opaque table name {table_name}" 

2545 opaque_table.insert(*(record.to_record(dataset_id=ref.id) for record in records)) 

2546 

2547 def make_datastore_tables(self, tables: Mapping[str, DatastoreOpaqueTable]) -> None: 

2548 """Create opaque tables used by datastores. 

2549 

2550 Parameters 

2551 ---------- 

2552 tables : `~collections.abc.Mapping` 

2553 Maps opaque table name to its definition. 

2554 

2555 Notes 

2556 ----- 

2557 This method should disappear in the future when opaque table 

2558 definitions are provided during `Registry` construction. 

2559 """ 

2560 datastore_record_classes = {} 

2561 for table_name, table_def in tables.items(): 

2562 datastore_record_classes[table_name] = table_def.record_class 

2563 try: 

2564 self._managers.opaque.register(table_name, table_def.table_spec) 

2565 except ReadOnlyDatabaseError: 

2566 # If the database is read only and we just tried and failed to 

2567 # create a table, it means someone is trying to create a 

2568 # read-only butler client for an empty repo. That should be 

2569 # okay, as long as they then try to get any datasets before 

2570 # some other client creates the table. Chances are they're 

2571 # just validating configuration. 

2572 pass 

2573 self._datastore_record_classes = datastore_record_classes 

2574 

2575 @property 

2576 def obsCoreTableManager(self) -> ObsCoreTableManager | None: 

2577 """The ObsCore manager instance for this registry 

2578 (`~.interfaces.ObsCoreTableManager` 

2579 or `None`). 

2580 

2581 The ObsCore manager may not be implemented for all registry backends, 

2582 and may be disabled for many repositories. 

2583 """ 

2584 return self._managers.obscore 

2585 

2586 storageClasses: StorageClassFactory 

2587 """All storage classes known to the registry (`StorageClassFactory`). 

2588 """ 

2589 

2590 _defaults: RegistryDefaults 

2591 """Default collections used for registry queries (`RegistryDefaults`)."""