Coverage for python/lsst/daf/butler/_butler.py: 10%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

634 statements  

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22""" 

23Butler top level classes. 

24""" 

25from __future__ import annotations 

26 

27__all__ = ( 

28 "Butler", 

29 "ButlerValidationError", 

30 "PruneCollectionsArgsError", 

31 "PurgeWithoutUnstorePruneCollectionsError", 

32 "RunWithoutPurgePruneCollectionsError", 

33 "PurgeUnsupportedPruneCollectionsError", 

34) 

35 

36import collections.abc 

37import contextlib 

38import logging 

39import numbers 

40import os 

41from collections import defaultdict 

42from typing import ( 

43 Any, 

44 ClassVar, 

45 Counter, 

46 Dict, 

47 Iterable, 

48 Iterator, 

49 List, 

50 MutableMapping, 

51 Optional, 

52 Set, 

53 TextIO, 

54 Tuple, 

55 Type, 

56 Union, 

57) 

58 

59from lsst.resources import ResourcePath, ResourcePathExpression 

60from lsst.utils import doImportType 

61from lsst.utils.introspection import get_class_of 

62from lsst.utils.logging import VERBOSE, getLogger 

63 

64from ._butlerConfig import ButlerConfig 

65from ._butlerRepoIndex import ButlerRepoIndex 

66from ._deferredDatasetHandle import DeferredDatasetHandle 

67from ._limited_butler import LimitedButler 

68from .core import ( 

69 AmbiguousDatasetError, 

70 Config, 

71 ConfigSubset, 

72 DataCoordinate, 

73 DataId, 

74 DataIdValue, 

75 DatasetRef, 

76 DatasetType, 

77 Datastore, 

78 Dimension, 

79 DimensionConfig, 

80 DimensionUniverse, 

81 FileDataset, 

82 Progress, 

83 StorageClassFactory, 

84 Timespan, 

85 ValidationError, 

86) 

87from .core.repoRelocation import BUTLER_ROOT_TAG 

88from .core.utils import transactional 

89from .registry import ( 

90 CollectionSearch, 

91 CollectionType, 

92 ConflictingDefinitionError, 

93 DataIdError, 

94 DataIdValueError, 

95 DatasetIdGenEnum, 

96 DimensionNameError, 

97 InconsistentDataIdError, 

98 Registry, 

99 RegistryConfig, 

100 RegistryDefaults, 

101) 

102from .transfers import RepoExportContext 

103 

104log = getLogger(__name__) 

105 

106 

107class ButlerValidationError(ValidationError): 

108 """There is a problem with the Butler configuration.""" 

109 

110 pass 

111 

112 

113class PruneCollectionsArgsError(TypeError): 

114 """Base class for errors relating to Butler.pruneCollections input 

115 arguments. 

116 """ 

117 

118 pass 

119 

120 

121class PurgeWithoutUnstorePruneCollectionsError(PruneCollectionsArgsError): 

122 """Raised when purge and unstore are both required to be True, and 

123 purge is True but unstore is False. 

124 """ 

125 

126 def __init__(self) -> None: 

127 super().__init__("Cannot pass purge=True without unstore=True.") 

128 

129 

130class RunWithoutPurgePruneCollectionsError(PruneCollectionsArgsError): 

131 """Raised when pruning a RUN collection but purge is False.""" 

132 

133 def __init__(self, collectionType: CollectionType): 

134 self.collectionType = collectionType 

135 super().__init__(f"Cannot prune RUN collection {self.collectionType.name} without purge=True.") 

136 

137 

138class PurgeUnsupportedPruneCollectionsError(PruneCollectionsArgsError): 

139 """Raised when purge is True but is not supported for the given 

140 collection.""" 

141 

142 def __init__(self, collectionType: CollectionType): 

143 self.collectionType = collectionType 

144 super().__init__( 

145 f"Cannot prune {self.collectionType} collection {self.collectionType.name} with purge=True." 

146 ) 

147 

148 

149class Butler(LimitedButler): 

150 """Main entry point for the data access system. 

151 

152 Parameters 

153 ---------- 

154 config : `ButlerConfig`, `Config` or `str`, optional. 

155 Configuration. Anything acceptable to the 

156 `ButlerConfig` constructor. If a directory path 

157 is given the configuration will be read from a ``butler.yaml`` file in 

158 that location. If `None` is given default values will be used. 

159 butler : `Butler`, optional. 

160 If provided, construct a new Butler that uses the same registry and 

161 datastore as the given one, but with the given collection and run. 

162 Incompatible with the ``config``, ``searchPaths``, and ``writeable`` 

163 arguments. 

164 collections : `str` or `Iterable` [ `str` ], optional 

165 An expression specifying the collections to be searched (in order) when 

166 reading datasets. 

167 This may be a `str` collection name or an iterable thereof. 

168 See :ref:`daf_butler_collection_expressions` for more information. 

169 These collections are not registered automatically and must be 

170 manually registered before they are used by any method, but they may be 

171 manually registered after the `Butler` is initialized. 

172 run : `str`, optional 

173 Name of the `~CollectionType.RUN` collection new datasets should be 

174 inserted into. If ``collections`` is `None` and ``run`` is not `None`, 

175 ``collections`` will be set to ``[run]``. If not `None`, this 

176 collection will automatically be registered. If this is not set (and 

177 ``writeable`` is not set either), a read-only butler will be created. 

178 searchPaths : `list` of `str`, optional 

179 Directory paths to search when calculating the full Butler 

180 configuration. Not used if the supplied config is already a 

181 `ButlerConfig`. 

182 writeable : `bool`, optional 

183 Explicitly sets whether the butler supports write operations. If not 

184 provided, a read-write butler is created if any of ``run``, ``tags``, 

185 or ``chains`` is non-empty. 

186 inferDefaults : `bool`, optional 

187 If `True` (default) infer default data ID values from the values 

188 present in the datasets in ``collections``: if all collections have the 

189 same value (or no value) for a governor dimension, that value will be 

190 the default for that dimension. Nonexistent collections are ignored. 

191 If a default value is provided explicitly for a governor dimension via 

192 ``**kwargs``, no default will be inferred for that dimension. 

193 **kwargs : `str` 

194 Default data ID key-value pairs. These may only identify "governor" 

195 dimensions like ``instrument`` and ``skymap``. 

196 

197 Examples 

198 -------- 

199 While there are many ways to control exactly how a `Butler` interacts with 

200 the collections in its `Registry`, the most common cases are still simple. 

201 

202 For a read-only `Butler` that searches one collection, do:: 

203 

204 butler = Butler("/path/to/repo", collections=["u/alice/DM-50000"]) 

205 

206 For a read-write `Butler` that writes to and reads from a 

207 `~CollectionType.RUN` collection:: 

208 

209 butler = Butler("/path/to/repo", run="u/alice/DM-50000/a") 

210 

211 The `Butler` passed to a ``PipelineTask`` is often much more complex, 

212 because we want to write to one `~CollectionType.RUN` collection but read 

213 from several others (as well):: 

214 

215 butler = Butler("/path/to/repo", run="u/alice/DM-50000/a", 

216 collections=["u/alice/DM-50000/a", 

217 "u/bob/DM-49998", 

218 "HSC/defaults"]) 

219 

220 This butler will `put` new datasets to the run ``u/alice/DM-50000/a``. 

221 Datasets will be read first from that run (since it appears first in the 

222 chain), and then from ``u/bob/DM-49998`` and finally ``HSC/defaults``. 

223 

224 Finally, one can always create a `Butler` with no collections:: 

225 

226 butler = Butler("/path/to/repo", writeable=True) 

227 

228 This can be extremely useful when you just want to use ``butler.registry``, 

229 e.g. for inserting dimension data or managing collections, or when the 

230 collections you want to use with the butler are not consistent. 

231 Passing ``writeable`` explicitly here is only necessary if you want to be 

232 able to make changes to the repo - usually the value for ``writeable`` can 

233 be guessed from the collection arguments provided, but it defaults to 

234 `False` when there are not collection arguments. 

235 """ 

236 

237 def __init__( 

238 self, 

239 config: Union[Config, str, None] = None, 

240 *, 

241 butler: Optional[Butler] = None, 

242 collections: Any = None, 

243 run: Optional[str] = None, 

244 searchPaths: Optional[List[str]] = None, 

245 writeable: Optional[bool] = None, 

246 inferDefaults: bool = True, 

247 **kwargs: str, 

248 ): 

249 defaults = RegistryDefaults(collections=collections, run=run, infer=inferDefaults, **kwargs) 

250 # Load registry, datastore, etc. from config or existing butler. 

251 if butler is not None: 

252 if config is not None or searchPaths is not None or writeable is not None: 

253 raise TypeError( 

254 "Cannot pass 'config', 'searchPaths', or 'writeable' arguments with 'butler' argument." 

255 ) 

256 self.registry = butler.registry.copy(defaults) 

257 self.datastore = butler.datastore 

258 self.storageClasses = butler.storageClasses 

259 self._config: ButlerConfig = butler._config 

260 self._allow_put_of_predefined_dataset = butler._allow_put_of_predefined_dataset 

261 else: 

262 self._config = ButlerConfig(config, searchPaths=searchPaths) 

263 try: 

264 if "root" in self._config: 

265 butlerRoot = self._config["root"] 

266 else: 

267 butlerRoot = self._config.configDir 

268 if writeable is None: 

269 writeable = run is not None 

270 self.registry = Registry.fromConfig( 

271 self._config, butlerRoot=butlerRoot, writeable=writeable, defaults=defaults 

272 ) 

273 self.datastore = Datastore.fromConfig( 

274 self._config, self.registry.getDatastoreBridgeManager(), butlerRoot=butlerRoot 

275 ) 

276 self.storageClasses = StorageClassFactory() 

277 self.storageClasses.addFromConfig(self._config) 

278 self._allow_put_of_predefined_dataset = self._config.get( 

279 "allow_put_of_predefined_dataset", False 

280 ) 

281 except Exception: 

282 # Failures here usually mean that configuration is incomplete, 

283 # just issue an error message which includes config file URI. 

284 log.error(f"Failed to instantiate Butler from config {self._config.configFile}.") 

285 raise 

286 

287 if "run" in self._config or "collection" in self._config: 

288 raise ValueError("Passing a run or collection via configuration is no longer supported.") 

289 

290 GENERATION: ClassVar[int] = 3 

291 """This is a Generation 3 Butler. 

292 

293 This attribute may be removed in the future, once the Generation 2 Butler 

294 interface has been fully retired; it should only be used in transitional 

295 code. 

296 """ 

297 

298 @classmethod 

299 def get_repo_uri(cls, label: str) -> ResourcePath: 

300 """Look up the label in a butler repository index. 

301 

302 Parameters 

303 ---------- 

304 label : `str` 

305 Label of the Butler repository to look up. 

306 

307 Returns 

308 ------- 

309 uri : `lsst.resources.ResourcePath` 

310 URI to the Butler repository associated with the given label. 

311 

312 Raises 

313 ------ 

314 KeyError 

315 Raised if the label is not found in the index, or if an index 

316 can not be found at all. 

317 

318 Notes 

319 ----- 

320 See `~lsst.daf.butler.ButlerRepoIndex` for details on how the 

321 information is discovered. 

322 """ 

323 return ButlerRepoIndex.get_repo_uri(label) 

324 

325 @classmethod 

326 def get_known_repos(cls) -> Set[str]: 

327 """Retrieve the list of known repository labels. 

328 

329 Returns 

330 ------- 

331 repos : `set` of `str` 

332 All the known labels. Can be empty if no index can be found. 

333 

334 Notes 

335 ----- 

336 See `~lsst.daf.butler.ButlerRepoIndex` for details on how the 

337 information is discovered. 

338 """ 

339 return ButlerRepoIndex.get_known_repos() 

340 

341 @staticmethod 

342 def makeRepo( 

343 root: ResourcePathExpression, 

344 config: Union[Config, str, None] = None, 

345 dimensionConfig: Union[Config, str, None] = None, 

346 standalone: bool = False, 

347 searchPaths: Optional[List[str]] = None, 

348 forceConfigRoot: bool = True, 

349 outfile: Optional[ResourcePathExpression] = None, 

350 overwrite: bool = False, 

351 ) -> Config: 

352 """Create an empty data repository by adding a butler.yaml config 

353 to a repository root directory. 

354 

355 Parameters 

356 ---------- 

357 root : `lsst.resources.ResourcePathExpression` 

358 Path or URI to the root location of the new repository. Will be 

359 created if it does not exist. 

360 config : `Config` or `str`, optional 

361 Configuration to write to the repository, after setting any 

362 root-dependent Registry or Datastore config options. Can not 

363 be a `ButlerConfig` or a `ConfigSubset`. If `None`, default 

364 configuration will be used. Root-dependent config options 

365 specified in this config are overwritten if ``forceConfigRoot`` 

366 is `True`. 

367 dimensionConfig : `Config` or `str`, optional 

368 Configuration for dimensions, will be used to initialize registry 

369 database. 

370 standalone : `bool` 

371 If True, write all expanded defaults, not just customized or 

372 repository-specific settings. 

373 This (mostly) decouples the repository from the default 

374 configuration, insulating it from changes to the defaults (which 

375 may be good or bad, depending on the nature of the changes). 

376 Future *additions* to the defaults will still be picked up when 

377 initializing `Butlers` to repos created with ``standalone=True``. 

378 searchPaths : `list` of `str`, optional 

379 Directory paths to search when calculating the full butler 

380 configuration. 

381 forceConfigRoot : `bool`, optional 

382 If `False`, any values present in the supplied ``config`` that 

383 would normally be reset are not overridden and will appear 

384 directly in the output config. This allows non-standard overrides 

385 of the root directory for a datastore or registry to be given. 

386 If this parameter is `True` the values for ``root`` will be 

387 forced into the resulting config if appropriate. 

388 outfile : `lss.resources.ResourcePathExpression`, optional 

389 If not-`None`, the output configuration will be written to this 

390 location rather than into the repository itself. Can be a URI 

391 string. Can refer to a directory that will be used to write 

392 ``butler.yaml``. 

393 overwrite : `bool`, optional 

394 Create a new configuration file even if one already exists 

395 in the specified output location. Default is to raise 

396 an exception. 

397 

398 Returns 

399 ------- 

400 config : `Config` 

401 The updated `Config` instance written to the repo. 

402 

403 Raises 

404 ------ 

405 ValueError 

406 Raised if a ButlerConfig or ConfigSubset is passed instead of a 

407 regular Config (as these subclasses would make it impossible to 

408 support ``standalone=False``). 

409 FileExistsError 

410 Raised if the output config file already exists. 

411 os.error 

412 Raised if the directory does not exist, exists but is not a 

413 directory, or cannot be created. 

414 

415 Notes 

416 ----- 

417 Note that when ``standalone=False`` (the default), the configuration 

418 search path (see `ConfigSubset.defaultSearchPaths`) that was used to 

419 construct the repository should also be used to construct any Butlers 

420 to avoid configuration inconsistencies. 

421 """ 

422 if isinstance(config, (ButlerConfig, ConfigSubset)): 

423 raise ValueError("makeRepo must be passed a regular Config without defaults applied.") 

424 

425 # Ensure that the root of the repository exists or can be made 

426 root_uri = ResourcePath(root, forceDirectory=True) 

427 root_uri.mkdir() 

428 

429 config = Config(config) 

430 

431 # If we are creating a new repo from scratch with relative roots, 

432 # do not propagate an explicit root from the config file 

433 if "root" in config: 

434 del config["root"] 

435 

436 full = ButlerConfig(config, searchPaths=searchPaths) # this applies defaults 

437 imported_class = doImportType(full["datastore", "cls"]) 

438 if not issubclass(imported_class, Datastore): 

439 raise TypeError(f"Imported datastore class {full['datastore', 'cls']} is not a Datastore") 

440 datastoreClass: Type[Datastore] = imported_class 

441 datastoreClass.setConfigRoot(BUTLER_ROOT_TAG, config, full, overwrite=forceConfigRoot) 

442 

443 # if key exists in given config, parse it, otherwise parse the defaults 

444 # in the expanded config 

445 if config.get(("registry", "db")): 

446 registryConfig = RegistryConfig(config) 

447 else: 

448 registryConfig = RegistryConfig(full) 

449 defaultDatabaseUri = registryConfig.makeDefaultDatabaseUri(BUTLER_ROOT_TAG) 

450 if defaultDatabaseUri is not None: 

451 Config.updateParameters( 

452 RegistryConfig, config, full, toUpdate={"db": defaultDatabaseUri}, overwrite=forceConfigRoot 

453 ) 

454 else: 

455 Config.updateParameters(RegistryConfig, config, full, toCopy=("db",), overwrite=forceConfigRoot) 

456 

457 if standalone: 

458 config.merge(full) 

459 else: 

460 # Always expand the registry.managers section into the per-repo 

461 # config, because after the database schema is created, it's not 

462 # allowed to change anymore. Note that in the standalone=True 

463 # branch, _everything_ in the config is expanded, so there's no 

464 # need to special case this. 

465 Config.updateParameters(RegistryConfig, config, full, toMerge=("managers",), overwrite=False) 

466 configURI: ResourcePathExpression 

467 if outfile is not None: 

468 # When writing to a separate location we must include 

469 # the root of the butler repo in the config else it won't know 

470 # where to look. 

471 config["root"] = root_uri.geturl() 

472 configURI = outfile 

473 else: 

474 configURI = root_uri 

475 config.dumpToUri(configURI, overwrite=overwrite) 

476 

477 # Create Registry and populate tables 

478 registryConfig = RegistryConfig(config.get("registry")) 

479 dimensionConfig = DimensionConfig(dimensionConfig) 

480 Registry.createFromConfig(registryConfig, dimensionConfig=dimensionConfig, butlerRoot=root_uri) 

481 

482 log.verbose("Wrote new Butler configuration file to %s", configURI) 

483 

484 return config 

485 

486 @classmethod 

487 def _unpickle( 

488 cls, 

489 config: ButlerConfig, 

490 collections: Optional[CollectionSearch], 

491 run: Optional[str], 

492 defaultDataId: Dict[str, str], 

493 writeable: bool, 

494 ) -> Butler: 

495 """Callable used to unpickle a Butler. 

496 

497 We prefer not to use ``Butler.__init__`` directly so we can force some 

498 of its many arguments to be keyword-only (note that ``__reduce__`` 

499 can only invoke callables with positional arguments). 

500 

501 Parameters 

502 ---------- 

503 config : `ButlerConfig` 

504 Butler configuration, already coerced into a true `ButlerConfig` 

505 instance (and hence after any search paths for overrides have been 

506 utilized). 

507 collections : `CollectionSearch` 

508 Names of the default collections to read from. 

509 run : `str`, optional 

510 Name of the default `~CollectionType.RUN` collection to write to. 

511 defaultDataId : `dict` [ `str`, `str` ] 

512 Default data ID values. 

513 writeable : `bool` 

514 Whether the Butler should support write operations. 

515 

516 Returns 

517 ------- 

518 butler : `Butler` 

519 A new `Butler` instance. 

520 """ 

521 # MyPy doesn't recognize that the kwargs below are totally valid; it 

522 # seems to think '**defaultDataId* is a _positional_ argument! 

523 return cls( 

524 config=config, 

525 collections=collections, 

526 run=run, 

527 writeable=writeable, 

528 **defaultDataId, # type: ignore 

529 ) 

530 

531 def __reduce__(self) -> tuple: 

532 """Support pickling.""" 

533 return ( 

534 Butler._unpickle, 

535 ( 

536 self._config, 

537 self.collections, 

538 self.run, 

539 self.registry.defaults.dataId.byName(), 

540 self.registry.isWriteable(), 

541 ), 

542 ) 

543 

544 def __str__(self) -> str: 

545 return "Butler(collections={}, run={}, datastore='{}', registry='{}')".format( 

546 self.collections, self.run, self.datastore, self.registry 

547 ) 

548 

549 def isWriteable(self) -> bool: 

550 """Return `True` if this `Butler` supports write operations.""" 

551 return self.registry.isWriteable() 

552 

553 @contextlib.contextmanager 

554 def transaction(self) -> Iterator[None]: 

555 """Context manager supporting `Butler` transactions. 

556 

557 Transactions can be nested. 

558 """ 

559 with self.registry.transaction(): 

560 with self.datastore.transaction(): 

561 yield 

562 

563 def _standardizeArgs( 

564 self, 

565 datasetRefOrType: Union[DatasetRef, DatasetType, str], 

566 dataId: Optional[DataId] = None, 

567 for_put: bool = True, 

568 **kwargs: Any, 

569 ) -> Tuple[DatasetType, Optional[DataId]]: 

570 """Standardize the arguments passed to several Butler APIs. 

571 

572 Parameters 

573 ---------- 

574 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

575 When `DatasetRef` the `dataId` should be `None`. 

576 Otherwise the `DatasetType` or name thereof. 

577 dataId : `dict` or `DataCoordinate` 

578 A `dict` of `Dimension` link name, value pairs that label the 

579 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

580 should be provided as the second argument. 

581 for_put : `bool`, optional 

582 If `True` this call is invoked as part of a `Butler.put()`. 

583 Otherwise it is assumed to be part of a `Butler.get()`. This 

584 parameter is only relevant if there is dataset type 

585 inconsistency. 

586 **kwargs 

587 Additional keyword arguments used to augment or construct a 

588 `DataCoordinate`. See `DataCoordinate.standardize` 

589 parameters. 

590 

591 Returns 

592 ------- 

593 datasetType : `DatasetType` 

594 A `DatasetType` instance extracted from ``datasetRefOrType``. 

595 dataId : `dict` or `DataId`, optional 

596 Argument that can be used (along with ``kwargs``) to construct a 

597 `DataId`. 

598 

599 Notes 

600 ----- 

601 Butler APIs that conceptually need a DatasetRef also allow passing a 

602 `DatasetType` (or the name of one) and a `DataId` (or a dict and 

603 keyword arguments that can be used to construct one) separately. This 

604 method accepts those arguments and always returns a true `DatasetType` 

605 and a `DataId` or `dict`. 

606 

607 Standardization of `dict` vs `DataId` is best handled by passing the 

608 returned ``dataId`` (and ``kwargs``) to `Registry` APIs, which are 

609 generally similarly flexible. 

610 """ 

611 externalDatasetType: Optional[DatasetType] = None 

612 internalDatasetType: Optional[DatasetType] = None 

613 if isinstance(datasetRefOrType, DatasetRef): 

614 if dataId is not None or kwargs: 

615 raise ValueError("DatasetRef given, cannot use dataId as well") 

616 externalDatasetType = datasetRefOrType.datasetType 

617 dataId = datasetRefOrType.dataId 

618 else: 

619 # Don't check whether DataId is provided, because Registry APIs 

620 # can usually construct a better error message when it wasn't. 

621 if isinstance(datasetRefOrType, DatasetType): 

622 externalDatasetType = datasetRefOrType 

623 else: 

624 internalDatasetType = self.registry.getDatasetType(datasetRefOrType) 

625 

626 # Check that they are self-consistent 

627 if externalDatasetType is not None: 

628 internalDatasetType = self.registry.getDatasetType(externalDatasetType.name) 

629 if externalDatasetType != internalDatasetType: 

630 # We can allow differences if they are compatible, depending 

631 # on whether this is a get or a put. A get requires that 

632 # the python type associated with the datastore can be 

633 # converted to the user type. A put requires that the user 

634 # supplied python type can be converted to the internal 

635 # type expected by registry. 

636 relevantDatasetType = internalDatasetType 

637 if for_put: 

638 is_compatible = internalDatasetType.is_compatible_with(externalDatasetType) 

639 else: 

640 is_compatible = externalDatasetType.is_compatible_with(internalDatasetType) 

641 relevantDatasetType = externalDatasetType 

642 if not is_compatible: 

643 raise ValueError( 

644 f"Supplied dataset type ({externalDatasetType}) inconsistent with " 

645 f"registry definition ({internalDatasetType})" 

646 ) 

647 # Override the internal definition. 

648 internalDatasetType = relevantDatasetType 

649 

650 assert internalDatasetType is not None 

651 return internalDatasetType, dataId 

652 

653 def _rewrite_data_id( 

654 self, dataId: Optional[DataId], datasetType: DatasetType, **kwargs: Any 

655 ) -> Tuple[Optional[DataId], Dict[str, Any]]: 

656 """Rewrite a data ID taking into account dimension records. 

657 

658 Take a Data ID and keyword args and rewrite it if necessary to 

659 allow the user to specify dimension records rather than dimension 

660 primary values. 

661 

662 This allows a user to include a dataId dict with keys of 

663 ``exposure.day_obs`` and ``exposure.seq_num`` instead of giving 

664 the integer exposure ID. It also allows a string to be given 

665 for a dimension value rather than the integer ID if that is more 

666 convenient. For example, rather than having to specifyin the 

667 detector with ``detector.full_name``, a string given for ``detector`` 

668 will be interpreted as the full name and converted to the integer 

669 value. 

670 

671 Keyword arguments can also use strings for dimensions like detector 

672 and exposure but python does not allow them to include ``.`` and 

673 so the ``exposure.day_obs`` syntax can not be used in a keyword 

674 argument. 

675 

676 Parameters 

677 ---------- 

678 dataId : `dict` or `DataCoordinate` 

679 A `dict` of `Dimension` link name, value pairs that will label the 

680 `DatasetRef` within a Collection. 

681 datasetType : `DatasetType` 

682 The dataset type associated with this dataId. Required to 

683 determine the relevant dimensions. 

684 **kwargs 

685 Additional keyword arguments used to augment or construct a 

686 `DataId`. See `DataId` parameters. 

687 

688 Returns 

689 ------- 

690 dataId : `dict` or `DataCoordinate` 

691 The, possibly rewritten, dataId. If given a `DataCoordinate` and 

692 no keyword arguments, the original dataId will be returned 

693 unchanged. 

694 **kwargs : `dict` 

695 Any unused keyword arguments (would normally be empty dict). 

696 """ 

697 # Do nothing if we have a standalone DataCoordinate. 

698 if isinstance(dataId, DataCoordinate) and not kwargs: 

699 return dataId, kwargs 

700 

701 # Process dimension records that are using record information 

702 # rather than ids 

703 newDataId: Dict[str, DataIdValue] = {} 

704 byRecord: Dict[str, Dict[str, Any]] = defaultdict(dict) 

705 

706 # if all the dataId comes from keyword parameters we do not need 

707 # to do anything here because they can't be of the form 

708 # exposure.obs_id because a "." is not allowed in a keyword parameter. 

709 if dataId: 

710 for k, v in dataId.items(): 

711 # If we have a Dimension we do not need to do anything 

712 # because it cannot be a compound key. 

713 if isinstance(k, str) and "." in k: 

714 # Someone is using a more human-readable dataId 

715 dimensionName, record = k.split(".", 1) 

716 byRecord[dimensionName][record] = v 

717 elif isinstance(k, Dimension): 

718 newDataId[k.name] = v 

719 else: 

720 newDataId[k] = v 

721 

722 # Go through the updated dataId and check the type in case someone is 

723 # using an alternate key. We have already filtered out the compound 

724 # keys dimensions.record format. 

725 not_dimensions = {} 

726 

727 # Will need to look in the dataId and the keyword arguments 

728 # and will remove them if they need to be fixed or are unrecognized. 

729 for dataIdDict in (newDataId, kwargs): 

730 # Use a list so we can adjust the dict safely in the loop 

731 for dimensionName in list(dataIdDict): 

732 value = dataIdDict[dimensionName] 

733 try: 

734 dimension = self.registry.dimensions.getStaticDimensions()[dimensionName] 

735 except KeyError: 

736 # This is not a real dimension 

737 not_dimensions[dimensionName] = value 

738 del dataIdDict[dimensionName] 

739 continue 

740 

741 # Convert an integral type to an explicit int to simplify 

742 # comparisons here 

743 if isinstance(value, numbers.Integral): 

744 value = int(value) 

745 

746 if not isinstance(value, dimension.primaryKey.getPythonType()): 

747 for alternate in dimension.alternateKeys: 

748 if isinstance(value, alternate.getPythonType()): 

749 byRecord[dimensionName][alternate.name] = value 

750 del dataIdDict[dimensionName] 

751 log.debug( 

752 "Converting dimension %s to %s.%s=%s", 

753 dimensionName, 

754 dimensionName, 

755 alternate.name, 

756 value, 

757 ) 

758 break 

759 else: 

760 log.warning( 

761 "Type mismatch found for value '%r' provided for dimension %s. " 

762 "Could not find matching alternative (primary key has type %s) " 

763 "so attempting to use as-is.", 

764 value, 

765 dimensionName, 

766 dimension.primaryKey.getPythonType(), 

767 ) 

768 

769 # By this point kwargs and newDataId should only include valid 

770 # dimensions. Merge kwargs in to the new dataId and log if there 

771 # are dimensions in both (rather than calling update). 

772 for k, v in kwargs.items(): 

773 if k in newDataId and newDataId[k] != v: 

774 log.debug( 

775 "Keyword arg %s overriding explicit value in dataId of %s with %s", k, newDataId[k], v 

776 ) 

777 newDataId[k] = v 

778 # No need to retain any values in kwargs now. 

779 kwargs = {} 

780 

781 # If we have some unrecognized dimensions we have to try to connect 

782 # them to records in other dimensions. This is made more complicated 

783 # by some dimensions having records with clashing names. A mitigation 

784 # is that we can tell by this point which dimensions are missing 

785 # for the DatasetType but this does not work for calibrations 

786 # where additional dimensions can be used to constrain the temporal 

787 # axis. 

788 if not_dimensions: 

789 # Search for all dimensions even if we have been given a value 

790 # explicitly. In some cases records are given as well as the 

791 # actually dimension and this should not be an error if they 

792 # match. 

793 mandatoryDimensions = datasetType.dimensions.names # - provided 

794 

795 candidateDimensions: Set[str] = set() 

796 candidateDimensions.update(mandatoryDimensions) 

797 

798 # For calibrations we may well be needing temporal dimensions 

799 # so rather than always including all dimensions in the scan 

800 # restrict things a little. It is still possible for there 

801 # to be confusion over day_obs in visit vs exposure for example. 

802 # If we are not searching calibration collections things may 

803 # fail but they are going to fail anyway because of the 

804 # ambiguousness of the dataId... 

805 if datasetType.isCalibration(): 

806 for dim in self.registry.dimensions.getStaticDimensions(): 

807 if dim.temporal: 

808 candidateDimensions.add(str(dim)) 

809 

810 # Look up table for the first association with a dimension 

811 guessedAssociation: Dict[str, Dict[str, Any]] = defaultdict(dict) 

812 

813 # Keep track of whether an item is associated with multiple 

814 # dimensions. 

815 counter: Counter[str] = Counter() 

816 assigned: Dict[str, Set[str]] = defaultdict(set) 

817 

818 # Go through the missing dimensions and associate the 

819 # given names with records within those dimensions 

820 matched_dims = set() 

821 for dimensionName in candidateDimensions: 

822 dimension = self.registry.dimensions.getStaticDimensions()[dimensionName] 

823 fields = dimension.metadata.names | dimension.uniqueKeys.names 

824 for field in not_dimensions: 

825 if field in fields: 

826 guessedAssociation[dimensionName][field] = not_dimensions[field] 

827 counter[dimensionName] += 1 

828 assigned[field].add(dimensionName) 

829 matched_dims.add(field) 

830 

831 # Calculate the fields that matched nothing. 

832 never_found = set(not_dimensions) - matched_dims 

833 

834 if never_found: 

835 raise DimensionNameError(f"Unrecognized keyword args given: {never_found}") 

836 

837 # There is a chance we have allocated a single dataId item 

838 # to multiple dimensions. Need to decide which should be retained. 

839 # For now assume that the most popular alternative wins. 

840 # This means that day_obs with seq_num will result in 

841 # exposure.day_obs and not visit.day_obs 

842 # Also prefer an explicitly missing dimension over an inferred 

843 # temporal dimension. 

844 for fieldName, assignedDimensions in assigned.items(): 

845 if len(assignedDimensions) > 1: 

846 # Pick the most popular (preferring mandatory dimensions) 

847 requiredButMissing = assignedDimensions.intersection(mandatoryDimensions) 

848 if requiredButMissing: 

849 candidateDimensions = requiredButMissing 

850 else: 

851 candidateDimensions = assignedDimensions 

852 

853 # Select the relevant items and get a new restricted 

854 # counter. 

855 theseCounts = {k: v for k, v in counter.items() if k in candidateDimensions} 

856 duplicatesCounter: Counter[str] = Counter() 

857 duplicatesCounter.update(theseCounts) 

858 

859 # Choose the most common. If they are equally common 

860 # we will pick the one that was found first. 

861 # Returns a list of tuples 

862 selected = duplicatesCounter.most_common(1)[0][0] 

863 

864 log.debug( 

865 "Ambiguous dataId entry '%s' associated with multiple dimensions: %s." 

866 " Removed ambiguity by choosing dimension %s.", 

867 fieldName, 

868 ", ".join(assignedDimensions), 

869 selected, 

870 ) 

871 

872 for candidateDimension in assignedDimensions: 

873 if candidateDimension != selected: 

874 del guessedAssociation[candidateDimension][fieldName] 

875 

876 # Update the record look up dict with the new associations 

877 for dimensionName, values in guessedAssociation.items(): 

878 if values: # A dict might now be empty 

879 log.debug("Assigned non-dimension dataId keys to dimension %s: %s", dimensionName, values) 

880 byRecord[dimensionName].update(values) 

881 

882 if byRecord: 

883 # Some record specifiers were found so we need to convert 

884 # them to the Id form 

885 for dimensionName, values in byRecord.items(): 

886 if dimensionName in newDataId: 

887 log.debug( 

888 "DataId specified explicit %s dimension value of %s in addition to" 

889 " general record specifiers for it of %s. Ignoring record information.", 

890 dimensionName, 

891 newDataId[dimensionName], 

892 str(values), 

893 ) 

894 # Get the actual record and compare with these values. 

895 try: 

896 recs = list(self.registry.queryDimensionRecords(dimensionName, dataId=newDataId)) 

897 except DataIdError: 

898 raise DataIdValueError( 

899 f"Could not find dimension '{dimensionName}'" 

900 f" with dataId {newDataId} as part of comparing with" 

901 f" record values {byRecord[dimensionName]}" 

902 ) from None 

903 if len(recs) == 1: 

904 errmsg: List[str] = [] 

905 for k, v in values.items(): 

906 if (recval := getattr(recs[0], k)) != v: 

907 errmsg.append(f"{k}({recval} != {v})") 

908 if errmsg: 

909 raise InconsistentDataIdError( 

910 f"Dimension {dimensionName} in dataId has explicit value" 

911 " inconsistent with records: " + ", ".join(errmsg) 

912 ) 

913 else: 

914 # Multiple matches for an explicit dimension 

915 # should never happen but let downstream complain. 

916 pass 

917 continue 

918 

919 # Build up a WHERE expression 

920 bind = {k: v for k, v in values.items()} 

921 where = " AND ".join(f"{dimensionName}.{k} = {k}" for k in bind) 

922 

923 # Hopefully we get a single record that matches 

924 records = set( 

925 self.registry.queryDimensionRecords( 

926 dimensionName, dataId=newDataId, where=where, bind=bind, **kwargs 

927 ) 

928 ) 

929 

930 if len(records) != 1: 

931 if len(records) > 1: 

932 log.debug("Received %d records from constraints of %s", len(records), str(values)) 

933 for r in records: 

934 log.debug("- %s", str(r)) 

935 raise InconsistentDataIdError( 

936 f"DataId specification for dimension {dimensionName} is not" 

937 f" uniquely constrained to a single dataset by {values}." 

938 f" Got {len(records)} results." 

939 ) 

940 raise InconsistentDataIdError( 

941 f"DataId specification for dimension {dimensionName} matched no" 

942 f" records when constrained by {values}" 

943 ) 

944 

945 # Get the primary key from the real dimension object 

946 dimension = self.registry.dimensions.getStaticDimensions()[dimensionName] 

947 if not isinstance(dimension, Dimension): 

948 raise RuntimeError( 

949 f"{dimension.name} is not a true dimension, and cannot be used in data IDs." 

950 ) 

951 newDataId[dimensionName] = getattr(records.pop(), dimension.primaryKey.name) 

952 

953 return newDataId, kwargs 

954 

955 def _findDatasetRef( 

956 self, 

957 datasetRefOrType: Union[DatasetRef, DatasetType, str], 

958 dataId: Optional[DataId] = None, 

959 *, 

960 collections: Any = None, 

961 allowUnresolved: bool = False, 

962 **kwargs: Any, 

963 ) -> DatasetRef: 

964 """Shared logic for methods that start with a search for a dataset in 

965 the registry. 

966 

967 Parameters 

968 ---------- 

969 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

970 When `DatasetRef` the `dataId` should be `None`. 

971 Otherwise the `DatasetType` or name thereof. 

972 dataId : `dict` or `DataCoordinate`, optional 

973 A `dict` of `Dimension` link name, value pairs that label the 

974 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

975 should be provided as the first argument. 

976 collections : Any, optional 

977 Collections to be searched, overriding ``self.collections``. 

978 Can be any of the types supported by the ``collections`` argument 

979 to butler construction. 

980 allowUnresolved : `bool`, optional 

981 If `True`, return an unresolved `DatasetRef` if finding a resolved 

982 one in the `Registry` fails. Defaults to `False`. 

983 **kwargs 

984 Additional keyword arguments used to augment or construct a 

985 `DataId`. See `DataId` parameters. 

986 

987 Returns 

988 ------- 

989 ref : `DatasetRef` 

990 A reference to the dataset identified by the given arguments. 

991 

992 Raises 

993 ------ 

994 LookupError 

995 Raised if no matching dataset exists in the `Registry` (and 

996 ``allowUnresolved is False``). 

997 ValueError 

998 Raised if a resolved `DatasetRef` was passed as an input, but it 

999 differs from the one found in the registry. 

1000 TypeError 

1001 Raised if no collections were provided. 

1002 """ 

1003 datasetType, dataId = self._standardizeArgs(datasetRefOrType, dataId, for_put=False, **kwargs) 

1004 if isinstance(datasetRefOrType, DatasetRef): 

1005 idNumber = datasetRefOrType.id 

1006 else: 

1007 idNumber = None 

1008 timespan: Optional[Timespan] = None 

1009 

1010 dataId, kwargs = self._rewrite_data_id(dataId, datasetType, **kwargs) 

1011 

1012 if datasetType.isCalibration(): 

1013 # Because this is a calibration dataset, first try to make a 

1014 # standardize the data ID without restricting the dimensions to 

1015 # those of the dataset type requested, because there may be extra 

1016 # dimensions that provide temporal information for a validity-range 

1017 # lookup. 

1018 dataId = DataCoordinate.standardize( 

1019 dataId, universe=self.registry.dimensions, defaults=self.registry.defaults.dataId, **kwargs 

1020 ) 

1021 if dataId.graph.temporal: 

1022 dataId = self.registry.expandDataId(dataId) 

1023 timespan = dataId.timespan 

1024 else: 

1025 # Standardize the data ID to just the dimensions of the dataset 

1026 # type instead of letting registry.findDataset do it, so we get the 

1027 # result even if no dataset is found. 

1028 dataId = DataCoordinate.standardize( 

1029 dataId, graph=datasetType.dimensions, defaults=self.registry.defaults.dataId, **kwargs 

1030 ) 

1031 # Always lookup the DatasetRef, even if one is given, to ensure it is 

1032 # present in the current collection. 

1033 ref = self.registry.findDataset(datasetType, dataId, collections=collections, timespan=timespan) 

1034 if ref is None: 

1035 if allowUnresolved: 

1036 return DatasetRef(datasetType, dataId) 

1037 else: 

1038 if collections is None: 

1039 collections = self.registry.defaults.collections 

1040 raise LookupError( 

1041 f"Dataset {datasetType.name} with data ID {dataId} " 

1042 f"could not be found in collections {collections}." 

1043 ) 

1044 if idNumber is not None and idNumber != ref.id: 

1045 if collections is None: 

1046 collections = self.registry.defaults.collections 

1047 raise ValueError( 

1048 f"DatasetRef.id provided ({idNumber}) does not match " 

1049 f"id ({ref.id}) in registry in collections {collections}." 

1050 ) 

1051 if datasetType != ref.datasetType: 

1052 # If they differ it is because the user explicitly specified 

1053 # a compatible dataset type to this call rather than using the 

1054 # registry definition. The DatasetRef must therefore be recreated 

1055 # using the user definition such that the expected type is 

1056 # returned. 

1057 ref = DatasetRef(datasetType, ref.dataId, run=ref.run, id=ref.id) 

1058 

1059 return ref 

1060 

1061 @transactional 

1062 def putDirect(self, obj: Any, ref: DatasetRef) -> DatasetRef: 

1063 # Docstring inherited. 

1064 (imported_ref,) = self.registry._importDatasets( 

1065 [ref], 

1066 expand=True, 

1067 ) 

1068 if imported_ref.id != ref.getCheckedId(): 

1069 raise RuntimeError("This registry configuration does not support putDirect.") 

1070 self.datastore.put(obj, ref) 

1071 return ref 

1072 

1073 @transactional 

1074 def put( 

1075 self, 

1076 obj: Any, 

1077 datasetRefOrType: Union[DatasetRef, DatasetType, str], 

1078 dataId: Optional[DataId] = None, 

1079 *, 

1080 run: Optional[str] = None, 

1081 **kwargs: Any, 

1082 ) -> DatasetRef: 

1083 """Store and register a dataset. 

1084 

1085 Parameters 

1086 ---------- 

1087 obj : `object` 

1088 The dataset. 

1089 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

1090 When `DatasetRef` is provided, ``dataId`` should be `None`. 

1091 Otherwise the `DatasetType` or name thereof. 

1092 dataId : `dict` or `DataCoordinate` 

1093 A `dict` of `Dimension` link name, value pairs that label the 

1094 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

1095 should be provided as the second argument. 

1096 run : `str`, optional 

1097 The name of the run the dataset should be added to, overriding 

1098 ``self.run``. 

1099 **kwargs 

1100 Additional keyword arguments used to augment or construct a 

1101 `DataCoordinate`. See `DataCoordinate.standardize` 

1102 parameters. 

1103 

1104 Returns 

1105 ------- 

1106 ref : `DatasetRef` 

1107 A reference to the stored dataset, updated with the correct id if 

1108 given. 

1109 

1110 Raises 

1111 ------ 

1112 TypeError 

1113 Raised if the butler is read-only or if no run has been provided. 

1114 """ 

1115 log.debug("Butler put: %s, dataId=%s, run=%s", datasetRefOrType, dataId, run) 

1116 if not self.isWriteable(): 

1117 raise TypeError("Butler is read-only.") 

1118 datasetType, dataId = self._standardizeArgs(datasetRefOrType, dataId, **kwargs) 

1119 if isinstance(datasetRefOrType, DatasetRef) and datasetRefOrType.id is not None: 

1120 raise ValueError("DatasetRef must not be in registry, must have None id") 

1121 

1122 # Handle dimension records in dataId 

1123 dataId, kwargs = self._rewrite_data_id(dataId, datasetType, **kwargs) 

1124 

1125 # Add Registry Dataset entry. 

1126 dataId = self.registry.expandDataId(dataId, graph=datasetType.dimensions, **kwargs) 

1127 

1128 # For an execution butler the datasets will be pre-defined. 

1129 # If the butler is configured that way datasets should only be inserted 

1130 # if they do not already exist in registry. Trying and catching 

1131 # ConflictingDefinitionError will not work because the transaction 

1132 # will be corrupted. Instead, in this mode always check first. 

1133 ref = None 

1134 ref_is_predefined = False 

1135 if self._allow_put_of_predefined_dataset: 

1136 # Get the matching ref for this run. 

1137 ref = self.registry.findDataset(datasetType, collections=run, dataId=dataId) 

1138 

1139 if ref: 

1140 # Must be expanded form for datastore templating 

1141 dataId = self.registry.expandDataId(dataId, graph=datasetType.dimensions) 

1142 ref = ref.expanded(dataId) 

1143 ref_is_predefined = True 

1144 

1145 if not ref: 

1146 (ref,) = self.registry.insertDatasets(datasetType, run=run, dataIds=[dataId]) 

1147 

1148 # If the ref is predefined it is possible that the datastore also 

1149 # has the record. Asking datastore to put it again will result in 

1150 # the artifact being recreated, overwriting previous, then will cause 

1151 # a failure in writing the record which will cause the artifact 

1152 # to be removed. Much safer to ask first before attempting to 

1153 # overwrite. Race conditions should not be an issue for the 

1154 # execution butler environment. 

1155 if ref_is_predefined: 

1156 if self.datastore.knows(ref): 

1157 raise ConflictingDefinitionError(f"Dataset associated {ref} already exists.") 

1158 

1159 self.datastore.put(obj, ref) 

1160 

1161 return ref 

1162 

1163 def getDirect(self, ref: DatasetRef, *, parameters: Optional[Dict[str, Any]] = None) -> Any: 

1164 """Retrieve a stored dataset. 

1165 

1166 Unlike `Butler.get`, this method allows datasets outside the Butler's 

1167 collection to be read as long as the `DatasetRef` that identifies them 

1168 can be obtained separately. 

1169 

1170 Parameters 

1171 ---------- 

1172 ref : `DatasetRef` 

1173 Resolved reference to an already stored dataset. 

1174 parameters : `dict` 

1175 Additional StorageClass-defined options to control reading, 

1176 typically used to efficiently read only a subset of the dataset. 

1177 

1178 Returns 

1179 ------- 

1180 obj : `object` 

1181 The dataset. 

1182 """ 

1183 return self.datastore.get(ref, parameters=parameters) 

1184 

1185 def getDirectDeferred( 

1186 self, ref: DatasetRef, *, parameters: Union[dict, None] = None 

1187 ) -> DeferredDatasetHandle: 

1188 """Create a `DeferredDatasetHandle` which can later retrieve a dataset, 

1189 from a resolved `DatasetRef`. 

1190 

1191 Parameters 

1192 ---------- 

1193 ref : `DatasetRef` 

1194 Resolved reference to an already stored dataset. 

1195 parameters : `dict` 

1196 Additional StorageClass-defined options to control reading, 

1197 typically used to efficiently read only a subset of the dataset. 

1198 

1199 Returns 

1200 ------- 

1201 obj : `DeferredDatasetHandle` 

1202 A handle which can be used to retrieve a dataset at a later time. 

1203 

1204 Raises 

1205 ------ 

1206 AmbiguousDatasetError 

1207 Raised if ``ref.id is None``, i.e. the reference is unresolved. 

1208 """ 

1209 if ref.id is None: 

1210 raise AmbiguousDatasetError( 

1211 f"Dataset of type {ref.datasetType.name} with data ID {ref.dataId} is not resolved." 

1212 ) 

1213 return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters) 

1214 

1215 def getDeferred( 

1216 self, 

1217 datasetRefOrType: Union[DatasetRef, DatasetType, str], 

1218 dataId: Optional[DataId] = None, 

1219 *, 

1220 parameters: Union[dict, None] = None, 

1221 collections: Any = None, 

1222 **kwargs: Any, 

1223 ) -> DeferredDatasetHandle: 

1224 """Create a `DeferredDatasetHandle` which can later retrieve a dataset, 

1225 after an immediate registry lookup. 

1226 

1227 Parameters 

1228 ---------- 

1229 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

1230 When `DatasetRef` the `dataId` should be `None`. 

1231 Otherwise the `DatasetType` or name thereof. 

1232 dataId : `dict` or `DataCoordinate`, optional 

1233 A `dict` of `Dimension` link name, value pairs that label the 

1234 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

1235 should be provided as the first argument. 

1236 parameters : `dict` 

1237 Additional StorageClass-defined options to control reading, 

1238 typically used to efficiently read only a subset of the dataset. 

1239 collections : Any, optional 

1240 Collections to be searched, overriding ``self.collections``. 

1241 Can be any of the types supported by the ``collections`` argument 

1242 to butler construction. 

1243 **kwargs 

1244 Additional keyword arguments used to augment or construct a 

1245 `DataId`. See `DataId` parameters. 

1246 

1247 Returns 

1248 ------- 

1249 obj : `DeferredDatasetHandle` 

1250 A handle which can be used to retrieve a dataset at a later time. 

1251 

1252 Raises 

1253 ------ 

1254 LookupError 

1255 Raised if no matching dataset exists in the `Registry` (and 

1256 ``allowUnresolved is False``). 

1257 ValueError 

1258 Raised if a resolved `DatasetRef` was passed as an input, but it 

1259 differs from the one found in the registry. 

1260 TypeError 

1261 Raised if no collections were provided. 

1262 """ 

1263 ref = self._findDatasetRef(datasetRefOrType, dataId, collections=collections, **kwargs) 

1264 return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters) 

1265 

1266 def get( 

1267 self, 

1268 datasetRefOrType: Union[DatasetRef, DatasetType, str], 

1269 dataId: Optional[DataId] = None, 

1270 *, 

1271 parameters: Optional[Dict[str, Any]] = None, 

1272 collections: Any = None, 

1273 **kwargs: Any, 

1274 ) -> Any: 

1275 """Retrieve a stored dataset. 

1276 

1277 Parameters 

1278 ---------- 

1279 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

1280 When `DatasetRef` the `dataId` should be `None`. 

1281 Otherwise the `DatasetType` or name thereof. 

1282 dataId : `dict` or `DataCoordinate` 

1283 A `dict` of `Dimension` link name, value pairs that label the 

1284 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

1285 should be provided as the first argument. 

1286 parameters : `dict` 

1287 Additional StorageClass-defined options to control reading, 

1288 typically used to efficiently read only a subset of the dataset. 

1289 collections : Any, optional 

1290 Collections to be searched, overriding ``self.collections``. 

1291 Can be any of the types supported by the ``collections`` argument 

1292 to butler construction. 

1293 **kwargs 

1294 Additional keyword arguments used to augment or construct a 

1295 `DataCoordinate`. See `DataCoordinate.standardize` 

1296 parameters. 

1297 

1298 Returns 

1299 ------- 

1300 obj : `object` 

1301 The dataset. 

1302 

1303 Raises 

1304 ------ 

1305 ValueError 

1306 Raised if a resolved `DatasetRef` was passed as an input, but it 

1307 differs from the one found in the registry. 

1308 LookupError 

1309 Raised if no matching dataset exists in the `Registry`. 

1310 TypeError 

1311 Raised if no collections were provided. 

1312 

1313 Notes 

1314 ----- 

1315 When looking up datasets in a `~CollectionType.CALIBRATION` collection, 

1316 this method requires that the given data ID include temporal dimensions 

1317 beyond the dimensions of the dataset type itself, in order to find the 

1318 dataset with the appropriate validity range. For example, a "bias" 

1319 dataset with native dimensions ``{instrument, detector}`` could be 

1320 fetched with a ``{instrument, detector, exposure}`` data ID, because 

1321 ``exposure`` is a temporal dimension. 

1322 """ 

1323 log.debug("Butler get: %s, dataId=%s, parameters=%s", datasetRefOrType, dataId, parameters) 

1324 ref = self._findDatasetRef(datasetRefOrType, dataId, collections=collections, **kwargs) 

1325 return self.getDirect(ref, parameters=parameters) 

1326 

1327 def getURIs( 

1328 self, 

1329 datasetRefOrType: Union[DatasetRef, DatasetType, str], 

1330 dataId: Optional[DataId] = None, 

1331 *, 

1332 predict: bool = False, 

1333 collections: Any = None, 

1334 run: Optional[str] = None, 

1335 **kwargs: Any, 

1336 ) -> Tuple[Optional[ResourcePath], Dict[str, ResourcePath]]: 

1337 """Returns the URIs associated with the dataset. 

1338 

1339 Parameters 

1340 ---------- 

1341 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

1342 When `DatasetRef` the `dataId` should be `None`. 

1343 Otherwise the `DatasetType` or name thereof. 

1344 dataId : `dict` or `DataCoordinate` 

1345 A `dict` of `Dimension` link name, value pairs that label the 

1346 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

1347 should be provided as the first argument. 

1348 predict : `bool` 

1349 If `True`, allow URIs to be returned of datasets that have not 

1350 been written. 

1351 collections : Any, optional 

1352 Collections to be searched, overriding ``self.collections``. 

1353 Can be any of the types supported by the ``collections`` argument 

1354 to butler construction. 

1355 run : `str`, optional 

1356 Run to use for predictions, overriding ``self.run``. 

1357 **kwargs 

1358 Additional keyword arguments used to augment or construct a 

1359 `DataCoordinate`. See `DataCoordinate.standardize` 

1360 parameters. 

1361 

1362 Returns 

1363 ------- 

1364 primary : `lsst.resources.ResourcePath` 

1365 The URI to the primary artifact associated with this dataset. 

1366 If the dataset was disassembled within the datastore this 

1367 may be `None`. 

1368 components : `dict` 

1369 URIs to any components associated with the dataset artifact. 

1370 Can be empty if there are no components. 

1371 """ 

1372 ref = self._findDatasetRef( 

1373 datasetRefOrType, dataId, allowUnresolved=predict, collections=collections, **kwargs 

1374 ) 

1375 if ref.id is None: # only possible if predict is True 

1376 if run is None: 

1377 run = self.run 

1378 if run is None: 

1379 raise TypeError("Cannot predict location with run=None.") 

1380 # Lie about ID, because we can't guess it, and only 

1381 # Datastore.getURIs() will ever see it (and it doesn't use it). 

1382 ref = ref.resolved(id=0, run=run) 

1383 return self.datastore.getURIs(ref, predict) 

1384 

1385 def getURI( 

1386 self, 

1387 datasetRefOrType: Union[DatasetRef, DatasetType, str], 

1388 dataId: Optional[DataId] = None, 

1389 *, 

1390 predict: bool = False, 

1391 collections: Any = None, 

1392 run: Optional[str] = None, 

1393 **kwargs: Any, 

1394 ) -> ResourcePath: 

1395 """Return the URI to the Dataset. 

1396 

1397 Parameters 

1398 ---------- 

1399 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

1400 When `DatasetRef` the `dataId` should be `None`. 

1401 Otherwise the `DatasetType` or name thereof. 

1402 dataId : `dict` or `DataCoordinate` 

1403 A `dict` of `Dimension` link name, value pairs that label the 

1404 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

1405 should be provided as the first argument. 

1406 predict : `bool` 

1407 If `True`, allow URIs to be returned of datasets that have not 

1408 been written. 

1409 collections : Any, optional 

1410 Collections to be searched, overriding ``self.collections``. 

1411 Can be any of the types supported by the ``collections`` argument 

1412 to butler construction. 

1413 run : `str`, optional 

1414 Run to use for predictions, overriding ``self.run``. 

1415 **kwargs 

1416 Additional keyword arguments used to augment or construct a 

1417 `DataCoordinate`. See `DataCoordinate.standardize` 

1418 parameters. 

1419 

1420 Returns 

1421 ------- 

1422 uri : `lsst.resources.ResourcePath` 

1423 URI pointing to the Dataset within the datastore. If the 

1424 Dataset does not exist in the datastore, and if ``predict`` is 

1425 `True`, the URI will be a prediction and will include a URI 

1426 fragment "#predicted". 

1427 If the datastore does not have entities that relate well 

1428 to the concept of a URI the returned URI string will be 

1429 descriptive. The returned URI is not guaranteed to be obtainable. 

1430 

1431 Raises 

1432 ------ 

1433 LookupError 

1434 A URI has been requested for a dataset that does not exist and 

1435 guessing is not allowed. 

1436 ValueError 

1437 Raised if a resolved `DatasetRef` was passed as an input, but it 

1438 differs from the one found in the registry. 

1439 TypeError 

1440 Raised if no collections were provided. 

1441 RuntimeError 

1442 Raised if a URI is requested for a dataset that consists of 

1443 multiple artifacts. 

1444 """ 

1445 primary, components = self.getURIs( 

1446 datasetRefOrType, dataId=dataId, predict=predict, collections=collections, run=run, **kwargs 

1447 ) 

1448 

1449 if primary is None or components: 

1450 raise RuntimeError( 

1451 f"Dataset ({datasetRefOrType}) includes distinct URIs for components. " 

1452 "Use Butler.getURIs() instead." 

1453 ) 

1454 return primary 

1455 

1456 def retrieveArtifacts( 

1457 self, 

1458 refs: Iterable[DatasetRef], 

1459 destination: ResourcePathExpression, 

1460 transfer: str = "auto", 

1461 preserve_path: bool = True, 

1462 overwrite: bool = False, 

1463 ) -> List[ResourcePath]: 

1464 """Retrieve the artifacts associated with the supplied refs. 

1465 

1466 Parameters 

1467 ---------- 

1468 refs : iterable of `DatasetRef` 

1469 The datasets for which artifacts are to be retrieved. 

1470 A single ref can result in multiple artifacts. The refs must 

1471 be resolved. 

1472 destination : `lsst.resources.ResourcePath` or `str` 

1473 Location to write the artifacts. 

1474 transfer : `str`, optional 

1475 Method to use to transfer the artifacts. Must be one of the options 

1476 supported by `~lsst.resources.ResourcePath.transfer_from()`. 

1477 "move" is not allowed. 

1478 preserve_path : `bool`, optional 

1479 If `True` the full path of the artifact within the datastore 

1480 is preserved. If `False` the final file component of the path 

1481 is used. 

1482 overwrite : `bool`, optional 

1483 If `True` allow transfers to overwrite existing files at the 

1484 destination. 

1485 

1486 Returns 

1487 ------- 

1488 targets : `list` of `lsst.resources.ResourcePath` 

1489 URIs of file artifacts in destination location. Order is not 

1490 preserved. 

1491 

1492 Notes 

1493 ----- 

1494 For non-file datastores the artifacts written to the destination 

1495 may not match the representation inside the datastore. For example 

1496 a hierarchical data structure in a NoSQL database may well be stored 

1497 as a JSON file. 

1498 """ 

1499 return self.datastore.retrieveArtifacts( 

1500 refs, 

1501 ResourcePath(destination), 

1502 transfer=transfer, 

1503 preserve_path=preserve_path, 

1504 overwrite=overwrite, 

1505 ) 

1506 

1507 def datasetExists( 

1508 self, 

1509 datasetRefOrType: Union[DatasetRef, DatasetType, str], 

1510 dataId: Optional[DataId] = None, 

1511 *, 

1512 collections: Any = None, 

1513 **kwargs: Any, 

1514 ) -> bool: 

1515 """Return True if the Dataset is actually present in the Datastore. 

1516 

1517 Parameters 

1518 ---------- 

1519 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

1520 When `DatasetRef` the `dataId` should be `None`. 

1521 Otherwise the `DatasetType` or name thereof. 

1522 dataId : `dict` or `DataCoordinate` 

1523 A `dict` of `Dimension` link name, value pairs that label the 

1524 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

1525 should be provided as the first argument. 

1526 collections : Any, optional 

1527 Collections to be searched, overriding ``self.collections``. 

1528 Can be any of the types supported by the ``collections`` argument 

1529 to butler construction. 

1530 **kwargs 

1531 Additional keyword arguments used to augment or construct a 

1532 `DataCoordinate`. See `DataCoordinate.standardize` 

1533 parameters. 

1534 

1535 Raises 

1536 ------ 

1537 LookupError 

1538 Raised if the dataset is not even present in the Registry. 

1539 ValueError 

1540 Raised if a resolved `DatasetRef` was passed as an input, but it 

1541 differs from the one found in the registry. 

1542 TypeError 

1543 Raised if no collections were provided. 

1544 """ 

1545 ref = self._findDatasetRef(datasetRefOrType, dataId, collections=collections, **kwargs) 

1546 return self.datastore.exists(ref) 

1547 

1548 def removeRuns(self, names: Iterable[str], unstore: bool = True) -> None: 

1549 """Remove one or more `~CollectionType.RUN` collections and the 

1550 datasets within them. 

1551 

1552 Parameters 

1553 ---------- 

1554 names : `Iterable` [ `str` ] 

1555 The names of the collections to remove. 

1556 unstore : `bool`, optional 

1557 If `True` (default), delete datasets from all datastores in which 

1558 they are present, and attempt to rollback the registry deletions if 

1559 datastore deletions fail (which may not always be possible). If 

1560 `False`, datastore records for these datasets are still removed, 

1561 but any artifacts (e.g. files) will not be. 

1562 

1563 Raises 

1564 ------ 

1565 TypeError 

1566 Raised if one or more collections are not of type 

1567 `~CollectionType.RUN`. 

1568 """ 

1569 if not self.isWriteable(): 

1570 raise TypeError("Butler is read-only.") 

1571 names = list(names) 

1572 refs: List[DatasetRef] = [] 

1573 for name in names: 

1574 collectionType = self.registry.getCollectionType(name) 

1575 if collectionType is not CollectionType.RUN: 

1576 raise TypeError(f"The collection type of '{name}' is {collectionType.name}, not RUN.") 

1577 refs.extend(self.registry.queryDatasets(..., collections=name, findFirst=True)) 

1578 with self.registry.transaction(): 

1579 if unstore: 

1580 self.datastore.trash(refs) 

1581 else: 

1582 self.datastore.forget(refs) 

1583 for name in names: 

1584 self.registry.removeCollection(name) 

1585 if unstore: 

1586 # Point of no return for removing artifacts 

1587 self.datastore.emptyTrash() 

1588 

1589 def pruneCollection( 

1590 self, name: str, purge: bool = False, unstore: bool = False, unlink: Optional[List[str]] = None 

1591 ) -> None: 

1592 """Remove a collection and possibly prune datasets within it. 

1593 

1594 Parameters 

1595 ---------- 

1596 name : `str` 

1597 Name of the collection to remove. If this is a 

1598 `~CollectionType.TAGGED` or `~CollectionType.CHAINED` collection, 

1599 datasets within the collection are not modified unless ``unstore`` 

1600 is `True`. If this is a `~CollectionType.RUN` collection, 

1601 ``purge`` and ``unstore`` must be `True`, and all datasets in it 

1602 are fully removed from the data repository. 

1603 purge : `bool`, optional 

1604 If `True`, permit `~CollectionType.RUN` collections to be removed, 

1605 fully removing datasets within them. Requires ``unstore=True`` as 

1606 well as an added precaution against accidental deletion. Must be 

1607 `False` (default) if the collection is not a ``RUN``. 

1608 unstore: `bool`, optional 

1609 If `True`, remove all datasets in the collection from all 

1610 datastores in which they appear. 

1611 unlink: `list` [`str`], optional 

1612 Before removing the given `collection` unlink it from from these 

1613 parent collections. 

1614 

1615 Raises 

1616 ------ 

1617 TypeError 

1618 Raised if the butler is read-only or arguments are mutually 

1619 inconsistent. 

1620 """ 

1621 # See pruneDatasets comments for more information about the logic here; 

1622 # the cases are almost the same, but here we can rely on Registry to 

1623 # take care everything but Datastore deletion when we remove the 

1624 # collection. 

1625 if not self.isWriteable(): 

1626 raise TypeError("Butler is read-only.") 

1627 collectionType = self.registry.getCollectionType(name) 

1628 if purge and not unstore: 

1629 raise PurgeWithoutUnstorePruneCollectionsError() 

1630 if collectionType is CollectionType.RUN and not purge: 

1631 raise RunWithoutPurgePruneCollectionsError(collectionType) 

1632 if collectionType is not CollectionType.RUN and purge: 

1633 raise PurgeUnsupportedPruneCollectionsError(collectionType) 

1634 

1635 def remove(child: str, parent: str) -> None: 

1636 """Remove a child collection from a parent collection.""" 

1637 # Remove child from parent. 

1638 chain = list(self.registry.getCollectionChain(parent)) 

1639 try: 

1640 chain.remove(name) 

1641 except ValueError as e: 

1642 raise RuntimeError(f"{name} is not a child of {parent}") from e 

1643 self.registry.setCollectionChain(parent, chain) 

1644 

1645 with self.registry.transaction(): 

1646 if unlink: 

1647 for parent in unlink: 

1648 remove(name, parent) 

1649 if unstore: 

1650 refs = self.registry.queryDatasets(..., collections=name, findFirst=True) 

1651 self.datastore.trash(refs) 

1652 self.registry.removeCollection(name) 

1653 

1654 if unstore: 

1655 # Point of no return for removing artifacts 

1656 self.datastore.emptyTrash() 

1657 

1658 def pruneDatasets( 

1659 self, 

1660 refs: Iterable[DatasetRef], 

1661 *, 

1662 disassociate: bool = True, 

1663 unstore: bool = False, 

1664 tags: Iterable[str] = (), 

1665 purge: bool = False, 

1666 run: Optional[str] = None, 

1667 ) -> None: 

1668 """Remove one or more datasets from a collection and/or storage. 

1669 

1670 Parameters 

1671 ---------- 

1672 refs : `~collections.abc.Iterable` of `DatasetRef` 

1673 Datasets to prune. These must be "resolved" references (not just 

1674 a `DatasetType` and data ID). 

1675 disassociate : `bool`, optional 

1676 Disassociate pruned datasets from ``tags``, or from all collections 

1677 if ``purge=True``. 

1678 unstore : `bool`, optional 

1679 If `True` (`False` is default) remove these datasets from all 

1680 datastores known to this butler. Note that this will make it 

1681 impossible to retrieve these datasets even via other collections. 

1682 Datasets that are already not stored are ignored by this option. 

1683 tags : `Iterable` [ `str` ], optional 

1684 `~CollectionType.TAGGED` collections to disassociate the datasets 

1685 from. Ignored if ``disassociate`` is `False` or ``purge`` is 

1686 `True`. 

1687 purge : `bool`, optional 

1688 If `True` (`False` is default), completely remove the dataset from 

1689 the `Registry`. To prevent accidental deletions, ``purge`` may 

1690 only be `True` if all of the following conditions are met: 

1691 

1692 - All given datasets are in the given run. 

1693 - ``disassociate`` is `True`; 

1694 - ``unstore`` is `True`. 

1695 

1696 This mode may remove provenance information from datasets other 

1697 than those provided, and should be used with extreme care. 

1698 

1699 Raises 

1700 ------ 

1701 TypeError 

1702 Raised if the butler is read-only, if no collection was provided, 

1703 or the conditions for ``purge=True`` were not met. 

1704 """ 

1705 if not self.isWriteable(): 

1706 raise TypeError("Butler is read-only.") 

1707 if purge: 

1708 if not disassociate: 

1709 raise TypeError("Cannot pass purge=True without disassociate=True.") 

1710 if not unstore: 

1711 raise TypeError("Cannot pass purge=True without unstore=True.") 

1712 elif disassociate: 

1713 tags = tuple(tags) 

1714 if not tags: 

1715 raise TypeError("No tags provided but disassociate=True.") 

1716 for tag in tags: 

1717 collectionType = self.registry.getCollectionType(tag) 

1718 if collectionType is not CollectionType.TAGGED: 

1719 raise TypeError( 

1720 f"Cannot disassociate from collection '{tag}' " 

1721 f"of non-TAGGED type {collectionType.name}." 

1722 ) 

1723 # Transform possibly-single-pass iterable into something we can iterate 

1724 # over multiple times. 

1725 refs = list(refs) 

1726 # Pruning a component of a DatasetRef makes no sense since registry 

1727 # doesn't know about components and datastore might not store 

1728 # components in a separate file 

1729 for ref in refs: 

1730 if ref.datasetType.component(): 

1731 raise ValueError(f"Can not prune a component of a dataset (ref={ref})") 

1732 # We don't need an unreliable Datastore transaction for this, because 

1733 # we've been extra careful to ensure that Datastore.trash only involves 

1734 # mutating the Registry (it can _look_ at Datastore-specific things, 

1735 # but shouldn't change them), and hence all operations here are 

1736 # Registry operations. 

1737 with self.registry.transaction(): 

1738 if unstore: 

1739 self.datastore.trash(refs) 

1740 if purge: 

1741 self.registry.removeDatasets(refs) 

1742 elif disassociate: 

1743 assert tags, "Guaranteed by earlier logic in this function." 

1744 for tag in tags: 

1745 self.registry.disassociate(tag, refs) 

1746 # We've exited the Registry transaction, and apparently committed. 

1747 # (if there was an exception, everything rolled back, and it's as if 

1748 # nothing happened - and we never get here). 

1749 # Datastore artifacts are not yet gone, but they're clearly marked 

1750 # as trash, so if we fail to delete now because of (e.g.) filesystem 

1751 # problems we can try again later, and if manual administrative 

1752 # intervention is required, it's pretty clear what that should entail: 

1753 # deleting everything on disk and in private Datastore tables that is 

1754 # in the dataset_location_trash table. 

1755 if unstore: 

1756 # Point of no return for removing artifacts 

1757 self.datastore.emptyTrash() 

1758 

1759 @transactional 

1760 def ingest( 

1761 self, 

1762 *datasets: FileDataset, 

1763 transfer: Optional[str] = "auto", 

1764 run: Optional[str] = None, 

1765 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE, 

1766 record_validation_info: bool = True, 

1767 ) -> None: 

1768 """Store and register one or more datasets that already exist on disk. 

1769 

1770 Parameters 

1771 ---------- 

1772 datasets : `FileDataset` 

1773 Each positional argument is a struct containing information about 

1774 a file to be ingested, including its URI (either absolute or 

1775 relative to the datastore root, if applicable), a `DatasetRef`, 

1776 and optionally a formatter class or its fully-qualified string 

1777 name. If a formatter is not provided, the formatter that would be 

1778 used for `put` is assumed. On successful return, all 

1779 `FileDataset.ref` attributes will have their `DatasetRef.id` 

1780 attribute populated and all `FileDataset.formatter` attributes will 

1781 be set to the formatter class used. `FileDataset.path` attributes 

1782 may be modified to put paths in whatever the datastore considers a 

1783 standardized form. 

1784 transfer : `str`, optional 

1785 If not `None`, must be one of 'auto', 'move', 'copy', 'direct', 

1786 'split', 'hardlink', 'relsymlink' or 'symlink', indicating how to 

1787 transfer the file. 

1788 run : `str`, optional 

1789 The name of the run ingested datasets should be added to, 

1790 overriding ``self.run``. 

1791 idGenerationMode : `DatasetIdGenEnum`, optional 

1792 Specifies option for generating dataset IDs. By default unique IDs 

1793 are generated for each inserted dataset. 

1794 record_validation_info : `bool`, optional 

1795 If `True`, the default, the datastore can record validation 

1796 information associated with the file. If `False` the datastore 

1797 will not attempt to track any information such as checksums 

1798 or file sizes. This can be useful if such information is tracked 

1799 in an external system or if the file is to be compressed in place. 

1800 It is up to the datastore whether this parameter is relevant. 

1801 

1802 Raises 

1803 ------ 

1804 TypeError 

1805 Raised if the butler is read-only or if no run was provided. 

1806 NotImplementedError 

1807 Raised if the `Datastore` does not support the given transfer mode. 

1808 DatasetTypeNotSupportedError 

1809 Raised if one or more files to be ingested have a dataset type that 

1810 is not supported by the `Datastore`.. 

1811 FileNotFoundError 

1812 Raised if one of the given files does not exist. 

1813 FileExistsError 

1814 Raised if transfer is not `None` but the (internal) location the 

1815 file would be moved to is already occupied. 

1816 

1817 Notes 

1818 ----- 

1819 This operation is not fully exception safe: if a database operation 

1820 fails, the given `FileDataset` instances may be only partially updated. 

1821 

1822 It is atomic in terms of database operations (they will either all 

1823 succeed or all fail) providing the database engine implements 

1824 transactions correctly. It will attempt to be atomic in terms of 

1825 filesystem operations as well, but this cannot be implemented 

1826 rigorously for most datastores. 

1827 """ 

1828 if not self.isWriteable(): 

1829 raise TypeError("Butler is read-only.") 

1830 progress = Progress("lsst.daf.butler.Butler.ingest", level=logging.DEBUG) 

1831 # Reorganize the inputs so they're grouped by DatasetType and then 

1832 # data ID. We also include a list of DatasetRefs for each FileDataset 

1833 # to hold the resolved DatasetRefs returned by the Registry, before 

1834 # it's safe to swap them into FileDataset.refs. 

1835 # Some type annotation aliases to make that clearer: 

1836 GroupForType = Dict[DataCoordinate, Tuple[FileDataset, List[DatasetRef]]] 

1837 GroupedData = MutableMapping[DatasetType, GroupForType] 

1838 # The actual data structure: 

1839 groupedData: GroupedData = defaultdict(dict) 

1840 # And the nested loop that populates it: 

1841 for dataset in progress.wrap(datasets, desc="Grouping by dataset type"): 

1842 # This list intentionally shared across the inner loop, since it's 

1843 # associated with `dataset`. 

1844 resolvedRefs: List[DatasetRef] = [] 

1845 

1846 # Somewhere to store pre-existing refs if we have an 

1847 # execution butler. 

1848 existingRefs: List[DatasetRef] = [] 

1849 

1850 for ref in dataset.refs: 

1851 if ref.dataId in groupedData[ref.datasetType]: 

1852 raise ConflictingDefinitionError( 

1853 f"Ingest conflict. Dataset {dataset.path} has same" 

1854 " DataId as other ingest dataset" 

1855 f" {groupedData[ref.datasetType][ref.dataId][0].path} " 

1856 f" ({ref.dataId})" 

1857 ) 

1858 if self._allow_put_of_predefined_dataset: 

1859 existing_ref = self.registry.findDataset( 

1860 ref.datasetType, dataId=ref.dataId, collections=run 

1861 ) 

1862 if existing_ref: 

1863 if self.datastore.knows(existing_ref): 

1864 raise ConflictingDefinitionError( 

1865 f"Dataset associated with path {dataset.path}" 

1866 f" already exists as {existing_ref}." 

1867 ) 

1868 # Store this ref elsewhere since it already exists 

1869 # and we do not want to remake it but we do want 

1870 # to store it in the datastore. 

1871 existingRefs.append(existing_ref) 

1872 

1873 # Nothing else to do until we have finished 

1874 # iterating. 

1875 continue 

1876 

1877 groupedData[ref.datasetType][ref.dataId] = (dataset, resolvedRefs) 

1878 

1879 if existingRefs: 

1880 

1881 if len(dataset.refs) != len(existingRefs): 

1882 # Keeping track of partially pre-existing datasets is hard 

1883 # and should generally never happen. For now don't allow 

1884 # it. 

1885 raise ConflictingDefinitionError( 

1886 f"For dataset {dataset.path} some dataIds already exist" 

1887 " in registry but others do not. This is not supported." 

1888 ) 

1889 

1890 # Attach the resolved refs if we found them. 

1891 dataset.refs = existingRefs 

1892 

1893 # Now we can bulk-insert into Registry for each DatasetType. 

1894 for datasetType, groupForType in progress.iter_item_chunks( 

1895 groupedData.items(), desc="Bulk-inserting datasets by type" 

1896 ): 

1897 refs = self.registry.insertDatasets( 

1898 datasetType, 

1899 dataIds=groupForType.keys(), 

1900 run=run, 

1901 expand=self.datastore.needs_expanded_data_ids(transfer, datasetType), 

1902 idGenerationMode=idGenerationMode, 

1903 ) 

1904 # Append those resolved DatasetRefs to the new lists we set up for 

1905 # them. 

1906 for ref, (_, resolvedRefs) in zip(refs, groupForType.values()): 

1907 resolvedRefs.append(ref) 

1908 

1909 # Go back to the original FileDatasets to replace their refs with the 

1910 # new resolved ones. 

1911 for groupForType in progress.iter_chunks( 

1912 groupedData.values(), desc="Reassociating resolved dataset refs with files" 

1913 ): 

1914 for dataset, resolvedRefs in groupForType.values(): 

1915 dataset.refs = resolvedRefs 

1916 

1917 # Bulk-insert everything into Datastore. 

1918 self.datastore.ingest(*datasets, transfer=transfer, record_validation_info=record_validation_info) 

1919 

1920 @contextlib.contextmanager 

1921 def export( 

1922 self, 

1923 *, 

1924 directory: Optional[str] = None, 

1925 filename: Optional[str] = None, 

1926 format: Optional[str] = None, 

1927 transfer: Optional[str] = None, 

1928 ) -> Iterator[RepoExportContext]: 

1929 """Export datasets from the repository represented by this `Butler`. 

1930 

1931 This method is a context manager that returns a helper object 

1932 (`RepoExportContext`) that is used to indicate what information from 

1933 the repository should be exported. 

1934 

1935 Parameters 

1936 ---------- 

1937 directory : `str`, optional 

1938 Directory dataset files should be written to if ``transfer`` is not 

1939 `None`. 

1940 filename : `str`, optional 

1941 Name for the file that will include database information associated 

1942 with the exported datasets. If this is not an absolute path and 

1943 ``directory`` is not `None`, it will be written to ``directory`` 

1944 instead of the current working directory. Defaults to 

1945 "export.{format}". 

1946 format : `str`, optional 

1947 File format for the database information file. If `None`, the 

1948 extension of ``filename`` will be used. 

1949 transfer : `str`, optional 

1950 Transfer mode passed to `Datastore.export`. 

1951 

1952 Raises 

1953 ------ 

1954 TypeError 

1955 Raised if the set of arguments passed is inconsistent. 

1956 

1957 Examples 

1958 -------- 

1959 Typically the `Registry.queryDataIds` and `Registry.queryDatasets` 

1960 methods are used to provide the iterables over data IDs and/or datasets 

1961 to be exported:: 

1962 

1963 with butler.export("exports.yaml") as export: 

1964 # Export all flats, but none of the dimension element rows 

1965 # (i.e. data ID information) associated with them. 

1966 export.saveDatasets(butler.registry.queryDatasets("flat"), 

1967 elements=()) 

1968 # Export all datasets that start with "deepCoadd_" and all of 

1969 # their associated data ID information. 

1970 export.saveDatasets(butler.registry.queryDatasets("deepCoadd_*")) 

1971 """ 

1972 if directory is None and transfer is not None: 

1973 raise TypeError("Cannot transfer without providing a directory.") 

1974 if transfer == "move": 

1975 raise TypeError("Transfer may not be 'move': export is read-only") 

1976 if format is None: 

1977 if filename is None: 

1978 raise TypeError("At least one of 'filename' or 'format' must be provided.") 

1979 else: 

1980 _, format = os.path.splitext(filename) 

1981 elif filename is None: 

1982 filename = f"export.{format}" 

1983 if directory is not None: 

1984 filename = os.path.join(directory, filename) 

1985 BackendClass = get_class_of(self._config["repo_transfer_formats"][format]["export"]) 

1986 with open(filename, "w") as stream: 

1987 backend = BackendClass(stream) 

1988 try: 

1989 helper = RepoExportContext( 

1990 self.registry, self.datastore, backend=backend, directory=directory, transfer=transfer 

1991 ) 

1992 yield helper 

1993 except BaseException: 

1994 raise 

1995 else: 

1996 helper._finish() 

1997 

1998 def import_( 

1999 self, 

2000 *, 

2001 directory: Optional[str] = None, 

2002 filename: Union[str, TextIO, None] = None, 

2003 format: Optional[str] = None, 

2004 transfer: Optional[str] = None, 

2005 skip_dimensions: Optional[Set] = None, 

2006 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE, 

2007 reuseIds: bool = False, 

2008 ) -> None: 

2009 """Import datasets into this repository that were exported from a 

2010 different butler repository via `~lsst.daf.butler.Butler.export`. 

2011 

2012 Parameters 

2013 ---------- 

2014 directory : `str`, optional 

2015 Directory containing dataset files to import from. If `None`, 

2016 ``filename`` and all dataset file paths specified therein must 

2017 be absolute. 

2018 filename : `str` or `TextIO`, optional 

2019 A stream or name of file that contains database information 

2020 associated with the exported datasets, typically generated by 

2021 `~lsst.daf.butler.Butler.export`. If this a string (name) and 

2022 is not an absolute path, does not exist in the current working 

2023 directory, and ``directory`` is not `None`, it is assumed to be in 

2024 ``directory``. Defaults to "export.{format}". 

2025 format : `str`, optional 

2026 File format for ``filename``. If `None`, the extension of 

2027 ``filename`` will be used. 

2028 transfer : `str`, optional 

2029 Transfer mode passed to `~lsst.daf.butler.Datastore.ingest`. 

2030 skip_dimensions : `set`, optional 

2031 Names of dimensions that should be skipped and not imported. 

2032 idGenerationMode : `DatasetIdGenEnum`, optional 

2033 Specifies option for generating dataset IDs when IDs are not 

2034 provided or their type does not match backend type. By default 

2035 unique IDs are generated for each inserted dataset. 

2036 reuseIds : `bool`, optional 

2037 If `True` then forces re-use of imported dataset IDs for integer 

2038 IDs which are normally generated as auto-incremented; exception 

2039 will be raised if imported IDs clash with existing ones. This 

2040 option has no effect on the use of globally-unique IDs which are 

2041 always re-used (or generated if integer IDs are being imported). 

2042 

2043 Raises 

2044 ------ 

2045 TypeError 

2046 Raised if the set of arguments passed is inconsistent, or if the 

2047 butler is read-only. 

2048 """ 

2049 if not self.isWriteable(): 

2050 raise TypeError("Butler is read-only.") 

2051 if format is None: 

2052 if filename is None: 

2053 raise TypeError("At least one of 'filename' or 'format' must be provided.") 

2054 else: 

2055 _, format = os.path.splitext(filename) # type: ignore 

2056 elif filename is None: 

2057 filename = f"export.{format}" 

2058 if isinstance(filename, str) and directory is not None and not os.path.exists(filename): 

2059 filename = os.path.join(directory, filename) 

2060 BackendClass = get_class_of(self._config["repo_transfer_formats"][format]["import"]) 

2061 

2062 def doImport(importStream: TextIO) -> None: 

2063 backend = BackendClass(importStream, self.registry) 

2064 backend.register() 

2065 with self.transaction(): 

2066 backend.load( 

2067 self.datastore, 

2068 directory=directory, 

2069 transfer=transfer, 

2070 skip_dimensions=skip_dimensions, 

2071 idGenerationMode=idGenerationMode, 

2072 reuseIds=reuseIds, 

2073 ) 

2074 

2075 if isinstance(filename, str): 

2076 with open(filename, "r") as stream: 

2077 doImport(stream) 

2078 else: 

2079 doImport(filename) 

2080 

2081 def transfer_from( 

2082 self, 

2083 source_butler: Butler, 

2084 source_refs: Iterable[DatasetRef], 

2085 transfer: str = "auto", 

2086 id_gen_map: Dict[str, DatasetIdGenEnum] = None, 

2087 skip_missing: bool = True, 

2088 register_dataset_types: bool = False, 

2089 ) -> List[DatasetRef]: 

2090 """Transfer datasets to this Butler from a run in another Butler. 

2091 

2092 Parameters 

2093 ---------- 

2094 source_butler : `Butler` 

2095 Butler from which the datasets are to be transferred. 

2096 source_refs : iterable of `DatasetRef` 

2097 Datasets defined in the source butler that should be transferred to 

2098 this butler. 

2099 transfer : `str`, optional 

2100 Transfer mode passed to `~lsst.daf.butler.Datastore.transfer_from`. 

2101 id_gen_map : `dict` of [`str`, `DatasetIdGenEnum`], optional 

2102 A mapping of dataset type to ID generation mode. Only used if 

2103 the source butler is using integer IDs. Should not be used 

2104 if this receiving butler uses integer IDs. Without this dataset 

2105 import always uses unique. 

2106 skip_missing : `bool` 

2107 If `True`, datasets with no datastore artifact associated with 

2108 them are not transferred. If `False` a registry entry will be 

2109 created even if no datastore record is created (and so will 

2110 look equivalent to the dataset being unstored). 

2111 register_dataset_types : `bool` 

2112 If `True` any missing dataset types are registered. Otherwise 

2113 an exception is raised. 

2114 

2115 Returns 

2116 ------- 

2117 refs : `list` of `DatasetRef` 

2118 The refs added to this Butler. 

2119 

2120 Notes 

2121 ----- 

2122 Requires that any dimension definitions are already present in the 

2123 receiving Butler. The datastore artifact has to exist for a transfer 

2124 to be made but non-existence is not an error. 

2125 

2126 Datasets that already exist in this run will be skipped. 

2127 

2128 The datasets are imported as part of a transaction, although 

2129 dataset types are registered before the transaction is started. 

2130 This means that it is possible for a dataset type to be registered 

2131 even though transfer has failed. 

2132 """ 

2133 if not self.isWriteable(): 

2134 raise TypeError("Butler is read-only.") 

2135 progress = Progress("lsst.daf.butler.Butler.transfer_from", level=VERBOSE) 

2136 

2137 # Will iterate through the refs multiple times so need to convert 

2138 # to a list if this isn't a collection. 

2139 if not isinstance(source_refs, collections.abc.Collection): 

2140 source_refs = list(source_refs) 

2141 

2142 original_count = len(source_refs) 

2143 log.info("Transferring %d datasets into %s", original_count, str(self)) 

2144 

2145 if id_gen_map is None: 

2146 id_gen_map = {} 

2147 

2148 # In some situations the datastore artifact may be missing 

2149 # and we do not want that registry entry to be imported. 

2150 # Asking datastore is not sufficient, the records may have been 

2151 # purged, we have to ask for the (predicted) URI and check 

2152 # existence explicitly. Execution butler is set up exactly like 

2153 # this with no datastore records. 

2154 artifact_existence: Dict[ResourcePath, bool] = {} 

2155 if skip_missing: 

2156 dataset_existence = source_butler.datastore.mexists( 

2157 source_refs, artifact_existence=artifact_existence 

2158 ) 

2159 source_refs = [ref for ref, exists in dataset_existence.items() if exists] 

2160 filtered_count = len(source_refs) 

2161 log.verbose( 

2162 "%d datasets removed because the artifact does not exist. Now have %d.", 

2163 original_count - filtered_count, 

2164 filtered_count, 

2165 ) 

2166 

2167 # Importing requires that we group the refs by dataset type and run 

2168 # before doing the import. 

2169 source_dataset_types = set() 

2170 grouped_refs = defaultdict(list) 

2171 grouped_indices = defaultdict(list) 

2172 for i, ref in enumerate(source_refs): 

2173 grouped_refs[ref.datasetType, ref.run].append(ref) 

2174 grouped_indices[ref.datasetType, ref.run].append(i) 

2175 source_dataset_types.add(ref.datasetType) 

2176 

2177 # Check to see if the dataset type in the source butler has 

2178 # the same definition in the target butler and register missing 

2179 # ones if requested. Registration must happen outside a transaction. 

2180 newly_registered_dataset_types = set() 

2181 for datasetType in source_dataset_types: 

2182 if register_dataset_types: 

2183 # Let this raise immediately if inconsistent. Continuing 

2184 # on to find additional inconsistent dataset types 

2185 # might result in additional unwanted dataset types being 

2186 # registered. 

2187 if self.registry.registerDatasetType(datasetType): 

2188 newly_registered_dataset_types.add(datasetType) 

2189 else: 

2190 # If the dataset type is missing, let it fail immediately. 

2191 target_dataset_type = self.registry.getDatasetType(datasetType.name) 

2192 if target_dataset_type != datasetType: 

2193 raise ConflictingDefinitionError( 

2194 "Source butler dataset type differs from definition" 

2195 f" in target butler: {datasetType} !=" 

2196 f" {target_dataset_type}" 

2197 ) 

2198 if newly_registered_dataset_types: 

2199 # We may have registered some even if there were inconsistencies 

2200 # but should let people know (or else remove them again). 

2201 log.log( 

2202 VERBOSE, 

2203 "Registered the following dataset types in the target Butler: %s", 

2204 ", ".join(d.name for d in newly_registered_dataset_types), 

2205 ) 

2206 else: 

2207 log.log(VERBOSE, "All required dataset types are known to the target Butler") 

2208 

2209 # The returned refs should be identical for UUIDs. 

2210 # For now must also support integers and so need to retain the 

2211 # newly-created refs from this registry. 

2212 # Pre-size it so we can assign refs into the correct slots 

2213 transferred_refs_tmp: List[Optional[DatasetRef]] = [None] * len(source_refs) 

2214 default_id_gen = DatasetIdGenEnum.UNIQUE 

2215 

2216 handled_collections: Set[str] = set() 

2217 

2218 # Do all the importing in a single transaction. 

2219 with self.transaction(): 

2220 for (datasetType, run), refs_to_import in progress.iter_item_chunks( 

2221 grouped_refs.items(), desc="Importing to registry by run and dataset type" 

2222 ): 

2223 if run not in handled_collections: 

2224 run_doc = source_butler.registry.getCollectionDocumentation(run) 

2225 registered = self.registry.registerRun(run, doc=run_doc) 

2226 handled_collections.add(run) 

2227 if registered: 

2228 log.log(VERBOSE, "Creating output run %s", run) 

2229 

2230 id_generation_mode = default_id_gen 

2231 if isinstance(refs_to_import[0].id, int): 

2232 # ID generation mode might need to be overridden when 

2233 # targetting UUID 

2234 id_generation_mode = id_gen_map.get(datasetType.name, default_id_gen) 

2235 

2236 n_refs = len(refs_to_import) 

2237 log.verbose( 

2238 "Importing %d ref%s of dataset type %s into run %s", 

2239 n_refs, 

2240 "" if n_refs == 1 else "s", 

2241 datasetType.name, 

2242 run, 

2243 ) 

2244 

2245 # No way to know if this butler's registry uses UUID. 

2246 # We have to trust the caller on this. If it fails they will 

2247 # have to change their approach. We can't catch the exception 

2248 # and retry with unique because that will mess up the 

2249 # transaction handling. We aren't allowed to ask the registry 

2250 # manager what type of ID it is using. 

2251 imported_refs = self.registry._importDatasets( 

2252 refs_to_import, idGenerationMode=id_generation_mode, expand=False 

2253 ) 

2254 

2255 # Map them into the correct slots to match the initial order 

2256 for i, ref in zip(grouped_indices[datasetType, run], imported_refs): 

2257 transferred_refs_tmp[i] = ref 

2258 

2259 # Mypy insists that we might have None in here so we have to make 

2260 # that explicit by assigning to a new variable and filtering out 

2261 # something that won't be there. 

2262 transferred_refs = [ref for ref in transferred_refs_tmp if ref is not None] 

2263 

2264 # Check consistency 

2265 assert len(source_refs) == len(transferred_refs), "Different number of refs imported than given" 

2266 

2267 log.verbose("Imported %d datasets into destination butler", len(transferred_refs)) 

2268 

2269 # The transferred refs need to be reordered to match the original 

2270 # ordering given by the caller. Without this the datastore transfer 

2271 # will be broken. 

2272 

2273 # Ask the datastore to transfer. The datastore has to check that 

2274 # the source datastore is compatible with the target datastore. 

2275 self.datastore.transfer_from( 

2276 source_butler.datastore, 

2277 source_refs, 

2278 local_refs=transferred_refs, 

2279 transfer=transfer, 

2280 artifact_existence=artifact_existence, 

2281 ) 

2282 

2283 return transferred_refs 

2284 

2285 def validateConfiguration( 

2286 self, 

2287 logFailures: bool = False, 

2288 datasetTypeNames: Optional[Iterable[str]] = None, 

2289 ignore: Iterable[str] = None, 

2290 ) -> None: 

2291 """Validate butler configuration. 

2292 

2293 Checks that each `DatasetType` can be stored in the `Datastore`. 

2294 

2295 Parameters 

2296 ---------- 

2297 logFailures : `bool`, optional 

2298 If `True`, output a log message for every validation error 

2299 detected. 

2300 datasetTypeNames : iterable of `str`, optional 

2301 The `DatasetType` names that should be checked. This allows 

2302 only a subset to be selected. 

2303 ignore : iterable of `str`, optional 

2304 Names of DatasetTypes to skip over. This can be used to skip 

2305 known problems. If a named `DatasetType` corresponds to a 

2306 composite, all components of that `DatasetType` will also be 

2307 ignored. 

2308 

2309 Raises 

2310 ------ 

2311 ButlerValidationError 

2312 Raised if there is some inconsistency with how this Butler 

2313 is configured. 

2314 """ 

2315 if datasetTypeNames: 

2316 datasetTypes = [self.registry.getDatasetType(name) for name in datasetTypeNames] 

2317 else: 

2318 datasetTypes = list(self.registry.queryDatasetTypes()) 

2319 

2320 # filter out anything from the ignore list 

2321 if ignore: 

2322 ignore = set(ignore) 

2323 datasetTypes = [ 

2324 e for e in datasetTypes if e.name not in ignore and e.nameAndComponent()[0] not in ignore 

2325 ] 

2326 else: 

2327 ignore = set() 

2328 

2329 # Find all the registered instruments 

2330 instruments = set(record.name for record in self.registry.queryDimensionRecords("instrument")) 

2331 

2332 # For each datasetType that has an instrument dimension, create 

2333 # a DatasetRef for each defined instrument 

2334 datasetRefs = [] 

2335 

2336 for datasetType in datasetTypes: 

2337 if "instrument" in datasetType.dimensions: 

2338 for instrument in instruments: 

2339 datasetRef = DatasetRef( 

2340 datasetType, {"instrument": instrument}, conform=False # type: ignore 

2341 ) 

2342 datasetRefs.append(datasetRef) 

2343 

2344 entities: List[Union[DatasetType, DatasetRef]] = [] 

2345 entities.extend(datasetTypes) 

2346 entities.extend(datasetRefs) 

2347 

2348 datastoreErrorStr = None 

2349 try: 

2350 self.datastore.validateConfiguration(entities, logFailures=logFailures) 

2351 except ValidationError as e: 

2352 datastoreErrorStr = str(e) 

2353 

2354 # Also check that the LookupKeys used by the datastores match 

2355 # registry and storage class definitions 

2356 keys = self.datastore.getLookupKeys() 

2357 

2358 failedNames = set() 

2359 failedDataId = set() 

2360 for key in keys: 

2361 if key.name is not None: 

2362 if key.name in ignore: 

2363 continue 

2364 

2365 # skip if specific datasetType names were requested and this 

2366 # name does not match 

2367 if datasetTypeNames and key.name not in datasetTypeNames: 

2368 continue 

2369 

2370 # See if it is a StorageClass or a DatasetType 

2371 if key.name in self.storageClasses: 

2372 pass 

2373 else: 

2374 try: 

2375 self.registry.getDatasetType(key.name) 

2376 except KeyError: 

2377 if logFailures: 

2378 log.critical("Key '%s' does not correspond to a DatasetType or StorageClass", key) 

2379 failedNames.add(key) 

2380 else: 

2381 # Dimensions are checked for consistency when the Butler 

2382 # is created and rendezvoused with a universe. 

2383 pass 

2384 

2385 # Check that the instrument is a valid instrument 

2386 # Currently only support instrument so check for that 

2387 if key.dataId: 

2388 dataIdKeys = set(key.dataId) 

2389 if set(["instrument"]) != dataIdKeys: 

2390 if logFailures: 

2391 log.critical("Key '%s' has unsupported DataId override", key) 

2392 failedDataId.add(key) 

2393 elif key.dataId["instrument"] not in instruments: 

2394 if logFailures: 

2395 log.critical("Key '%s' has unknown instrument", key) 

2396 failedDataId.add(key) 

2397 

2398 messages = [] 

2399 

2400 if datastoreErrorStr: 

2401 messages.append(datastoreErrorStr) 

2402 

2403 for failed, msg in ( 

2404 (failedNames, "Keys without corresponding DatasetType or StorageClass entry: "), 

2405 (failedDataId, "Keys with bad DataId entries: "), 

2406 ): 

2407 if failed: 

2408 msg += ", ".join(str(k) for k in failed) 

2409 messages.append(msg) 

2410 

2411 if messages: 

2412 raise ValidationError(";\n".join(messages)) 

2413 

2414 @property 

2415 def collections(self) -> CollectionSearch: 

2416 """The collections to search by default, in order (`CollectionSearch`). 

2417 

2418 This is an alias for ``self.registry.defaults.collections``. It cannot 

2419 be set directly in isolation, but all defaults may be changed together 

2420 by assigning a new `RegistryDefaults` instance to 

2421 ``self.registry.defaults``. 

2422 """ 

2423 return self.registry.defaults.collections 

2424 

2425 @property 

2426 def run(self) -> Optional[str]: 

2427 """Name of the run this butler writes outputs to by default (`str` or 

2428 `None`). 

2429 

2430 This is an alias for ``self.registry.defaults.run``. It cannot be set 

2431 directly in isolation, but all defaults may be changed together by 

2432 assigning a new `RegistryDefaults` instance to 

2433 ``self.registry.defaults``. 

2434 """ 

2435 return self.registry.defaults.run 

2436 

2437 @property 

2438 def dimensions(self) -> DimensionUniverse: 

2439 # Docstring inherited. 

2440 return self.registry.dimensions 

2441 

2442 registry: Registry 

2443 """The object that manages dataset metadata and relationships (`Registry`). 

2444 

2445 Most operations that don't involve reading or writing butler datasets are 

2446 accessible only via `Registry` methods. 

2447 """ 

2448 

2449 datastore: Datastore 

2450 """The object that manages actual dataset storage (`Datastore`). 

2451 

2452 Direct user access to the datastore should rarely be necessary; the primary 

2453 exception is the case where a `Datastore` implementation provides extra 

2454 functionality beyond what the base class defines. 

2455 """ 

2456 

2457 storageClasses: StorageClassFactory 

2458 """An object that maps known storage class names to objects that fully 

2459 describe them (`StorageClassFactory`). 

2460 """ 

2461 

2462 _allow_put_of_predefined_dataset: bool 

2463 """Allow a put to succeed even if there is already a registry entry for it 

2464 but not a datastore record. (`bool`)."""