
1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22""" 

23Butler top level classes. 

24""" 

25from __future__ import annotations 

26 

27__all__ = ( 

28 "Butler", 

29 "ButlerValidationError", 

30 "PruneCollectionsArgsError", 

31 "PurgeWithoutUnstorePruneCollectionsError", 

32 "RunWithoutPurgePruneCollectionsError", 

33 "PurgeUnsupportedPruneCollectionsError", 

34) 

35 

36 

37from collections import defaultdict 

38import contextlib 

39import logging 

40import numbers 

41import os 

42from typing import ( 

43 Any, 

44 ClassVar, 

45 Counter, 

46 Dict, 

47 Iterable, 

48 Iterator, 

49 List, 

50 MutableMapping, 

51 Optional, 

52 Set, 

53 TextIO, 

54 Tuple, 

55 Type, 

56 Union, 

57) 

58 

59try: 

60 import boto3 

61except ImportError: 

62 boto3 = None 

63 

64from lsst.utils import doImport 

65from .core import ( 

66 AmbiguousDatasetError, 

67 ButlerURI, 

68 Config, 

69 ConfigSubset, 

70 DataCoordinate, 

71 DataId, 

72 DataIdValue, 

73 DatasetRef, 

74 DatasetType, 

75 Datastore, 

76 Dimension, 

77 DimensionConfig, 

78 FileDataset, 

79 Progress, 

80 StorageClassFactory, 

81 Timespan, 

82 ValidationError, 

83) 

84from .core.repoRelocation import BUTLER_ROOT_TAG 

85from .core.utils import transactional, getClassOf 

86from ._deferredDatasetHandle import DeferredDatasetHandle 

87from ._butlerConfig import ButlerConfig 

88from .registry import Registry, RegistryConfig, RegistryDefaults, CollectionType, ConflictingDefinitionError 

89from .registry.wildcards import CollectionSearch 

90from .transfers import RepoExportContext 

91 

92log = logging.getLogger(__name__) 

93 

94 

95class ButlerValidationError(ValidationError): 

96 """There is a problem with the Butler configuration.""" 

97 pass 

98 

99 

100class PruneCollectionsArgsError(TypeError): 

101 """Base class for errors relating to Butler.pruneCollections input 

102 arguments. 

103 """ 

104 pass 

105 

106 

107class PurgeWithoutUnstorePruneCollectionsError(PruneCollectionsArgsError): 

108 """Raised when purge and unstore are both required to be True, and 

109 purge is True but unstore is False. 

110 """ 

111 

112 def __init__(self) -> None: 

113 super().__init__("Cannot pass purge=True without unstore=True.") 

114 

115 

116class RunWithoutPurgePruneCollectionsError(PruneCollectionsArgsError): 

117 """Raised when pruning a RUN collection but purge is False.""" 

118 

119 def __init__(self, collectionType: CollectionType): 

120 self.collectionType = collectionType 

121 super().__init__(f"Cannot prune RUN collection {self.collectionType.name} without purge=True.") 

122 

123 

124class PurgeUnsupportedPruneCollectionsError(PruneCollectionsArgsError): 

125 """Raised when purge is True but is not supported for the given 

126 collection.""" 

127 

128 def __init__(self, collectionType: CollectionType): 

129 self.collectionType = collectionType 

130 super().__init__( 

131 f"Cannot prune {self.collectionType} collection {self.collectionType.name} with purge=True.") 

132 

133 

134class Butler: 

135 """Main entry point for the data access system. 

136 

137 Parameters 

138 ---------- 

139 config : `ButlerConfig`, `Config` or `str`, optional. 

140 Configuration. Anything acceptable to the 

141 `ButlerConfig` constructor. If a directory path 

142 is given the configuration will be read from a ``butler.yaml`` file in 

143 that location. If `None` is given default values will be used. 

144 butler : `Butler`, optional. 

145 If provided, construct a new Butler that uses the same registry and 

146 datastore as the given one, but with the given collection and run. 

147 Incompatible with the ``config``, ``searchPaths``, and ``writeable`` 

148 arguments. 

149 collections : `str` or `Iterable` [ `str` ], optional 

150 An expression specifying the collections to be searched (in order) when 

151 reading datasets. 

152 This may be a `str` collection name or an iterable thereof. 

153 See :ref:`daf_butler_collection_expressions` for more information. 

154 These collections are not registered automatically and must be 

155 manually registered before they are used by any method, but they may be 

156 manually registered after the `Butler` is initialized. 

157 run : `str`, optional 

158 Name of the `~CollectionType.RUN` collection new datasets should be 

159 inserted into. If ``collections`` is `None` and ``run`` is not `None`, 

160 ``collections`` will be set to ``[run]``. If not `None`, this 

161 collection will automatically be registered. If this is not set (and 

162 ``writeable`` is not set either), a read-only butler will be created. 

163 searchPaths : `list` of `str`, optional 

164 Directory paths to search when calculating the full Butler 

165 configuration. Not used if the supplied config is already a 

166 `ButlerConfig`. 

167 writeable : `bool`, optional 

168 Explicitly sets whether the butler supports write operations. If not 

169 provided, a read-write butler is created if ``run`` is not `None`, 

170 and a read-only butler otherwise. 

171 inferDefaults : `bool`, optional 

172 If `True` (default) infer default data ID values from the values 

173 present in the datasets in ``collections``: if all collections have the 

174 same value (or no value) for a governor dimension, that value will be 

175 the default for that dimension. Nonexistent collections are ignored. 

176 If a default value is provided explicitly for a governor dimension via 

177 ``**kwargs``, no default will be inferred for that dimension. 

178 **kwargs : `str` 

179 Default data ID key-value pairs. These may only identify "governor" 

180 dimensions like ``instrument`` and ``skymap``. 

181 

182 Examples 

183 -------- 

184 While there are many ways to control exactly how a `Butler` interacts with 

185 the collections in its `Registry`, the most common cases are still simple. 

186 

187 For a read-only `Butler` that searches one collection, do:: 

188 

189 butler = Butler("/path/to/repo", collections=["u/alice/DM-50000"]) 

190 

191 For a read-write `Butler` that writes to and reads from a 

192 `~CollectionType.RUN` collection:: 

193 

194 butler = Butler("/path/to/repo", run="u/alice/DM-50000/a") 

195 

196 The `Butler` passed to a ``PipelineTask`` is often much more complex, 

197 because we want to write to one `~CollectionType.RUN` collection but read 

198 from several others (as well):: 

199 

200 butler = Butler("/path/to/repo", run="u/alice/DM-50000/a", 

201 collections=["u/alice/DM-50000/a", 

202 "u/bob/DM-49998", 

203 "HSC/defaults"]) 

204 

205 This butler will `put` new datasets to the run ``u/alice/DM-50000/a``. 

206 Datasets will be read first from that run (since it appears first in the 

207 chain), and then from ``u/bob/DM-49998`` and finally ``HSC/defaults``. 

208 

209 Finally, one can always create a `Butler` with no collections:: 

210 

211 butler = Butler("/path/to/repo", writeable=True) 

212 

213 This can be extremely useful when you just want to use ``butler.registry``, 

214 e.g. for inserting dimension data or managing collections, or when the 

215 collections you want to use with the butler are not consistent. 

216 Passing ``writeable`` explicitly here is only necessary if you want to be 

217 able to make changes to the repo - usually the value for ``writeable`` can 

218 be guessed from the collection arguments provided, but it defaults to 

219 `False` when there are no collection arguments. 

220 """ 

221 def __init__(self, config: Union[Config, str, None] = None, *, 

222 butler: Optional[Butler] = None, 

223 collections: Any = None, 

224 run: Optional[str] = None, 

225 searchPaths: Optional[List[str]] = None, 

226 writeable: Optional[bool] = None, 

227 inferDefaults: bool = True, 

228 **kwargs: str, 

229 ): 

230 defaults = RegistryDefaults(collections=collections, run=run, infer=inferDefaults, **kwargs) 

231 # Load registry, datastore, etc. from config or existing butler. 

232 if butler is not None: 

233 if config is not None or searchPaths is not None or writeable is not None: 

234 raise TypeError("Cannot pass 'config', 'searchPaths', or 'writeable' " 

235 "arguments with 'butler' argument.") 

236 self.registry = butler.registry.copy(defaults) 

237 self.datastore = butler.datastore 

238 self.storageClasses = butler.storageClasses 

239 self._config: ButlerConfig = butler._config 

240 else: 

241 self._config = ButlerConfig(config, searchPaths=searchPaths) 

242 if "root" in self._config: 

243 butlerRoot = self._config["root"] 

244 else: 

245 butlerRoot = self._config.configDir 

246 if writeable is None: 

247 writeable = run is not None 

248 self.registry = Registry.fromConfig(self._config, butlerRoot=butlerRoot, writeable=writeable, 

249 defaults=defaults) 

250 self.datastore = Datastore.fromConfig(self._config, self.registry.getDatastoreBridgeManager(), 

251 butlerRoot=butlerRoot) 

252 self.storageClasses = StorageClassFactory() 

253 self.storageClasses.addFromConfig(self._config) 

254 if "run" in self._config or "collection" in self._config: 

255 raise ValueError("Passing a run or collection via configuration is no longer supported.") 

256 

257 GENERATION: ClassVar[int] = 3 

258 """This is a Generation 3 Butler. 

259 

260 This attribute may be removed in the future, once the Generation 2 Butler 

261 interface has been fully retired; it should only be used in transitional 

262 code. 

263 """ 

264 

265 @staticmethod 

266 def makeRepo(root: str, config: Union[Config, str, None] = None, 

267 dimensionConfig: Union[Config, str, None] = None, standalone: bool = False, 

268 searchPaths: Optional[List[str]] = None, forceConfigRoot: bool = True, 

269 outfile: Optional[str] = None, overwrite: bool = False) -> Config: 

270 """Create an empty data repository by adding a butler.yaml config 

271 to a repository root directory. 

272 

273 Parameters 

274 ---------- 

275 root : `str` or `ButlerURI` 

276 Path or URI to the root location of the new repository. Will be 

277 created if it does not exist. 

278 config : `Config` or `str`, optional 

279 Configuration to write to the repository, after setting any 

280 root-dependent Registry or Datastore config options. Can not 

281 be a `ButlerConfig` or a `ConfigSubset`. If `None`, default 

282 configuration will be used. Root-dependent config options 

283 specified in this config are overwritten if ``forceConfigRoot`` 

284 is `True`. 

285 dimensionConfig : `Config` or `str`, optional 

286 Configuration for dimensions, will be used to initialize registry 

287 database. 

288 standalone : `bool` 

289 If True, write all expanded defaults, not just customized or 

290 repository-specific settings. 

291 This (mostly) decouples the repository from the default 

292 configuration, insulating it from changes to the defaults (which 

293 may be good or bad, depending on the nature of the changes). 

294 Future *additions* to the defaults will still be picked up when 

295 initializing `Butlers` to repos created with ``standalone=True``. 

296 searchPaths : `list` of `str`, optional 

297 Directory paths to search when calculating the full butler 

298 configuration. 

299 forceConfigRoot : `bool`, optional 

300 If `False`, any values present in the supplied ``config`` that 

301 would normally be reset are not overridden and will appear 

302 directly in the output config. This allows non-standard overrides 

303 of the root directory for a datastore or registry to be given. 

304 If this parameter is `True` the values for ``root`` will be 

305 forced into the resulting config if appropriate. 

306 outfile : `str`, optional 

307 If not-`None`, the output configuration will be written to this 

308 location rather than into the repository itself. Can be a URI 

309 string. Can refer to a directory that will be used to write 

310 ``butler.yaml``. 

311 overwrite : `bool`, optional 

312 Create a new configuration file even if one already exists 

313 in the specified output location. Default is to raise 

314 an exception. 

315 

316 Returns 

317 ------- 

318 config : `Config` 

319 The updated `Config` instance written to the repo. 

320 

321 Raises 

322 ------ 

323 ValueError 

324 Raised if a ButlerConfig or ConfigSubset is passed instead of a 

325 regular Config (as these subclasses would make it impossible to 

326 support ``standalone=False``). 

327 FileExistsError 

328 Raised if the output config file already exists. 

329 os.error 

330 Raised if the directory does not exist, exists but is not a 

331 directory, or cannot be created. 

332 

333 Notes 

334 ----- 

335 Note that when ``standalone=False`` (the default), the configuration 

336 search path (see `ConfigSubset.defaultSearchPaths`) that was used to 

337 construct the repository should also be used to construct any Butlers 

338 to avoid configuration inconsistencies. 

339 """ 

340 if isinstance(config, (ButlerConfig, ConfigSubset)): 

341 raise ValueError("makeRepo must be passed a regular Config without defaults applied.") 

342 

343 # Ensure that the root of the repository exists or can be made 

344 uri = ButlerURI(root, forceDirectory=True) 

345 uri.mkdir() 

346 

347 config = Config(config) 

348 

349 # If we are creating a new repo from scratch with relative roots, 

350 # do not propagate an explicit root from the config file 

351 if "root" in config: 

352 del config["root"] 

353 

354 full = ButlerConfig(config, searchPaths=searchPaths) # this applies defaults 

355 datastoreClass: Type[Datastore] = doImport(full["datastore", "cls"]) 

356 datastoreClass.setConfigRoot(BUTLER_ROOT_TAG, config, full, overwrite=forceConfigRoot) 

357 

358 # if key exists in given config, parse it, otherwise parse the defaults 

359 # in the expanded config 

360 if config.get(("registry", "db")): 

361 registryConfig = RegistryConfig(config) 

362 else: 

363 registryConfig = RegistryConfig(full) 

364 defaultDatabaseUri = registryConfig.makeDefaultDatabaseUri(BUTLER_ROOT_TAG) 

365 if defaultDatabaseUri is not None: 

366 Config.updateParameters(RegistryConfig, config, full, 

367 toUpdate={"db": defaultDatabaseUri}, 

368 overwrite=forceConfigRoot) 

369 else: 

370 Config.updateParameters(RegistryConfig, config, full, toCopy=("db",), 

371 overwrite=forceConfigRoot) 

372 

373 if standalone: 

374 config.merge(full) 

375 else: 

376 # Always expand the registry.managers section into the per-repo 

377 # config, because after the database schema is created, it's not 

378 # allowed to change anymore. Note that in the standalone=True 

379 # branch, _everything_ in the config is expanded, so there's no 

380 # need to special case this. 

381 Config.updateParameters(RegistryConfig, config, full, toCopy=("managers",), overwrite=False) 

382 configURI: Union[str, ButlerURI] 

383 if outfile is not None: 

384 # When writing to a separate location we must include 

385 # the root of the butler repo in the config else it won't know 

386 # where to look. 

387 config["root"] = uri.geturl() 

388 configURI = outfile 

389 else: 

390 configURI = uri 

391 config.dumpToUri(configURI, overwrite=overwrite) 

392 

393 # Create Registry and populate tables 

394 registryConfig = RegistryConfig(config.get("registry")) 

395 dimensionConfig = DimensionConfig(dimensionConfig) 

396 Registry.createFromConfig(registryConfig, dimensionConfig=dimensionConfig, butlerRoot=root) 

397 

398 return config 

399 
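# Illustrative usage sketch: create an empty repository with makeRepo() and
# then open a writeable Butler against it. The repository path and run name
# below are hypothetical.
#
#     from lsst.daf.butler import Butler
#
#     Butler.makeRepo("/path/to/new/repo")
#     butler = Butler("/path/to/new/repo", run="u/alice/ingest")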

400 @classmethod 

401 def _unpickle(cls, config: ButlerConfig, collections: Optional[CollectionSearch], run: Optional[str], 

402 defaultDataId: Dict[str, str], writeable: bool) -> Butler: 

403 """Callable used to unpickle a Butler. 

404 

405 We prefer not to use ``Butler.__init__`` directly so we can force some 

406 of its many arguments to be keyword-only (note that ``__reduce__`` 

407 can only invoke callables with positional arguments). 

408 

409 Parameters 

410 ---------- 

411 config : `ButlerConfig` 

412 Butler configuration, already coerced into a true `ButlerConfig` 

413 instance (and hence after any search paths for overrides have been 

414 utilized). 

415 collections : `CollectionSearch` 

416 Names of the default collections to read from. 

417 run : `str`, optional 

418 Name of the default `~CollectionType.RUN` collection to write to. 

419 defaultDataId : `dict` [ `str`, `str` ] 

420 Default data ID values. 

421 writeable : `bool` 

422 Whether the Butler should support write operations. 

423 

424 Returns 

425 ------- 

426 butler : `Butler` 

427 A new `Butler` instance. 

428 """ 

429 # MyPy doesn't recognize that the kwargs below are totally valid; it 

430 # seems to think ``**defaultDataId`` is a _positional_ argument! 

431 return cls(config=config, collections=collections, run=run, writeable=writeable, 

432 **defaultDataId) # type: ignore 

433 

434 def __reduce__(self) -> tuple: 

435 """Support pickling. 

436 """ 

437 return (Butler._unpickle, (self._config, self.collections, self.run, 

438 self.registry.defaults.dataId.byName(), 

439 self.registry.isWriteable())) 

440 

441 def __str__(self) -> str: 

442 return "Butler(collections={}, run={}, datastore='{}', registry='{}')".format( 

443 self.collections, self.run, self.datastore, self.registry) 

444 

445 def isWriteable(self) -> bool: 

446 """Return `True` if this `Butler` supports write operations. 

447 """ 

448 return self.registry.isWriteable() 

449 

450 @contextlib.contextmanager 

451 def transaction(self) -> Iterator[None]: 

452 """Context manager supporting `Butler` transactions. 

453 

454 Transactions can be nested. 

455 """ 

456 with self.registry.transaction(): 

457 with self.datastore.transaction(): 

458 yield 

459 
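# Illustrative usage sketch: group several writes in one transaction so that
# registry and datastore changes roll back together if any put() fails. The
# dataset type name and data ID values are hypothetical.
#
#     with butler.transaction():
#         butler.put(catalog_a, "src", visit=903334, detector=10)
#         butler.put(catalog_b, "src", visit=903334, detector=11)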

460 def _standardizeArgs(self, datasetRefOrType: Union[DatasetRef, DatasetType, str], 

461 dataId: Optional[DataId] = None, **kwds: Any 

462 ) -> Tuple[DatasetType, Optional[DataId]]: 

463 """Standardize the arguments passed to several Butler APIs. 

464 

465 Parameters 

466 ---------- 

467 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

468 When `DatasetRef` the `dataId` should be `None`. 

469 Otherwise the `DatasetType` or name thereof. 

470 dataId : `dict` or `DataCoordinate` 

471 A `dict` of `Dimension` link name, value pairs that label the 

472 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

473 should be provided as the first argument. 

474 kwds 

475 Additional keyword arguments used to augment or construct a 

476 `DataCoordinate`. See `DataCoordinate.standardize` 

477 parameters. 

478 

479 Returns 

480 ------- 

481 datasetType : `DatasetType` 

482 A `DatasetType` instance extracted from ``datasetRefOrType``. 

483 dataId : `dict` or `DataId`, optional 

484 Argument that can be used (along with ``kwds``) to construct a 

485 `DataId`. 

486 

487 Notes 

488 ----- 

489 Butler APIs that conceptually need a DatasetRef also allow passing a 

490 `DatasetType` (or the name of one) and a `DataId` (or a dict and 

491 keyword arguments that can be used to construct one) separately. This 

492 method accepts those arguments and always returns a true `DatasetType` 

493 and a `DataId` or `dict`. 

494 

495 Standardization of `dict` vs `DataId` is best handled by passing the 

496 returned ``dataId`` (and ``kwds``) to `Registry` APIs, which are 

497 generally similarly flexible. 

498 """ 

499 externalDatasetType: Optional[DatasetType] = None 

500 internalDatasetType: Optional[DatasetType] = None 

501 if isinstance(datasetRefOrType, DatasetRef): 

502 if dataId is not None or kwds: 

503 raise ValueError("DatasetRef given, cannot use dataId as well") 

504 externalDatasetType = datasetRefOrType.datasetType 

505 dataId = datasetRefOrType.dataId 

506 else: 

507 # Don't check whether DataId is provided, because Registry APIs 

508 # can usually construct a better error message when it wasn't. 

509 if isinstance(datasetRefOrType, DatasetType): 

510 externalDatasetType = datasetRefOrType 

511 else: 

512 internalDatasetType = self.registry.getDatasetType(datasetRefOrType) 

513 

514 # Check that they are self-consistent 

515 if externalDatasetType is not None: 

516 internalDatasetType = self.registry.getDatasetType(externalDatasetType.name) 

517 if externalDatasetType != internalDatasetType: 

518 raise ValueError(f"Supplied dataset type ({externalDatasetType}) inconsistent with " 

519 f"registry definition ({internalDatasetType})") 

520 

521 assert internalDatasetType is not None 

522 return internalDatasetType, dataId 

523 

524 def _findDatasetRef(self, datasetRefOrType: Union[DatasetRef, DatasetType, str], 

525 dataId: Optional[DataId] = None, *, 

526 collections: Any = None, 

527 allowUnresolved: bool = False, 

528 **kwds: Any) -> DatasetRef: 

529 """Shared logic for methods that start with a search for a dataset in 

530 the registry. 

531 

532 Parameters 

533 ---------- 

534 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

535 When `DatasetRef` the `dataId` should be `None`. 

536 Otherwise the `DatasetType` or name thereof. 

537 dataId : `dict` or `DataCoordinate`, optional 

538 A `dict` of `Dimension` link name, value pairs that label the 

539 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

540 should be provided as the first argument. 

541 collections : Any, optional 

542 Collections to be searched, overriding ``self.collections``. 

543 Can be any of the types supported by the ``collections`` argument 

544 to butler construction. 

545 allowUnresolved : `bool`, optional 

546 If `True`, return an unresolved `DatasetRef` if finding a resolved 

547 one in the `Registry` fails. Defaults to `False`. 

548 kwds 

549 Additional keyword arguments used to augment or construct a 

550 `DataId`. See `DataId` parameters. 

551 

552 Returns 

553 ------- 

554 ref : `DatasetRef` 

555 A reference to the dataset identified by the given arguments. 

556 

557 Raises 

558 ------ 

559 LookupError 

560 Raised if no matching dataset exists in the `Registry` (and 

561 ``allowUnresolved is False``). 

562 ValueError 

563 Raised if a resolved `DatasetRef` was passed as an input, but it 

564 differs from the one found in the registry. 

565 TypeError 

566 Raised if no collections were provided. 

567 """ 

568 datasetType, dataId = self._standardizeArgs(datasetRefOrType, dataId, **kwds) 

569 if isinstance(datasetRefOrType, DatasetRef): 

570 idNumber = datasetRefOrType.id 

571 else: 

572 idNumber = None 

573 timespan: Optional[Timespan] = None 

574 

575 # Process dimension records that are using record information 

576 # rather than ids 

577 newDataId: Dict[str, DataIdValue] = {} 

578 byRecord: Dict[str, Dict[str, Any]] = defaultdict(dict) 

579 

580 # if all the dataId comes from keyword parameters we do not need 

581 # to do anything here because they can't be of the form 

582 # exposure.obs_id because a "." is not allowed in a keyword parameter. 

583 if dataId: 

584 for k, v in dataId.items(): 

585 # If we have a Dimension we do not need to do anything 

586 # because it cannot be a compound key. 

587 if isinstance(k, str) and "." in k: 

588 # Someone is using a more human-readable dataId 

589 dimensionName, record = k.split(".", 1) 

590 byRecord[dimensionName][record] = v 

591 elif isinstance(k, Dimension): 

592 newDataId[k.name] = v 

593 else: 

594 newDataId[k] = v 

595 

596 # Go through the updated dataId and check the type in case someone is 

597 # using an alternate key. We have already filtered out the compound 

598 # dimension.record keys. 

599 not_dimensions = {} 

600 

601 # Will need to look in the dataId and the keyword arguments 

602 # and will remove them if they need to be fixed or are unrecognized. 

603 for dataIdDict in (newDataId, kwds): 

604 # Use a list so we can adjust the dict safely in the loop 

605 for dimensionName in list(dataIdDict): 

606 value = dataIdDict[dimensionName] 

607 try: 

608 dimension = self.registry.dimensions.getStaticDimensions()[dimensionName] 

609 except KeyError: 

610 # This is not a real dimension 

611 not_dimensions[dimensionName] = value 

612 del dataIdDict[dimensionName] 

613 continue 

614 

615 # Convert an integral type to an explicit int to simplify 

616 # comparisons here 

617 if isinstance(value, numbers.Integral): 

618 value = int(value) 

619 

620 if not isinstance(value, dimension.primaryKey.getPythonType()): 

621 for alternate in dimension.alternateKeys: 

622 if isinstance(value, alternate.getPythonType()): 

623 byRecord[dimensionName][alternate.name] = value 

624 del dataIdDict[dimensionName] 

625 log.debug("Converting dimension %s to %s.%s=%s", 

626 dimensionName, dimensionName, alternate.name, value) 

627 break 

628 else: 

629 log.warning("Type mismatch found for value '%r' provided for dimension %s. " 

630 "Could not find matching alternative (primary key has type %s) " 

631 "so attempting to use as-is.", 

632 value, dimensionName, dimension.primaryKey.getPythonType()) 

633 

634 # If we have some unrecognized dimensions we have to try to connect 

635 # them to records in other dimensions. This is made more complicated 

636 # by some dimensions having records with clashing names. A mitigation 

637 # is that we can tell by this point which dimensions are missing 

638 # for the DatasetType but this does not work for calibrations 

639 # where additional dimensions can be used to constrain the temporal 

640 # axis. 

641 if not_dimensions: 

642 # Calculate missing dimensions 

643 provided = set(newDataId) | set(kwds) | set(byRecord) 

644 missingDimensions = datasetType.dimensions.names - provided 

645 

646 # For calibrations we may well be needing temporal dimensions 

647 # so rather than always including all dimensions in the scan 

648 # restrict things a little. It is still possible for there 

649 # to be confusion over day_obs in visit vs exposure for example. 

650 # If we are not searching calibration collections things may 

651 # fail but they are going to fail anyway because of the 

652 # ambiguity of the dataId... 

653 candidateDimensions: Set[str] = set() 

654 candidateDimensions.update(missingDimensions) 

655 if datasetType.isCalibration(): 

656 for dim in self.registry.dimensions.getStaticDimensions(): 

657 if dim.temporal: 

658 candidateDimensions.add(str(dim)) 

659 

660 # Look up table for the first association with a dimension 

661 guessedAssociation: Dict[str, Dict[str, Any]] = defaultdict(dict) 

662 

663 # Keep track of whether an item is associated with multiple 

664 # dimensions. 

665 counter: Counter[str] = Counter() 

666 assigned: Dict[str, Set[str]] = defaultdict(set) 

667 

668 # Go through the missing dimensions and associate the 

669 # given names with records within those dimensions 

670 for dimensionName in candidateDimensions: 

671 dimension = self.registry.dimensions.getStaticDimensions()[dimensionName] 

672 fields = dimension.metadata.names | dimension.uniqueKeys.names 

673 for field in not_dimensions: 

674 if field in fields: 

675 guessedAssociation[dimensionName][field] = not_dimensions[field] 

676 counter[dimensionName] += 1 

677 assigned[field].add(dimensionName) 

678 

679 # There is a chance we have allocated a single dataId item 

680 # to multiple dimensions. Need to decide which should be retained. 

681 # For now assume that the most popular alternative wins. 

682 # This means that day_obs with seq_num will result in 

683 # exposure.day_obs and not visit.day_obs 

684 # Also prefer an explicitly missing dimension over an inferred 

685 # temporal dimension. 

686 for fieldName, assignedDimensions in assigned.items(): 

687 if len(assignedDimensions) > 1: 

688 # Pick the most popular (preferring mandatory dimensions) 

689 requiredButMissing = assignedDimensions.intersection(missingDimensions) 

690 if requiredButMissing: 

691 candidateDimensions = requiredButMissing 

692 else: 

693 candidateDimensions = assignedDimensions 

694 

695 # Select the relevant items and get a new restricted 

696 # counter. 

697 theseCounts = {k: v for k, v in counter.items() if k in candidateDimensions} 

698 duplicatesCounter: Counter[str] = Counter() 

699 duplicatesCounter.update(theseCounts) 

700 

701 # Choose the most common. If they are equally common 

702 # we will pick the one that was found first. 

703 # Returns a list of tuples 

704 selected = duplicatesCounter.most_common(1)[0][0] 

705 

706 log.debug("Ambiguous dataId entry '%s' associated with multiple dimensions: %s." 

707 " Removed ambiguity by choosing dimension %s.", 

708 fieldName, ", ".join(assignedDimensions), selected) 

709 

710 for candidateDimension in assignedDimensions: 

711 if candidateDimension != selected: 

712 del guessedAssociation[candidateDimension][fieldName] 

713 

714 # Update the record look up dict with the new associations 

715 for dimensionName, values in guessedAssociation.items(): 

716 if values: # A dict might now be empty 

717 log.debug("Assigned non-dimension dataId keys to dimension %s: %s", 

718 dimensionName, values) 

719 byRecord[dimensionName].update(values) 

720 

721 if byRecord: 

722 # Some record specifiers were found so we need to convert 

723 # them to the Id form 

724 for dimensionName, values in byRecord.items(): 

725 if dimensionName in newDataId: 

726 log.warning("DataId specified explicit %s dimension value of %s in addition to" 

727 " general record specifiers for it of %s. Ignoring record information.", 

728 dimensionName, newDataId[dimensionName], str(values)) 

729 continue 

730 

731 # Build up a WHERE expression -- use single quotes 

732 def quote(s: Any) -> str: 

733 if isinstance(s, str): 

734 return f"'{s}'" 

735 else: 

736 return s 

737 

738 where = " AND ".join(f"{dimensionName}.{k} = {quote(v)}" 

739 for k, v in values.items()) 

740 

741 # Hopefully we get a single record that matches 

742 records = set(self.registry.queryDimensionRecords(dimensionName, dataId=newDataId, 

743 where=where, **kwds)) 

744 

745 if len(records) != 1: 

746 if len(records) > 1: 

747 log.debug("Received %d records from constraints of %s", len(records), str(values)) 

748 for r in records: 

749 log.debug("- %s", str(r)) 

750 raise RuntimeError(f"DataId specification for dimension {dimensionName} is not" 

751 f" uniquely constrained to a single dataset by {values}." 

752 f" Got {len(records)} results.") 

753 raise RuntimeError(f"DataId specification for dimension {dimensionName} matched no" 

754 f" records when constrained by {values}") 

755 

756 # Get the primary key from the real dimension object 

757 dimension = self.registry.dimensions.getStaticDimensions()[dimensionName] 

758 if not isinstance(dimension, Dimension): 

759 raise RuntimeError( 

760 f"{dimension.name} is not a true dimension, and cannot be used in data IDs." 

761 ) 

762 newDataId[dimensionName] = getattr(records.pop(), dimension.primaryKey.name) 

763 

764 # We have modified the dataId so need to switch to it 

765 dataId = newDataId 

766 

767 if datasetType.isCalibration(): 

768 # Because this is a calibration dataset, first try to 

769 # standardize the data ID without restricting the dimensions to 

770 # those of the dataset type requested, because there may be extra 

771 # dimensions that provide temporal information for a validity-range 

772 # lookup. 

773 dataId = DataCoordinate.standardize(dataId, universe=self.registry.dimensions, 

774 defaults=self.registry.defaults.dataId, **kwds) 

775 if dataId.graph.temporal: 

776 dataId = self.registry.expandDataId(dataId) 

777 timespan = dataId.timespan 

778 else: 

779 # Standardize the data ID to just the dimensions of the dataset 

780 # type instead of letting registry.findDataset do it, so we get the 

781 # result even if no dataset is found. 

782 dataId = DataCoordinate.standardize(dataId, graph=datasetType.dimensions, 

783 defaults=self.registry.defaults.dataId, **kwds) 

784 # Always lookup the DatasetRef, even if one is given, to ensure it is 

785 # present in the current collection. 

786 ref = self.registry.findDataset(datasetType, dataId, collections=collections, timespan=timespan) 

787 if ref is None: 

788 if allowUnresolved: 

789 return DatasetRef(datasetType, dataId) 

790 else: 

791 if collections is None: 

792 collections = self.registry.defaults.collections 

793 raise LookupError(f"Dataset {datasetType.name} with data ID {dataId} " 

794 f"could not be found in collections {collections}.") 

795 if idNumber is not None and idNumber != ref.id: 

796 if collections is None: 

797 collections = self.registry.defaults.collections 

798 raise ValueError(f"DatasetRef.id provided ({idNumber}) does not match " 

799 f"id ({ref.id}) in registry in collections {collections}.") 

800 return ref 

801 

802 @transactional 

803 def put(self, obj: Any, datasetRefOrType: Union[DatasetRef, DatasetType, str], 

804 dataId: Optional[DataId] = None, *, 

805 run: Optional[str] = None, 

806 **kwds: Any) -> DatasetRef: 

807 """Store and register a dataset. 

808 

809 Parameters 

810 ---------- 

811 obj : `object` 

812 The dataset. 

813 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

814 When `DatasetRef` is provided, ``dataId`` should be `None`. 

815 Otherwise the `DatasetType` or name thereof. 

816 dataId : `dict` or `DataCoordinate` 

817 A `dict` of `Dimension` link name, value pairs that label the 

818 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

819 should be provided as the second argument. 

820 run : `str`, optional 

821 The name of the run the dataset should be added to, overriding 

822 ``self.run``. 

823 kwds 

824 Additional keyword arguments used to augment or construct a 

825 `DataCoordinate`. See `DataCoordinate.standardize` 

826 parameters. 

827 

828 Returns 

829 ------- 

830 ref : `DatasetRef` 

831 A reference to the stored dataset, updated with the correct id if 

832 given. 

833 

834 Raises 

835 ------ 

836 TypeError 

837 Raised if the butler is read-only or if no run has been provided. 

838 """ 

839 log.debug("Butler put: %s, dataId=%s, run=%s", datasetRefOrType, dataId, run) 

840 if not self.isWriteable(): 

841 raise TypeError("Butler is read-only.") 

842 datasetType, dataId = self._standardizeArgs(datasetRefOrType, dataId, **kwds) 

843 if isinstance(datasetRefOrType, DatasetRef) and datasetRefOrType.id is not None: 

844 raise ValueError("DatasetRef must not be in registry, must have None id") 

845 

846 # Add Registry Dataset entry. 

847 dataId = self.registry.expandDataId(dataId, graph=datasetType.dimensions, **kwds) 

848 ref, = self.registry.insertDatasets(datasetType, run=run, dataIds=[dataId]) 

849 

850 # Add Datastore entry. 

851 self.datastore.put(obj, ref) 

852 

853 return ref 

854 
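# Illustrative usage sketch: a typical put() call against a run collection.
# The dataset type name ("calexp"), run, and data ID values are hypothetical.
#
#     butler = Butler("/path/to/repo", run="u/alice/DM-50000/a")
#     ref = butler.put(exposure, "calexp",
#                      instrument="HSC", visit=903334, detector=42)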

855 def getDirect(self, ref: DatasetRef, *, parameters: Optional[Dict[str, Any]] = None) -> Any: 

856 """Retrieve a stored dataset. 

857 

858 Unlike `Butler.get`, this method allows datasets outside the Butler's 

859 collection to be read as long as the `DatasetRef` that identifies them 

860 can be obtained separately. 

861 

862 Parameters 

863 ---------- 

864 ref : `DatasetRef` 

865 Resolved reference to an already stored dataset. 

866 parameters : `dict` 

867 Additional StorageClass-defined options to control reading, 

868 typically used to efficiently read only a subset of the dataset. 

869 

870 Returns 

871 ------- 

872 obj : `object` 

873 The dataset. 

874 """ 

875 return self.datastore.get(ref, parameters=parameters) 

876 

877 def getDirectDeferred(self, ref: DatasetRef, *, 

878 parameters: Union[dict, None] = None) -> DeferredDatasetHandle: 

879 """Create a `DeferredDatasetHandle` which can later retrieve a dataset, 

880 from a resolved `DatasetRef`. 

881 

882 Parameters 

883 ---------- 

884 ref : `DatasetRef` 

885 Resolved reference to an already stored dataset. 

886 parameters : `dict` 

887 Additional StorageClass-defined options to control reading, 

888 typically used to efficiently read only a subset of the dataset. 

889 

890 Returns 

891 ------- 

892 obj : `DeferredDatasetHandle` 

893 A handle which can be used to retrieve a dataset at a later time. 

894 

895 Raises 

896 ------ 

897 AmbiguousDatasetError 

898 Raised if ``ref.id is None``, i.e. the reference is unresolved. 

899 """ 

900 if ref.id is None: 

901 raise AmbiguousDatasetError( 

902 f"Dataset of type {ref.datasetType.name} with data ID {ref.dataId} is not resolved." 

903 ) 

904 return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters) 

905 

906 def getDeferred(self, datasetRefOrType: Union[DatasetRef, DatasetType, str], 

907 dataId: Optional[DataId] = None, *, 

908 parameters: Union[dict, None] = None, 

909 collections: Any = None, 

910 **kwds: Any) -> DeferredDatasetHandle: 

911 """Create a `DeferredDatasetHandle` which can later retrieve a dataset, 

912 after an immediate registry lookup. 

913 

914 Parameters 

915 ---------- 

916 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

917 When `DatasetRef` the `dataId` should be `None`. 

918 Otherwise the `DatasetType` or name thereof. 

919 dataId : `dict` or `DataCoordinate`, optional 

920 A `dict` of `Dimension` link name, value pairs that label the 

921 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

922 should be provided as the first argument. 

923 parameters : `dict` 

924 Additional StorageClass-defined options to control reading, 

925 typically used to efficiently read only a subset of the dataset. 

926 collections : Any, optional 

927 Collections to be searched, overriding ``self.collections``. 

928 Can be any of the types supported by the ``collections`` argument 

929 to butler construction. 

930 kwds 

931 Additional keyword arguments used to augment or construct a 

932 `DataId`. See `DataId` parameters. 

933 

934 Returns 

935 ------- 

936 obj : `DeferredDatasetHandle` 

937 A handle which can be used to retrieve a dataset at a later time. 

938 

939 Raises 

940 ------ 

941 LookupError 

942 Raised if no matching dataset exists in the `Registry` (and 

943 ``allowUnresolved is False``). 

944 ValueError 

945 Raised if a resolved `DatasetRef` was passed as an input, but it 

946 differs from the one found in the registry. 

947 TypeError 

948 Raised if no collections were provided. 

949 """ 

950 ref = self._findDatasetRef(datasetRefOrType, dataId, collections=collections, **kwds) 

951 return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters) 

952 

953 def get(self, datasetRefOrType: Union[DatasetRef, DatasetType, str], 

954 dataId: Optional[DataId] = None, *, 

955 parameters: Optional[Dict[str, Any]] = None, 

956 collections: Any = None, 

957 **kwds: Any) -> Any: 

958 """Retrieve a stored dataset. 

959 

960 Parameters 

961 ---------- 

962 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

963 When `DatasetRef` the `dataId` should be `None`. 

964 Otherwise the `DatasetType` or name thereof. 

965 dataId : `dict` or `DataCoordinate` 

966 A `dict` of `Dimension` link name, value pairs that label the 

967 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

968 should be provided as the first argument. 

969 parameters : `dict` 

970 Additional StorageClass-defined options to control reading, 

971 typically used to efficiently read only a subset of the dataset. 

972 collections : Any, optional 

973 Collections to be searched, overriding ``self.collections``. 

974 Can be any of the types supported by the ``collections`` argument 

975 to butler construction. 

976 kwds 

977 Additional keyword arguments used to augment or construct a 

978 `DataCoordinate`. See `DataCoordinate.standardize` 

979 parameters. 

980 

981 Returns 

982 ------- 

983 obj : `object` 

984 The dataset. 

985 

986 Raises 

987 ------ 

988 ValueError 

989 Raised if a resolved `DatasetRef` was passed as an input, but it 

990 differs from the one found in the registry. 

991 LookupError 

992 Raised if no matching dataset exists in the `Registry`. 

993 TypeError 

994 Raised if no collections were provided. 

995 

996 Notes 

997 ----- 

998 When looking up datasets in a `~CollectionType.CALIBRATION` collection, 

999 this method requires that the given data ID include temporal dimensions 

1000 beyond the dimensions of the dataset type itself, in order to find the 

1001 dataset with the appropriate validity range. For example, a "bias" 

1002 dataset with native dimensions ``{instrument, detector}`` could be 

1003 fetched with a ``{instrument, detector, exposure}`` data ID, because 

1004 ``exposure`` is a temporal dimension. 

1005 """ 

1006 log.debug("Butler get: %s, dataId=%s, parameters=%s", datasetRefOrType, dataId, parameters) 

1007 ref = self._findDatasetRef(datasetRefOrType, dataId, collections=collections, **kwds) 

1008 return self.getDirect(ref, parameters=parameters) 

1009 
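# Illustrative usage sketch: read a dataset with a plain data ID, or with the
# "record" form that _findDatasetRef resolves (e.g. exposure.obs_id). Dataset
# type names and data ID values are hypothetical.
#
#     calexp = butler.get("calexp", instrument="HSC", visit=903334, detector=42)
#     raw = butler.get("raw", {"exposure.obs_id": "HSCA90333400"},
#                      instrument="HSC", detector=42)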

1010 def getURIs(self, datasetRefOrType: Union[DatasetRef, DatasetType, str], 

1011 dataId: Optional[DataId] = None, *, 

1012 predict: bool = False, 

1013 collections: Any = None, 

1014 run: Optional[str] = None, 

1015 **kwds: Any) -> Tuple[Optional[ButlerURI], Dict[str, ButlerURI]]: 

1016 """Returns the URIs associated with the dataset. 

1017 

1018 Parameters 

1019 ---------- 

1020 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

1021 When `DatasetRef` the `dataId` should be `None`. 

1022 Otherwise the `DatasetType` or name thereof. 

1023 dataId : `dict` or `DataCoordinate` 

1024 A `dict` of `Dimension` link name, value pairs that label the 

1025 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

1026 should be provided as the first argument. 

1027 predict : `bool` 

1028 If `True`, allow URIs to be returned of datasets that have not 

1029 been written. 

1030 collections : Any, optional 

1031 Collections to be searched, overriding ``self.collections``. 

1032 Can be any of the types supported by the ``collections`` argument 

1033 to butler construction. 

1034 run : `str`, optional 

1035 Run to use for predictions, overriding ``self.run``. 

1036 kwds 

1037 Additional keyword arguments used to augment or construct a 

1038 `DataCoordinate`. See `DataCoordinate.standardize` 

1039 parameters. 

1040 

1041 Returns 

1042 ------- 

1043 primary : `ButlerURI` 

1044 The URI to the primary artifact associated with this dataset. 

1045 If the dataset was disassembled within the datastore this 

1046 may be `None`. 

1047 components : `dict` 

1048 URIs to any components associated with the dataset artifact. 

1049 Can be empty if there are no components. 

1050 """ 

1051 ref = self._findDatasetRef(datasetRefOrType, dataId, allowUnresolved=predict, 

1052 collections=collections, **kwds) 

1053 if ref.id is None: # only possible if predict is True 

1054 if run is None: 

1055 run = self.run 

1056 if run is None: 

1057 raise TypeError("Cannot predict location with run=None.") 

1058 # Lie about ID, because we can't guess it, and only 

1059 # Datastore.getURIs() will ever see it (and it doesn't use it). 

1060 ref = ref.resolved(id=0, run=run) 

1061 return self.datastore.getURIs(ref, predict) 

1062 

1063 def getURI(self, datasetRefOrType: Union[DatasetRef, DatasetType, str], 

1064 dataId: Optional[DataId] = None, *, 

1065 predict: bool = False, 

1066 collections: Any = None, 

1067 run: Optional[str] = None, 

1068 **kwds: Any) -> ButlerURI: 

1069 """Return the URI to the Dataset. 

1070 

1071 Parameters 

1072 ---------- 

1073 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

1074 When `DatasetRef` the `dataId` should be `None`. 

1075 Otherwise the `DatasetType` or name thereof. 

1076 dataId : `dict` or `DataCoordinate` 

1077 A `dict` of `Dimension` link name, value pairs that label the 

1078 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

1079 should be provided as the first argument. 

1080 predict : `bool` 

1081 If `True`, allow URIs to be returned of datasets that have not 

1082 been written. 

1083 collections : Any, optional 

1084 Collections to be searched, overriding ``self.collections``. 

1085 Can be any of the types supported by the ``collections`` argument 

1086 to butler construction. 

1087 run : `str`, optional 

1088 Run to use for predictions, overriding ``self.run``. 

1089 kwds 

1090 Additional keyword arguments used to augment or construct a 

1091 `DataCoordinate`. See `DataCoordinate.standardize` 

1092 parameters. 

1093 

1094 Returns 

1095 ------- 

1096 uri : `ButlerURI` 

1097 URI pointing to the Dataset within the datastore. If the 

1098 Dataset does not exist in the datastore, and if ``predict`` is 

1099 `True`, the URI will be a prediction and will include a URI 

1100 fragment "#predicted". 

1101 If the datastore does not have entities that relate well 

1102 to the concept of a URI the returned URI string will be 

1103 descriptive. The returned URI is not guaranteed to be obtainable. 

1104 

1105 Raises 

1106 ------ 

1107 LookupError 

1108 A URI has been requested for a dataset that does not exist and 

1109 guessing is not allowed. 

1110 ValueError 

1111 Raised if a resolved `DatasetRef` was passed as an input, but it 

1112 differs from the one found in the registry. 

1113 TypeError 

1114 Raised if no collections were provided. 

1115 RuntimeError 

1116 Raised if a URI is requested for a dataset that consists of 

1117 multiple artifacts. 

1118 """ 

1119 primary, components = self.getURIs(datasetRefOrType, dataId=dataId, predict=predict, 

1120 collections=collections, run=run, **kwds) 

1121 

1122 if primary is None or components: 

1123 raise RuntimeError(f"Dataset ({datasetRefOrType}) includes distinct URIs for components. " 

1124 "Use Butler.getURIs() instead.") 

1125 return primary 

1126 
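# Illustrative usage sketch: locate a stored dataset, or predict where one
# would be written. Names and values are hypothetical.
#
#     uri = butler.getURI("calexp", instrument="HSC", visit=903334, detector=42)
#     predicted = butler.getURI("calexp", instrument="HSC", visit=903334,
#                               detector=42, predict=True,
#                               run="u/alice/DM-50000/a")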

1127 def datasetExists(self, datasetRefOrType: Union[DatasetRef, DatasetType, str], 

1128 dataId: Optional[DataId] = None, *, 

1129 collections: Any = None, 

1130 **kwds: Any) -> bool: 

1131 """Return True if the Dataset is actually present in the Datastore. 

1132 

1133 Parameters 

1134 ---------- 

1135 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

1136 When `DatasetRef` the `dataId` should be `None`. 

1137 Otherwise the `DatasetType` or name thereof. 

1138 dataId : `dict` or `DataCoordinate` 

1139 A `dict` of `Dimension` link name, value pairs that label the 

1140 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

1141 should be provided as the first argument. 

1142 collections : Any, optional 

1143 Collections to be searched, overriding ``self.collections``. 

1144 Can be any of the types supported by the ``collections`` argument 

1145 to butler construction. 

1146 kwds 

1147 Additional keyword arguments used to augment or construct a 

1148 `DataCoordinate`. See `DataCoordinate.standardize` 

1149 parameters. 

1150 

1151 Raises 

1152 ------ 

1153 LookupError 

1154 Raised if the dataset is not even present in the Registry. 

1155 ValueError 

1156 Raised if a resolved `DatasetRef` was passed as an input, but it 

1157 differs from the one found in the registry. 

1158 TypeError 

1159 Raised if no collections were provided. 

1160 """ 

1161 ref = self._findDatasetRef(datasetRefOrType, dataId, collections=collections, **kwds) 

1162 return self.datastore.exists(ref) 

1163 

1164 def removeRuns(self, names: Iterable[str], unstore: bool = True) -> None: 

1165 """Remove one or more `~CollectionType.RUN` collections and the 

1166 datasets within them. 

1167 

1168 Parameters 

1169 ---------- 

1170 names : `Iterable` [ `str` ] 

1171 The names of the collections to remove. 

1172 unstore : `bool`, optional 

1173 If `True` (default), delete datasets from all datastores in which 

1174 they are present, and attempt to rollback the registry deletions if 

1175 datastore deletions fail (which may not always be possible). If 

1176 `False`, datastore records for these datasets are still removed, 

1177 but any artifacts (e.g. files) will not be. 

1178 

1179 Raises 

1180 ------ 

1181 TypeError 

1182 Raised if one or more collections are not of type 

1183 `~CollectionType.RUN`. 

1184 """ 

1185 if not self.isWriteable(): 

1186 raise TypeError("Butler is read-only.") 

1187 names = list(names) 

1188 refs: List[DatasetRef] = [] 

1189 for name in names: 

1190 collectionType = self.registry.getCollectionType(name) 

1191 if collectionType is not CollectionType.RUN: 

1192 raise TypeError(f"The collection type of '{name}' is {collectionType.name}, not RUN.") 

1193 refs.extend(self.registry.queryDatasets(..., collections=name, findFirst=True)) 

1194 with self.registry.transaction(): 

1195 if unstore: 

1196 for ref in refs: 

1197 if self.datastore.exists(ref): 

1198 self.datastore.trash(ref) 

1199 else: 

1200 self.datastore.forget(refs) 

1201 for name in names: 

1202 self.registry.removeCollection(name) 

1203 if unstore: 

1204 # Point of no return for removing artifacts 

1205 self.datastore.emptyTrash() 

1206 

1207 def pruneCollection(self, name: str, purge: bool = False, unstore: bool = False, 

1208 unlink: Optional[List[str]] = None) -> None: 

1209 """Remove a collection and possibly prune datasets within it. 

1210 

1211 Parameters 

1212 ---------- 

1213 name : `str` 

1214 Name of the collection to remove. If this is a 

1215 `~CollectionType.TAGGED` or `~CollectionType.CHAINED` collection, 

1216 datasets within the collection are not modified unless ``unstore`` 

1217 is `True`. If this is a `~CollectionType.RUN` collection, 

1218 ``purge`` and ``unstore`` must be `True`, and all datasets in it 

1219 are fully removed from the data repository. 

1220 purge : `bool`, optional 

1221 If `True`, permit `~CollectionType.RUN` collections to be removed, 

1222 fully removing datasets within them. Requires ``unstore=True`` as 

1223 well as an added precaution against accidental deletion. Must be 

1224 `False` (default) if the collection is not a ``RUN``. 

1225 unstore : `bool`, optional 

1226 If `True`, remove all datasets in the collection from all 

1227 datastores in which they appear. 

1228 unlink : `list` [ `str` ], optional 

1229 Before removing the given collection, unlink it from these 

1230 parent collections. 

1231 

1232 Raises 

1233 ------ 

1234 TypeError 

1235 Raised if the butler is read-only or arguments are mutually 

1236 inconsistent. 

1237 """ 

1238 

1239 # See pruneDatasets comments for more information about the logic here; 

1240 # the cases are almost the same, but here we can rely on Registry to 

1241 # take care of everything but Datastore deletion when we remove the 

1242 # collection. 

1243 if not self.isWriteable(): 

1244 raise TypeError("Butler is read-only.") 

1245 collectionType = self.registry.getCollectionType(name) 

1246 if purge and not unstore: 

1247 raise PurgeWithoutUnstorePruneCollectionsError() 

1248 if collectionType is CollectionType.RUN and not purge: 

1249 raise RunWithoutPurgePruneCollectionsError(collectionType) 

1250 if collectionType is not CollectionType.RUN and purge: 

1251 raise PurgeUnsupportedPruneCollectionsError(collectionType) 

1252 

1253 def remove(child: str, parent: str) -> None: 

1254 """Remove a child collection from a parent collection.""" 

1255 # Remove child from parent. 

1256 chain = list(self.registry.getCollectionChain(parent)) 

1257 try: 

1258 chain.remove(child) 

1259 except ValueError as e: 

1260 raise RuntimeError(f"{child} is not a child of {parent}") from e 

1261 self.registry.setCollectionChain(parent, chain) 

1262 

1263 with self.registry.transaction(): 

1264 if unlink: 

1265 for parent in unlink: 

1266 remove(name, parent) 

1267 if unstore: 

1268 for ref in self.registry.queryDatasets(..., collections=name, findFirst=True): 

1269 if self.datastore.exists(ref): 

1270 self.datastore.trash(ref) 

1271 self.registry.removeCollection(name) 

1272 if unstore: 

1273 # Point of no return for removing artifacts 

1274 self.datastore.emptyTrash() 

1275 
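# Illustrative usage sketch of the argument rules enforced above: removing a
# RUN collection requires purge=True and unstore=True, while a TAGGED or
# CHAINED collection is removed without purge. Collection names are
# hypothetical.
#
#     butler.pruneCollection("u/alice/DM-50000/a", purge=True, unstore=True)
#     butler.pruneCollection("u/alice/my-tag", unstore=True)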

1276 def pruneDatasets(self, refs: Iterable[DatasetRef], *, 

1277 disassociate: bool = True, 

1278 unstore: bool = False, 

1279 tags: Iterable[str] = (), 

1280 purge: bool = False, 

1281 run: Optional[str] = None) -> None: 

1282 """Remove one or more datasets from a collection and/or storage. 

1283 

1284 Parameters 

1285 ---------- 

1286 refs : `~collections.abc.Iterable` of `DatasetRef` 

1287 Datasets to prune. These must be "resolved" references (not just 

1288 a `DatasetType` and data ID). 

1289 disassociate : `bool`, optional 

1290 Disassociate pruned datasets from ``tags``, or from all collections 

1291 if ``purge=True``. 

1292 unstore : `bool`, optional 

1293 If `True` (`False` is default) remove these datasets from all 

1294 datastores known to this butler. Note that this will make it 

1295 impossible to retrieve these datasets even via other collections. 

1296 Datasets that are already not stored are ignored by this option. 

1297 tags : `Iterable` [ `str` ], optional 

1298 `~CollectionType.TAGGED` collections to disassociate the datasets 

1299 from. Ignored if ``disassociate`` is `False` or ``purge`` is 

1300 `True`. 

1301 purge : `bool`, optional 

1302 If `True` (`False` is default), completely remove the dataset from 

1303 the `Registry`. To prevent accidental deletions, ``purge`` may 

1304 only be `True` if all of the following conditions are met: 

1305 

1306 - All given datasets are in the given run. 

1307 - ``disassociate`` is `True`; 

1308 - ``unstore`` is `True`. 

1309 

1310 This mode may remove provenance information from datasets other 

1311 than those provided, and should be used with extreme care. 

1312 

1313 Raises 

1314 ------ 

1315 TypeError 

1316 Raised if the butler is read-only, if no collection was provided, 

1317 or the conditions for ``purge=True`` were not met. 

1318 """ 

1319 if not self.isWriteable(): 

1320 raise TypeError("Butler is read-only.") 

1321 if purge: 

1322 if not disassociate: 

1323 raise TypeError("Cannot pass purge=True without disassociate=True.") 

1324 if not unstore: 

1325 raise TypeError("Cannot pass purge=True without unstore=True.") 

1326 elif disassociate: 

1327 tags = tuple(tags) 

1328 if not tags: 

1329 raise TypeError("No tags provided but disassociate=True.") 

1330 for tag in tags: 

1331 collectionType = self.registry.getCollectionType(tag) 

1332 if collectionType is not CollectionType.TAGGED: 

1333 raise TypeError(f"Cannot disassociate from collection '{tag}' " 

1334 f"of non-TAGGED type {collectionType.name}.") 

1335 # Transform possibly-single-pass iterable into something we can iterate 

1336 # over multiple times. 

1337 refs = list(refs) 

1338 # Pruning a component of a DatasetRef makes no sense since registry 

1339 # doesn't know about components and datastore might not store 

1340 # components in a separate file 

1341 for ref in refs: 

1342 if ref.datasetType.component(): 

1343 raise ValueError(f"Can not prune a component of a dataset (ref={ref})") 

1344 # We don't need an unreliable Datastore transaction for this, because 

1345 # we've been extra careful to ensure that Datastore.trash only involves 

1346 # mutating the Registry (it can _look_ at Datastore-specific things, 

1347 # but shouldn't change them), and hence all operations here are 

1348 # Registry operations. 

1349 with self.registry.transaction(): 

1350 if unstore: 

1351 for ref in refs: 

1352 # There is a difference between a concrete composite 

1353 # and virtual composite. In a virtual composite the 

1354 # datastore is never given the top level DatasetRef. In 

1355 # the concrete composite the datastore knows all the 

1356 # refs and will clean up itself if asked to remove the 

1357 # parent ref. We can not check configuration for this 

1358 # since we can not trust that the configuration is the 

1359 # same. We therefore have to ask if the ref exists or 

1360 # not. This is consistent with the fact that we want 

1361 # to ignore already-removed-from-datastore datasets 

1362 # anyway. 

1363 if self.datastore.exists(ref): 

1364 self.datastore.trash(ref) 

1365 if purge: 

1366 self.registry.removeDatasets(refs) 

1367 elif disassociate: 

1368 assert tags, "Guaranteed by earlier logic in this function." 

1369 for tag in tags: 

1370 self.registry.disassociate(tag, refs) 

1371 # We've exited the Registry transaction, and apparently committed. 

1372 # (if there was an exception, everything rolled back, and it's as if 

1373 # nothing happened - and we never get here). 

1374 # Datastore artifacts are not yet gone, but they're clearly marked 

1375 # as trash, so if we fail to delete now because of (e.g.) filesystem 

1376 # problems we can try again later, and if manual administrative 

1377 # intervention is required, it's pretty clear what that should entail: 

1378 # deleting everything on disk and in private Datastore tables that is 

1379 # in the dataset_location_trash table. 

1380 if unstore: 

1381 # Point of no return for removing artifacts 

1382 self.datastore.emptyTrash() 

1383 
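# A minimal usage sketch (not part of the module): the collection and dataset
# type names below are hypothetical, and ``butler`` must be writeable.
#
#     refs = list(butler.registry.queryDatasets("flat", collections="my-tagged"))
#     # Remove the datasets from the TAGGED collection but keep them stored:
#     butler.pruneDatasets(refs, disassociate=True, tags=("my-tagged",))
#     # Delete the stored artifacts as well, leaving the Registry entries:
#     butler.pruneDatasets(refs, disassociate=False, unstore=True)
#     # ``purge=True`` additionally removes the Registry entries and requires
#     # both ``disassociate=True`` and ``unstore=True``.
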

1384 @transactional 

1385 def ingest(self, *datasets: FileDataset, transfer: Optional[str] = "auto", run: Optional[str] = None, 

1386 ) -> None: 

1387 """Store and register one or more datasets that already exist on disk. 

1388 

1389 Parameters 

1390 ---------- 

1391 datasets : `FileDataset` 

1392 Each positional argument is a struct containing information about 

1393 a file to be ingested, including its path (either absolute or 

1394 relative to the datastore root, if applicable), a `DatasetRef`, 

1395 and optionally a formatter class or its fully-qualified string 

1396 name. If a formatter is not provided, the formatter that would be 

1397 used for `put` is assumed. On successful return, all 

1398 `FileDataset.refs` attributes will have their `DatasetRef.id` 

1399 attribute populated and all `FileDataset.formatter` attributes will 

1400 be set to the formatter class used. `FileDataset.path` attributes 

1401 may be modified to put paths in whatever the datastore considers a 

1402 standardized form. 

1403 transfer : `str`, optional 

1404 If not `None`, must be one of 'auto', 'move', 'copy', 'direct', 

1405 'hardlink', 'relsymlink' or 'symlink', indicating how to transfer 

1406 the file. 

1407 run : `str`, optional 

1408 The name of the run ingested datasets should be added to, 

1409 overriding ``self.run``. 

1410 

1411 Raises 

1412 ------ 

1413 TypeError 

1414 Raised if the butler is read-only or if no run was provided. 

1415 NotImplementedError 

1416 Raised if the `Datastore` does not support the given transfer mode. 

1417 DatasetTypeNotSupportedError 

1418 Raised if one or more files to be ingested have a dataset type that 

1419 is not supported by the `Datastore`. 

1420 FileNotFoundError 

1421 Raised if one of the given files does not exist. 

1422 FileExistsError 

1423 Raised if transfer is not `None` but the (internal) location the 

1424 file would be moved to is already occupied. 

1425 

1426 Notes 

1427 ----- 

1428 This operation is not fully exception safe: if a database operation 

1429 fails, the given `FileDataset` instances may be only partially updated. 

1430 

1431 It is atomic in terms of database operations (they will either all 

1432 succeed or all fail), provided the database engine implements 

1433 transactions correctly. It will attempt to be atomic in terms of 

1434 filesystem operations as well, but this cannot be implemented 

1435 rigorously for most datastores. 

1436 """ 

1437 if not self.isWriteable(): 

1438 raise TypeError("Butler is read-only.") 

1439 progress = Progress("lsst.daf.butler.Butler.ingest", level=logging.DEBUG) 

1440 # Reorganize the inputs so they're grouped by DatasetType and then 

1441 # data ID. We also include a list of DatasetRefs for each FileDataset 

1442 # to hold the resolved DatasetRefs returned by the Registry, before 

1443 # it's safe to swap them into FileDataset.refs. 

1444 # Some type annotation aliases to make that clearer: 

1445 GroupForType = Dict[DataCoordinate, Tuple[FileDataset, List[DatasetRef]]] 

1446 GroupedData = MutableMapping[DatasetType, GroupForType] 

1447 # The actual data structure: 

1448 groupedData: GroupedData = defaultdict(dict) 

1449 # And the nested loop that populates it: 

1450 for dataset in progress.wrap(datasets, desc="Grouping by dataset type"): 

1451 # This list is intentionally shared across the inner loop, since it's 

1452 # associated with `dataset`. 

1453 resolvedRefs: List[DatasetRef] = [] 

1454 for ref in dataset.refs: 

1455 if ref.dataId in groupedData[ref.datasetType]: 

1456 raise ConflictingDefinitionError(f"Ingest conflict. Dataset {dataset.path} has the same" 

1457 " DataId as other ingest dataset" 

1458 f" {groupedData[ref.datasetType][ref.dataId][0].path}" 

1459 f" ({ref.dataId})") 

1460 groupedData[ref.datasetType][ref.dataId] = (dataset, resolvedRefs) 

1461 

1462 # Now we can bulk-insert into Registry for each DatasetType. 

1463 allResolvedRefs: List[DatasetRef] = [] 

1464 for datasetType, groupForType in progress.iter_item_chunks(groupedData.items(), 

1465 desc="Bulk-inserting datasets by type"): 

1466 refs = self.registry.insertDatasets( 

1467 datasetType, 

1468 dataIds=groupForType.keys(), 

1469 run=run, 

1470 expand=self.datastore.needs_expanded_data_ids(transfer, datasetType), 

1471 ) 

1472 # Append those resolved DatasetRefs to the new lists we set up for 

1473 # them. 

1474 for ref, (_, resolvedRefs) in zip(refs, groupForType.values()): 

1475 resolvedRefs.append(ref) 

1476 

1477 # Go back to the original FileDatasets to replace their refs with the 

1478 # new resolved ones, and also build a big list of all refs. 

1479 allResolvedRefs = [] 

1480 for groupForType in progress.iter_chunks(groupedData.values(), 

1481 desc="Reassociating resolved dataset refs with files"): 

1482 for dataset, resolvedRefs in groupForType.values(): 

1483 dataset.refs = resolvedRefs 

1484 allResolvedRefs.extend(resolvedRefs) 

1485 

1486 # Bulk-insert everything into Datastore. 

1487 self.datastore.ingest(*datasets, transfer=transfer) 

1488 
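# A minimal usage sketch (not part of the module): the file path, dataset type
# name, data ID values, and run name are all hypothetical.
#
#     datasetType = butler.registry.getDatasetType("raw")
#     dataId = DataCoordinate.standardize(
#         {"instrument": "MyCam", "exposure": 1, "detector": 0},
#         universe=butler.registry.dimensions)
#     ref = DatasetRef(datasetType, dataId)
#     butler.ingest(FileDataset(path="/data/incoming/file.fits", refs=ref),
#                   transfer="symlink", run="MyCam/raw/all")
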

1489 @contextlib.contextmanager 

1490 def export(self, *, directory: Optional[str] = None, 

1491 filename: Optional[str] = None, 

1492 format: Optional[str] = None, 

1493 transfer: Optional[str] = None) -> Iterator[RepoExportContext]: 

1494 """Export datasets from the repository represented by this `Butler`. 

1495 

1496 This method is a context manager that returns a helper object 

1497 (`RepoExportContext`) that is used to indicate what information from 

1498 the repository should be exported. 

1499 

1500 Parameters 

1501 ---------- 

1502 directory : `str`, optional 

1503 Directory dataset files should be written to if ``transfer`` is not 

1504 `None`. 

1505 filename : `str`, optional 

1506 Name for the file that will include database information associated 

1507 with the exported datasets. If this is not an absolute path and 

1508 ``directory`` is not `None`, it will be written to ``directory`` 

1509 instead of the current working directory. Defaults to 

1510 "export.{format}". 

1511 format : `str`, optional 

1512 File format for the database information file. If `None`, the 

1513 extension of ``filename`` will be used. 

1514 transfer : `str`, optional 

1515 Transfer mode passed to `Datastore.export`. 

1516 

1517 Raises 

1518 ------ 

1519 TypeError 

1520 Raised if the set of arguments passed is inconsistent. 

1521 

1522 Examples 

1523 -------- 

1524 Typically the `Registry.queryDataIds` and `Registry.queryDatasets` 

1525 methods are used to provide the iterables over data IDs and/or datasets 

1526 to be exported:: 

1527 

1528 with butler.export(filename="exports.yaml") as export: 

1529 # Export all flats, but none of the dimension element rows 

1530 # (i.e. data ID information) associated with them. 

1531 export.saveDatasets(butler.registry.queryDatasets("flat"), 

1532 elements=()) 

1533 # Export all datasets that start with "deepCoadd_" and all of 

1534 # their associated data ID information. 

1535 export.saveDatasets(butler.registry.queryDatasets("deepCoadd_*")) 

1536 """ 

1537 if directory is None and transfer is not None: 

1538 raise TypeError("Cannot transfer without providing a directory.") 

1539 if transfer == "move": 

1540 raise TypeError("Transfer may not be 'move': export is read-only") 

1541 if format is None: 

1542 if filename is None: 

1543 raise TypeError("At least one of 'filename' or 'format' must be provided.") 

1544 else: 

1545 format = os.path.splitext(filename)[1].lstrip(".")  # drop the leading "." 

1546 elif filename is None: 

1547 filename = f"export.{format}" 

1548 if directory is not None: 

1549 filename = os.path.join(directory, filename) 

1550 BackendClass = getClassOf(self._config["repo_transfer_formats"][format]["export"]) 

1551 with open(filename, 'w') as stream: 

1552 backend = BackendClass(stream) 

1553 try: 

1554 helper = RepoExportContext(self.registry, self.datastore, backend=backend, 

1555 directory=directory, transfer=transfer) 

1556 yield helper 

1557 except BaseException: 
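# The bare re-raise below exists only so the ``else`` clause can call
# ``_finish()`` exclusively when the export body completed without error.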

1558 raise 

1559 else: 

1560 helper._finish() 

1561 
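# A sketch of exporting dataset files along with the metadata file (the
# directory, collection, and dataset type names are hypothetical):
#
#     with butler.export(directory="/tmp/export", filename="export.yaml",
#                        transfer="copy") as export:
#         export.saveDatasets(butler.registry.queryDatasets("bias",
#                                                           collections="my-calib"))
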

1562 def import_(self, *, directory: Optional[str] = None, 

1563 filename: Union[str, TextIO, None] = None, 

1564 format: Optional[str] = None, 

1565 transfer: Optional[str] = None, 

1566 skip_dimensions: Optional[Set] = None) -> None: 

1567 """Import datasets into this repository that were exported from a 

1568 different butler repository via `~lsst.daf.butler.Butler.export`. 

1569 

1570 Parameters 

1571 ---------- 

1572 directory : `str`, optional 

1573 Directory containing dataset files to import from. If `None`, 

1574 ``filename`` and all dataset file paths specified therein must 

1575 be absolute. 

1576 filename : `str` or `TextIO`, optional 

1577 A stream or name of file that contains database information 

1578 associated with the exported datasets, typically generated by 

1579 `~lsst.daf.butler.Butler.export`. If this is a string (name) and 

1580 is not an absolute path, does not exist in the current working 

1581 directory, and ``directory`` is not `None`, it is assumed to be in 

1582 ``directory``. Defaults to "export.{format}". 

1583 format : `str`, optional 

1584 File format for ``filename``. If `None`, the extension of 

1585 ``filename`` will be used. 

1586 transfer : `str`, optional 

1587 Transfer mode passed to `~lsst.daf.butler.Datastore.ingest`. 

1588 skip_dimensions : `set`, optional 

1589 Names of dimensions that should be skipped and not imported. 

1590 

1591 Raises 

1592 ------ 

1593 TypeError 

1594 Raised if the set of arguments passed is inconsistent, or if the 

1595 butler is read-only. 

1596 """ 

1597 if not self.isWriteable(): 

1598 raise TypeError("Butler is read-only.") 

1599 if format is None: 

1600 if filename is None: 

1601 raise TypeError("At least one of 'filename' or 'format' must be provided.") 

1602 else: 

1603 format = os.path.splitext(filename)[1].lstrip(".")  # type: ignore 

1604 elif filename is None: 

1605 filename = f"export.{format}" 

1606 if isinstance(filename, str) and directory is not None and not os.path.exists(filename): 

1607 filename = os.path.join(directory, filename) 

1608 BackendClass = getClassOf(self._config["repo_transfer_formats"][format]["import"]) 

1609 

1610 def doImport(importStream: TextIO) -> None: 

1611 backend = BackendClass(importStream, self.registry) 

1612 backend.register() 

1613 with self.transaction(): 

1614 backend.load(self.datastore, directory=directory, transfer=transfer, 

1615 skip_dimensions=skip_dimensions) 

1616 

1617 if isinstance(filename, str): 

1618 with open(filename, "r") as stream: 

1619 doImport(stream) 

1620 else: 

1621 doImport(filename) 

1622 
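# A sketch of loading the export produced above into another repository (the
# repository path and transfer mode are hypothetical):
#
#     other = Butler("/path/to/other/repo", writeable=True)
#     other.import_(directory="/tmp/export", filename="export.yaml",
#                   transfer="copy")
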

1623 def validateConfiguration(self, logFailures: bool = False, 

1624 datasetTypeNames: Optional[Iterable[str]] = None, 

1625 ignore: Optional[Iterable[str]] = None) -> None: 

1626 """Validate butler configuration. 

1627 

1628 Checks that each `DatasetType` can be stored in the `Datastore`. 

1629 

1630 Parameters 

1631 ---------- 

1632 logFailures : `bool`, optional 

1633 If `True`, output a log message for every validation error 

1634 detected. 

1635 datasetTypeNames : iterable of `str`, optional 

1636 The `DatasetType` names that should be checked. This allows 

1637 only a subset to be selected. 

1638 ignore : iterable of `str`, optional 

1639 Names of DatasetTypes to skip over. This can be used to skip 

1640 known problems. If a named `DatasetType` corresponds to a 

1641 composite, all components of that `DatasetType` will also be 

1642 ignored. 

1643 

1644 Raises 

1645 ------ 

1646 ButlerValidationError 

1647 Raised if there is some inconsistency with how this Butler 

1648 is configured. 

1649 """ 

1650 if datasetTypeNames: 

1651 datasetTypes = [self.registry.getDatasetType(name) for name in datasetTypeNames] 

1652 else: 

1653 datasetTypes = list(self.registry.queryDatasetTypes()) 

1654 

1655 # filter out anything from the ignore list 

1656 if ignore: 

1657 ignore = set(ignore) 

1658 datasetTypes = [e for e in datasetTypes 

1659 if e.name not in ignore and e.nameAndComponent()[0] not in ignore] 

1660 else: 

1661 ignore = set() 

1662 

1663 # Find all the registered instruments 

1664 instruments = set( 

1665 record.name for record in self.registry.queryDimensionRecords("instrument") 

1666 ) 

1667 

1668 # For each datasetType that has an instrument dimension, create 

1669 # a DatasetRef for each defined instrument 

1670 datasetRefs = [] 

1671 

1672 for datasetType in datasetTypes: 

1673 if "instrument" in datasetType.dimensions: 

1674 for instrument in instruments: 

1675 datasetRef = DatasetRef(datasetType, {"instrument": instrument}, # type: ignore 

1676 conform=False) 

1677 datasetRefs.append(datasetRef) 

1678 

1679 entities: List[Union[DatasetType, DatasetRef]] = [] 

1680 entities.extend(datasetTypes) 

1681 entities.extend(datasetRefs) 

1682 

1683 datastoreErrorStr = None 

1684 try: 

1685 self.datastore.validateConfiguration(entities, logFailures=logFailures) 

1686 except ValidationError as e: 

1687 datastoreErrorStr = str(e) 

1688 

1689 # Also check that the LookupKeys used by the datastores match 

1690 # registry and storage class definitions 

1691 keys = self.datastore.getLookupKeys() 

1692 

1693 failedNames = set() 

1694 failedDataId = set() 

1695 for key in keys: 

1696 if key.name is not None: 

1697 if key.name in ignore: 

1698 continue 

1699 

1700 # skip if specific datasetType names were requested and this 

1701 # name does not match 

1702 if datasetTypeNames and key.name not in datasetTypeNames: 

1703 continue 

1704 

1705 # See if it is a StorageClass or a DatasetType 

1706 if key.name not in self.storageClasses: 

1709 try: 

1710 self.registry.getDatasetType(key.name) 

1711 except KeyError: 

1712 if logFailures: 

1713 log.critical("Key '%s' does not correspond to a DatasetType or StorageClass", key) 

1714 failedNames.add(key) 

1715 else: 

1716 # Dimensions are checked for consistency when the Butler 

1717 # is created and rendezvoused with a universe. 

1718 pass 

1719 

1720 # Check that the instrument is a valid instrument 

1721 # Currently only 'instrument' is supported, so check for that 

1722 if key.dataId: 

1723 dataIdKeys = set(key.dataId) 

1724 if set(["instrument"]) != dataIdKeys: 

1725 if logFailures: 

1726 log.critical("Key '%s' has unsupported DataId override", key) 

1727 failedDataId.add(key) 

1728 elif key.dataId["instrument"] not in instruments: 

1729 if logFailures: 

1730 log.critical("Key '%s' has unknown instrument", key) 

1731 failedDataId.add(key) 

1732 

1733 messages = [] 

1734 

1735 if datastoreErrorStr: 

1736 messages.append(datastoreErrorStr) 

1737 

1738 for failed, msg in ((failedNames, "Keys without corresponding DatasetType or StorageClass entry: "), 

1739 (failedDataId, "Keys with bad DataId entries: ")): 

1740 if failed: 

1741 msg += ", ".join(str(k) for k in failed) 

1742 messages.append(msg) 

1743 

1744 if messages: 

1745 raise ValidationError(";\n".join(messages)) 

1746 
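# A sketch of checking only a few dataset types and logging any problems (the
# dataset type names are hypothetical):
#
#     butler.validateConfiguration(logFailures=True,
#                                  datasetTypeNames=["raw", "calexp"],
#                                  ignore=["packages"])
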

1747 @property 

1748 def collections(self) -> CollectionSearch: 

1749 """The collections to search by default, in order (`CollectionSearch`). 

1750 

1751 This is an alias for ``self.registry.defaults.collections``. It cannot 

1752 be set directly in isolation, but all defaults may be changed together 

1753 by assigning a new `RegistryDefaults` instance to 

1754 ``self.registry.defaults``. 

1755 """ 

1756 return self.registry.defaults.collections 

1757 

1758 @property 

1759 def run(self) -> Optional[str]: 

1760 """Name of the run this butler writes outputs to by default (`str` or 

1761 `None`). 

1762 

1763 This is an alias for ``self.registry.defaults.run``. It cannot be set 

1764 directly in isolation, but all defaults may be changed together by 

1765 assigning a new `RegistryDefaults` instance to 

1766 ``self.registry.defaults``. 

1767 """ 

1768 return self.registry.defaults.run 

1769 
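# Neither property can be assigned directly; a sketch of changing the defaults
# via the registry instead (the collection and run names are hypothetical):
#
#     from lsst.daf.butler.registry import RegistryDefaults
#     butler.registry.defaults = RegistryDefaults(collections=["my-inputs"],
#                                                 run="u/someone/outputs")
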

1770 registry: Registry 

1771 """The object that manages dataset metadata and relationships (`Registry`). 

1772 

1773 Most operations that don't involve reading or writing butler datasets are 

1774 accessible only via `Registry` methods. 

1775 """ 

1776 

1777 datastore: Datastore 

1778 """The object that manages actual dataset storage (`Datastore`). 

1779 

1780 Direct user access to the datastore should rarely be necessary; the primary 

1781 exception is the case where a `Datastore` implementation provides extra 

1782 functionality beyond what the base class defines. 

1783 """ 

1784 

1785 storageClasses: StorageClassFactory 

1786 """An object that maps known storage class names to objects that fully 

1787 describe them (`StorageClassFactory`). 

1788 """