Coverage for python/lsst/daf/butler/_butler.py: 10%

634 statements  

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22""" 

23Butler top level classes. 

24""" 

25from __future__ import annotations 

26 

27__all__ = ( 

28 "Butler", 

29 "ButlerValidationError", 

30 "PruneCollectionsArgsError", 

31 "PurgeWithoutUnstorePruneCollectionsError", 

32 "RunWithoutPurgePruneCollectionsError", 

33 "PurgeUnsupportedPruneCollectionsError", 

34) 

35 

36import collections.abc 

37import contextlib 

38import logging 

39import numbers 

40import os 

41from collections import defaultdict 

42from typing import ( 

43 Any, 

44 ClassVar, 

45 Counter, 

46 Dict, 

47 Iterable, 

48 Iterator, 

49 List, 

50 MutableMapping, 

51 Optional, 

52 Set, 

53 TextIO, 

54 Tuple, 

55 Type, 

56 Union, 

57) 

58 

59from lsst.resources import ResourcePath, ResourcePathExpression 

60from lsst.utils import doImportType 

61from lsst.utils.introspection import get_class_of 

62from lsst.utils.logging import VERBOSE, getLogger 

63 

64from ._butlerConfig import ButlerConfig 

65from ._butlerRepoIndex import ButlerRepoIndex 

66from ._deferredDatasetHandle import DeferredDatasetHandle 

67from ._limited_butler import LimitedButler 

68from .core import ( 

69 AmbiguousDatasetError, 

70 Config, 

71 ConfigSubset, 

72 DataCoordinate, 

73 DataId, 

74 DataIdValue, 

75 DatasetRef, 

76 DatasetType, 

77 Datastore, 

78 Dimension, 

79 DimensionConfig, 

80 DimensionUniverse, 

81 FileDataset, 

82 Progress, 

83 StorageClassFactory, 

84 Timespan, 

85 ValidationError, 

86) 

87from .core.repoRelocation import BUTLER_ROOT_TAG 

88from .core.utils import transactional 

89from .registry import ( 

90 CollectionSearch, 

91 CollectionType, 

92 ConflictingDefinitionError, 

93 DatasetIdGenEnum, 

94 Registry, 

95 RegistryConfig, 

96 RegistryDefaults, 

97) 

98from .transfers import RepoExportContext 

99 

100log = getLogger(__name__) 

101 

102 

103class ButlerValidationError(ValidationError): 

104 """There is a problem with the Butler configuration.""" 

105 

106 pass 

107 

108 

109class PruneCollectionsArgsError(TypeError): 

110 """Base class for errors relating to Butler.pruneCollections input 

111 arguments. 

112 """ 

113 

114 pass 

115 

116 

117class PurgeWithoutUnstorePruneCollectionsError(PruneCollectionsArgsError): 

118 """Raised when purge and unstore are both required to be True, and 

119 purge is True but unstore is False. 

120 """ 

121 

122 def __init__(self) -> None: 

123 super().__init__("Cannot pass purge=True without unstore=True.") 

124 

125 

126class RunWithoutPurgePruneCollectionsError(PruneCollectionsArgsError): 

127 """Raised when pruning a RUN collection but purge is False.""" 

128 

129 def __init__(self, collectionType: CollectionType): 

130 self.collectionType = collectionType 

131 super().__init__(f"Cannot prune RUN collection {self.collectionType.name} without purge=True.") 

132 

133 

134class PurgeUnsupportedPruneCollectionsError(PruneCollectionsArgsError): 

135 """Raised when purge is True but is not supported for the given 

136 collection.""" 

137 

138 def __init__(self, collectionType: CollectionType): 

139 self.collectionType = collectionType 

140 super().__init__( 

141 f"Cannot prune {self.collectionType} collection {self.collectionType.name} with purge=True." 

142 ) 

143 

144 

145class Butler(LimitedButler): 

146 """Main entry point for the data access system. 

147 

148 Parameters 

149 ---------- 

150 config : `ButlerConfig`, `Config`, or `str`, optional 

151 Configuration. Anything acceptable to the 

152 `ButlerConfig` constructor. If a directory path 

153 is given the configuration will be read from a ``butler.yaml`` file in 

154 that location. If `None` is given default values will be used. 

155 butler : `Butler`, optional 

156 If provided, construct a new Butler that uses the same registry and 

157 datastore as the given one, but with the given collection and run. 

158 Incompatible with the ``config``, ``searchPaths``, and ``writeable`` 

159 arguments. 

160 collections : `str` or `Iterable` [ `str` ], optional 

161 An expression specifying the collections to be searched (in order) when 

162 reading datasets. 

163 This may be a `str` collection name or an iterable thereof. 

164 See :ref:`daf_butler_collection_expressions` for more information. 

165 These collections are not registered automatically and must be 

166 registered manually before they are used by any method; that 

167 registration may, however, happen after the `Butler` is initialized. 

168 run : `str`, optional 

169 Name of the `~CollectionType.RUN` collection new datasets should be 

170 inserted into. If ``collections`` is `None` and ``run`` is not `None`, 

171 ``collections`` will be set to ``[run]``. If not `None`, this 

172 collection will automatically be registered. If this is not set (and 

173 ``writeable`` is not set either), a read-only butler will be created. 

174 searchPaths : `list` of `str`, optional 

175 Directory paths to search when calculating the full Butler 

176 configuration. Not used if the supplied config is already a 

177 `ButlerConfig`. 

178 writeable : `bool`, optional 

179 Explicitly sets whether the butler supports write operations. If not 

180 provided, a read-write butler is created if any of ``run``, ``tags``, 

181 or ``chains`` is non-empty. 

182 inferDefaults : `bool`, optional 

183 If `True` (default) infer default data ID values from the values 

184 present in the datasets in ``collections``: if all collections have the 

185 same value (or no value) for a governor dimension, that value will be 

186 the default for that dimension. Nonexistent collections are ignored. 

187 If a default value is provided explicitly for a governor dimension via 

188 ``**kwargs``, no default will be inferred for that dimension. 

189 **kwargs : `str` 

190 Default data ID key-value pairs. These may only identify "governor" 

191 dimensions like ``instrument`` and ``skymap``. 

192 

193 Examples 

194 -------- 

195 While there are many ways to control exactly how a `Butler` interacts with 

196 the collections in its `Registry`, the most common cases are still simple. 

197 

198 For a read-only `Butler` that searches one collection, do:: 

199 

200 butler = Butler("/path/to/repo", collections=["u/alice/DM-50000"]) 

201 

202 For a read-write `Butler` that writes to and reads from a 

203 `~CollectionType.RUN` collection:: 

204 

205 butler = Butler("/path/to/repo", run="u/alice/DM-50000/a") 

206 

207 The `Butler` passed to a ``PipelineTask`` is often much more complex, 

208 because we want to write to one `~CollectionType.RUN` collection but also 

209 read from several others:: 

210 

211 butler = Butler("/path/to/repo", run="u/alice/DM-50000/a", 

212 collections=["u/alice/DM-50000/a", 

213 "u/bob/DM-49998", 

214 "HSC/defaults"]) 

215 

216 This butler will `put` new datasets to the run ``u/alice/DM-50000/a``. 

217 Datasets will be read first from that run (since it appears first in the 

218 chain), and then from ``u/bob/DM-49998`` and finally ``HSC/defaults``. 

219 

220 Finally, one can always create a `Butler` with no collections:: 

221 

222 butler = Butler("/path/to/repo", writeable=True) 

223 

224 This can be extremely useful when you just want to use ``butler.registry``, 

225 e.g. for inserting dimension data or managing collections, or when the 

226 collections you want to use with the butler are not consistent. 

227 Passing ``writeable`` explicitly here is only necessary if you want to be 

228 able to make changes to the repo - usually the value for ``writeable`` can 

229 be guessed from the collection arguments provided, but it defaults to 

230 `False` when there are no collection arguments. 

231 """ 

232 

233 def __init__( 

234 self, 

235 config: Union[Config, str, None] = None, 

236 *, 

237 butler: Optional[Butler] = None, 

238 collections: Any = None, 

239 run: Optional[str] = None, 

240 searchPaths: Optional[List[str]] = None, 

241 writeable: Optional[bool] = None, 

242 inferDefaults: bool = True, 

243 **kwargs: str, 

244 ): 

245 defaults = RegistryDefaults(collections=collections, run=run, infer=inferDefaults, **kwargs) 

246 # Load registry, datastore, etc. from config or existing butler. 

247 if butler is not None: 

248 if config is not None or searchPaths is not None or writeable is not None: 

249 raise TypeError( 

250 "Cannot pass 'config', 'searchPaths', or 'writeable' arguments with 'butler' argument." 

251 ) 

252 self.registry = butler.registry.copy(defaults) 

253 self.datastore = butler.datastore 

254 self.storageClasses = butler.storageClasses 

255 self._config: ButlerConfig = butler._config 

256 self._allow_put_of_predefined_dataset = butler._allow_put_of_predefined_dataset 

257 else: 

258 self._config = ButlerConfig(config, searchPaths=searchPaths) 

259 try: 

260 if "root" in self._config: 

261 butlerRoot = self._config["root"] 

262 else: 

263 butlerRoot = self._config.configDir 

264 if writeable is None: 

265 writeable = run is not None 

266 self.registry = Registry.fromConfig( 

267 self._config, butlerRoot=butlerRoot, writeable=writeable, defaults=defaults 

268 ) 

269 self.datastore = Datastore.fromConfig( 

270 self._config, self.registry.getDatastoreBridgeManager(), butlerRoot=butlerRoot 

271 ) 

272 self.storageClasses = StorageClassFactory() 

273 self.storageClasses.addFromConfig(self._config) 

274 self._allow_put_of_predefined_dataset = self._config.get( 

275 "allow_put_of_predefined_dataset", False 

276 ) 

277 except Exception: 

278 # Failures here usually mean that the configuration is incomplete, 

279 # so just issue an error message that includes the config file URI. 

280 log.error(f"Failed to instantiate Butler from config {self._config.configFile}.") 

281 raise 

282 

283 if "run" in self._config or "collection" in self._config: 

284 raise ValueError("Passing a run or collection via configuration is no longer supported.") 

285 

286 GENERATION: ClassVar[int] = 3 

287 """This is a Generation 3 Butler. 

288 

289 This attribute may be removed in the future, once the Generation 2 Butler 

290 interface has been fully retired; it should only be used in transitional 

291 code. 

292 """ 

293 

294 @classmethod 

295 def get_repo_uri(cls, label: str) -> ResourcePath: 

296 """Look up the label in a butler repository index. 

297 

298 Parameters 

299 ---------- 

300 label : `str` 

301 Label of the Butler repository to look up. 

302 

303 Returns 

304 ------- 

305 uri : `lsst.resources.ResourcePath` 

306 URI to the Butler repository associated with the given label. 

307 

308 Raises 

309 ------ 

310 KeyError 

311 Raised if the label is not found in the index, or if an index 

312 can not be found at all. 

313 

314 Notes 

315 ----- 

316 See `~lsst.daf.butler.ButlerRepoIndex` for details on how the 

317 information is discovered. 

318 """ 

319 return ButlerRepoIndex.get_repo_uri(label) 

320 

321 @classmethod 

322 def get_known_repos(cls) -> Set[str]: 

323 """Retrieve the list of known repository labels. 

324 

325 Returns 

326 ------- 

327 repos : `set` of `str` 

328 All the known labels. Can be empty if no index can be found. 

329 

330 Notes 

331 ----- 

332 See `~lsst.daf.butler.ButlerRepoIndex` for details on how the 

333 information is discovered. 

334 """ 

335 return ButlerRepoIndex.get_known_repos() 

336 
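# Example (illustrative sketch): using the repository-index helpers. The label
# "main" is hypothetical, and the lookup only works if a butler repository
# index is configured; see ButlerRepoIndex for how the index is discovered.
from lsst.daf.butler import Butler

known = Butler.get_known_repos()  # may be an empty set if no index is found
uri = Butler.get_repo_uri("main")  # raises KeyError for an unknown label
print(known, uri)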

337 @staticmethod 

338 def makeRepo( 

339 root: ResourcePathExpression, 

340 config: Union[Config, str, None] = None, 

341 dimensionConfig: Union[Config, str, None] = None, 

342 standalone: bool = False, 

343 searchPaths: Optional[List[str]] = None, 

344 forceConfigRoot: bool = True, 

345 outfile: Optional[ResourcePathExpression] = None, 

346 overwrite: bool = False, 

347 ) -> Config: 

348 """Create an empty data repository by adding a butler.yaml config 

349 to a repository root directory. 

350 

351 Parameters 

352 ---------- 

353 root : `lsst.resources.ResourcePathExpression` 

354 Path or URI to the root location of the new repository. Will be 

355 created if it does not exist. 

356 config : `Config` or `str`, optional 

357 Configuration to write to the repository, after setting any 

358 root-dependent Registry or Datastore config options. Can not 

359 be a `ButlerConfig` or a `ConfigSubset`. If `None`, default 

360 configuration will be used. Root-dependent config options 

361 specified in this config are overwritten if ``forceConfigRoot`` 

362 is `True`. 

363 dimensionConfig : `Config` or `str`, optional 

364 Configuration for dimensions, will be used to initialize registry 

365 database. 

366 standalone : `bool` 

367 If True, write all expanded defaults, not just customized or 

368 repository-specific settings. 

369 This (mostly) decouples the repository from the default 

370 configuration, insulating it from changes to the defaults (which 

371 may be good or bad, depending on the nature of the changes). 

372 Future *additions* to the defaults will still be picked up when 

373 initializing `Butlers` to repos created with ``standalone=True``. 

374 searchPaths : `list` of `str`, optional 

375 Directory paths to search when calculating the full butler 

376 configuration. 

377 forceConfigRoot : `bool`, optional 

378 If `False`, any values present in the supplied ``config`` that 

379 would normally be reset are not overridden and will appear 

380 directly in the output config. This allows non-standard overrides 

381 of the root directory for a datastore or registry to be given. 

382 If this parameter is `True` the values for ``root`` will be 

383 forced into the resulting config if appropriate. 

384 outfile : `lsst.resources.ResourcePathExpression`, optional 

385 If not-`None`, the output configuration will be written to this 

386 location rather than into the repository itself. Can be a URI 

387 string. Can refer to a directory that will be used to write 

388 ``butler.yaml``. 

389 overwrite : `bool`, optional 

390 Create a new configuration file even if one already exists 

391 in the specified output location. Default is to raise 

392 an exception. 

393 

394 Returns 

395 ------- 

396 config : `Config` 

397 The updated `Config` instance written to the repo. 

398 

399 Raises 

400 ------ 

401 ValueError 

402 Raised if a ButlerConfig or ConfigSubset is passed instead of a 

403 regular Config (as these subclasses would make it impossible to 

404 support ``standalone=False``). 

405 FileExistsError 

406 Raised if the output config file already exists. 

407 os.error 

408 Raised if the directory does not exist, exists but is not a 

409 directory, or cannot be created. 

410 

411 Notes 

412 ----- 

413 Note that when ``standalone=False`` (the default), the configuration 

414 search path (see `ConfigSubset.defaultSearchPaths`) that was used to 

415 construct the repository should also be used to construct any Butlers 

416 to avoid configuration inconsistencies. 

417 """ 

418 if isinstance(config, (ButlerConfig, ConfigSubset)): 

419 raise ValueError("makeRepo must be passed a regular Config without defaults applied.") 

420 

421 # Ensure that the root of the repository exists or can be made 

422 root_uri = ResourcePath(root, forceDirectory=True) 

423 root_uri.mkdir() 

424 

425 config = Config(config) 

426 

427 # If we are creating a new repo from scratch with relative roots, 

428 # do not propagate an explicit root from the config file 

429 if "root" in config: 

430 del config["root"] 

431 

432 full = ButlerConfig(config, searchPaths=searchPaths) # this applies defaults 

433 imported_class = doImportType(full["datastore", "cls"]) 

434 if not issubclass(imported_class, Datastore): 

435 raise TypeError(f"Imported datastore class {full['datastore', 'cls']} is not a Datastore") 

436 datastoreClass: Type[Datastore] = imported_class 

437 datastoreClass.setConfigRoot(BUTLER_ROOT_TAG, config, full, overwrite=forceConfigRoot) 

438 

439 # if key exists in given config, parse it, otherwise parse the defaults 

440 # in the expanded config 

441 if config.get(("registry", "db")): 

442 registryConfig = RegistryConfig(config) 

443 else: 

444 registryConfig = RegistryConfig(full) 

445 defaultDatabaseUri = registryConfig.makeDefaultDatabaseUri(BUTLER_ROOT_TAG) 

446 if defaultDatabaseUri is not None: 

447 Config.updateParameters( 

448 RegistryConfig, config, full, toUpdate={"db": defaultDatabaseUri}, overwrite=forceConfigRoot 

449 ) 

450 else: 

451 Config.updateParameters(RegistryConfig, config, full, toCopy=("db",), overwrite=forceConfigRoot) 

452 

453 if standalone: 

454 config.merge(full) 

455 else: 

456 # Always expand the registry.managers section into the per-repo 

457 # config, because after the database schema is created, it's not 

458 # allowed to change anymore. Note that in the standalone=True 

459 # branch, _everything_ in the config is expanded, so there's no 

460 # need to special case this. 

461 Config.updateParameters(RegistryConfig, config, full, toMerge=("managers",), overwrite=False) 

462 configURI: ResourcePathExpression 

463 if outfile is not None: 

464 # When writing to a separate location we must include 

465 # the root of the butler repo in the config else it won't know 

466 # where to look. 

467 config["root"] = root_uri.geturl() 

468 configURI = outfile 

469 else: 

470 configURI = root_uri 

471 config.dumpToUri(configURI, overwrite=overwrite) 

472 

473 # Create Registry and populate tables 

474 registryConfig = RegistryConfig(config.get("registry")) 

475 dimensionConfig = DimensionConfig(dimensionConfig) 

476 Registry.createFromConfig(registryConfig, dimensionConfig=dimensionConfig, butlerRoot=root_uri) 

477 

478 log.verbose("Wrote new Butler configuration file to %s", configURI) 

479 

480 return config 

481 
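# Example (illustrative sketch): creating an empty repository with makeRepo()
# and then opening it. The root path and run name are hypothetical, and the
# default dimension and registry configuration are used.
from lsst.daf.butler import Butler

Butler.makeRepo("/path/to/new/repo")  # writes butler.yaml and creates the registry
butler = Butler("/path/to/new/repo", run="u/alice/ingest")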

482 @classmethod 

483 def _unpickle( 

484 cls, 

485 config: ButlerConfig, 

486 collections: Optional[CollectionSearch], 

487 run: Optional[str], 

488 defaultDataId: Dict[str, str], 

489 writeable: bool, 

490 ) -> Butler: 

491 """Callable used to unpickle a Butler. 

492 

493 We prefer not to use ``Butler.__init__`` directly so we can force some 

494 of its many arguments to be keyword-only (note that ``__reduce__`` 

495 can only invoke callables with positional arguments). 

496 

497 Parameters 

498 ---------- 

499 config : `ButlerConfig` 

500 Butler configuration, already coerced into a true `ButlerConfig` 

501 instance (and hence after any search paths for overrides have been 

502 utilized). 

503 collections : `CollectionSearch` 

504 Names of the default collections to read from. 

505 run : `str`, optional 

506 Name of the default `~CollectionType.RUN` collection to write to. 

507 defaultDataId : `dict` [ `str`, `str` ] 

508 Default data ID values. 

509 writeable : `bool` 

510 Whether the Butler should support write operations. 

511 

512 Returns 

513 ------- 

514 butler : `Butler` 

515 A new `Butler` instance. 

516 """ 

517 # MyPy doesn't recognize that the kwargs below are totally valid; it 

518 # seems to think ``**defaultDataId`` is a _positional_ argument! 

519 return cls( 

520 config=config, 

521 collections=collections, 

522 run=run, 

523 writeable=writeable, 

524 **defaultDataId, # type: ignore 

525 ) 

526 

527 def __reduce__(self) -> tuple: 

528 """Support pickling.""" 

529 return ( 

530 Butler._unpickle, 

531 ( 

532 self._config, 

533 self.collections, 

534 self.run, 

535 self.registry.defaults.dataId.byName(), 

536 self.registry.isWriteable(), 

537 ), 

538 ) 

539 

540 def __str__(self) -> str: 

541 return "Butler(collections={}, run={}, datastore='{}', registry='{}')".format( 

542 self.collections, self.run, self.datastore, self.registry 

543 ) 

544 

545 def isWriteable(self) -> bool: 

546 """Return `True` if this `Butler` supports write operations.""" 

547 return self.registry.isWriteable() 

548 

549 @contextlib.contextmanager 

550 def transaction(self) -> Iterator[None]: 

551 """Context manager supporting `Butler` transactions. 

552 

553 Transactions can be nested. 

554 """ 

555 with self.registry.transaction(): 

556 with self.datastore.transaction(): 

557 yield 

558 
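# Example (illustrative sketch): grouping operations in a single transaction so
# that registry and datastore changes are committed or rolled back together.
# The repository path and run names are hypothetical.
from lsst.daf.butler import Butler

butler = Butler("/path/to/repo", writeable=True)
with butler.transaction():
    butler.registry.registerRun("u/alice/DM-50000/a")
    butler.registry.registerRun("u/alice/DM-50000/b")
    # Any exception raised inside the block rolls back both registrations.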

559 def _standardizeArgs( 

560 self, 

561 datasetRefOrType: Union[DatasetRef, DatasetType, str], 

562 dataId: Optional[DataId] = None, 

563 for_put: bool = True, 

564 **kwargs: Any, 

565 ) -> Tuple[DatasetType, Optional[DataId]]: 

566 """Standardize the arguments passed to several Butler APIs. 

567 

568 Parameters 

569 ---------- 

570 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

571 When `DatasetRef` the `dataId` should be `None`. 

572 Otherwise the `DatasetType` or name thereof. 

573 dataId : `dict` or `DataCoordinate` 

574 A `dict` of `Dimension` link name, value pairs that label the 

575 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

576 should be provided as the second argument. 

577 for_put : `bool`, optional 

578 If `True` this call is invoked as part of a `Butler.put()`. 

579 Otherwise it is assumed to be part of a `Butler.get()`. This 

580 parameter is only relevant if there is dataset type 

581 inconsistency. 

582 **kwargs 

583 Additional keyword arguments used to augment or construct a 

584 `DataCoordinate`. See `DataCoordinate.standardize` 

585 parameters. 

586 

587 Returns 

588 ------- 

589 datasetType : `DatasetType` 

590 A `DatasetType` instance extracted from ``datasetRefOrType``. 

591 dataId : `dict` or `DataId`, optional 

592 Argument that can be used (along with ``kwargs``) to construct a 

593 `DataId`. 

594 

595 Notes 

596 ----- 

597 Butler APIs that conceptually need a DatasetRef also allow passing a 

598 `DatasetType` (or the name of one) and a `DataId` (or a dict and 

599 keyword arguments that can be used to construct one) separately. This 

600 method accepts those arguments and always returns a true `DatasetType` 

601 and a `DataId` or `dict`. 

602 

603 Standardization of `dict` vs `DataId` is best handled by passing the 

604 returned ``dataId`` (and ``kwargs``) to `Registry` APIs, which are 

605 generally similarly flexible. 

606 """ 

607 externalDatasetType: Optional[DatasetType] = None 

608 internalDatasetType: Optional[DatasetType] = None 

609 if isinstance(datasetRefOrType, DatasetRef): 

610 if dataId is not None or kwargs: 

611 raise ValueError("DatasetRef given, cannot use dataId as well") 

612 externalDatasetType = datasetRefOrType.datasetType 

613 dataId = datasetRefOrType.dataId 

614 else: 

615 # Don't check whether DataId is provided, because Registry APIs 

616 # can usually construct a better error message when it wasn't. 

617 if isinstance(datasetRefOrType, DatasetType): 

618 externalDatasetType = datasetRefOrType 

619 else: 

620 internalDatasetType = self.registry.getDatasetType(datasetRefOrType) 

621 

622 # Check that they are self-consistent 

623 if externalDatasetType is not None: 

624 internalDatasetType = self.registry.getDatasetType(externalDatasetType.name) 

625 if externalDatasetType != internalDatasetType: 

626 # We can allow differences if they are compatible, depending 

627 # on whether this is a get or a put. A get requires that 

628 # the python type associated with the datastore can be 

629 # converted to the user type. A put requires that the user 

630 # supplied python type can be converted to the internal 

631 # type expected by registry. 

632 relevantDatasetType = internalDatasetType 

633 if for_put: 

634 is_compatible = internalDatasetType.is_compatible_with(externalDatasetType) 

635 else: 

636 is_compatible = externalDatasetType.is_compatible_with(internalDatasetType) 

637 relevantDatasetType = externalDatasetType 

638 if not is_compatible: 

639 raise ValueError( 

640 f"Supplied dataset type ({externalDatasetType}) inconsistent with " 

641 f"registry definition ({internalDatasetType})" 

642 ) 

643 # Override the internal definition. 

644 internalDatasetType = relevantDatasetType 

645 

646 assert internalDatasetType is not None 

647 return internalDatasetType, dataId 

648 

649 def _rewrite_data_id( 

650 self, dataId: Optional[DataId], datasetType: DatasetType, **kwargs: Any 

651 ) -> Tuple[Optional[DataId], Dict[str, Any]]: 

652 """Rewrite a data ID taking into account dimension records. 

653 

654 Take a Data ID and keyword args and rewrite it if necessary to 

655 allow the user to specify dimension records rather than dimension 

656 primary values. 

657 

658 This allows a user to include a dataId dict with keys of 

659 ``exposure.day_obs`` and ``exposure.seq_num`` instead of giving 

660 the integer exposure ID. It also allows a string to be given 

661 for a dimension value rather than the integer ID if that is more 

662 convenient. For example, rather than having to specify the 

663 detector with ``detector.full_name``, a string given for ``detector`` 

664 will be interpreted as the full name and converted to the integer 

665 value. 

666 

667 Keyword arguments can also use strings for dimensions like detector 

668 and exposure but python does not allow them to include ``.`` and 

669 so the ``exposure.day_obs`` syntax can not be used in a keyword 

670 argument. 

671 

672 Parameters 

673 ---------- 

674 dataId : `dict` or `DataCoordinate` 

675 A `dict` of `Dimension` link name, value pairs that will label the 

676 `DatasetRef` within a Collection. 

677 datasetType : `DatasetType` 

678 The dataset type associated with this dataId. Required to 

679 determine the relevant dimensions. 

680 **kwargs 

681 Additional keyword arguments used to augment or construct a 

682 `DataId`. See `DataId` parameters. 

683 

684 Returns 

685 ------- 

686 dataId : `dict` or `DataCoordinate` 

687 The, possibly rewritten, dataId. If given a `DataCoordinate` and 

688 no keyword arguments, the original dataId will be returned 

689 unchanged. 

690 **kwargs : `dict` 

691 Any unused keyword arguments (would normally be empty dict). 

692 """ 

693 # Do nothing if we have a standalone DataCoordinate. 

694 if isinstance(dataId, DataCoordinate) and not kwargs: 

695 return dataId, kwargs 

696 

697 # Process dimension records that are using record information 

698 # rather than ids 

699 newDataId: Dict[str, DataIdValue] = {} 

700 byRecord: Dict[str, Dict[str, Any]] = defaultdict(dict) 

701 

702 # If all of the dataId comes from keyword parameters we do not need 

703 # to do anything here: a "." is not allowed in a keyword parameter 

704 # name, so keys of the form exposure.obs_id cannot appear there. 

705 if dataId: 

706 for k, v in dataId.items(): 

707 # If we have a Dimension we do not need to do anything 

708 # because it cannot be a compound key. 

709 if isinstance(k, str) and "." in k: 

710 # Someone is using a more human-readable dataId 

711 dimensionName, record = k.split(".", 1) 

712 byRecord[dimensionName][record] = v 

713 elif isinstance(k, Dimension): 

714 newDataId[k.name] = v 

715 else: 

716 newDataId[k] = v 

717 

718 # Go through the updated dataId and check the type in case someone is 

719 # using an alternate key. We have already filtered out the compound 

720 # dimension.record keys above. 

721 not_dimensions = {} 

722 

723 # Will need to look in the dataId and the keyword arguments 

724 # and will remove them if they need to be fixed or are unrecognized. 

725 for dataIdDict in (newDataId, kwargs): 

726 # Use a list so we can adjust the dict safely in the loop 

727 for dimensionName in list(dataIdDict): 

728 value = dataIdDict[dimensionName] 

729 try: 

730 dimension = self.registry.dimensions.getStaticDimensions()[dimensionName] 

731 except KeyError: 

732 # This is not a real dimension 

733 not_dimensions[dimensionName] = value 

734 del dataIdDict[dimensionName] 

735 continue 

736 

737 # Convert an integral type to an explicit int to simplify 

738 # comparisons here 

739 if isinstance(value, numbers.Integral): 

740 value = int(value) 

741 

742 if not isinstance(value, dimension.primaryKey.getPythonType()): 

743 for alternate in dimension.alternateKeys: 

744 if isinstance(value, alternate.getPythonType()): 

745 byRecord[dimensionName][alternate.name] = value 

746 del dataIdDict[dimensionName] 

747 log.debug( 

748 "Converting dimension %s to %s.%s=%s", 

749 dimensionName, 

750 dimensionName, 

751 alternate.name, 

752 value, 

753 ) 

754 break 

755 else: 

756 log.warning( 

757 "Type mismatch found for value '%r' provided for dimension %s. " 

758 "Could not find matching alternative (primary key has type %s) " 

759 "so attempting to use as-is.", 

760 value, 

761 dimensionName, 

762 dimension.primaryKey.getPythonType(), 

763 ) 

764 

765 # By this point kwargs and newDataId should only include valid 

766 # dimensions. Merge kwargs in to the new dataId and log if there 

767 # are dimensions in both (rather than calling update). 

768 for k, v in kwargs.items(): 

769 if k in newDataId and newDataId[k] != v: 

770 log.debug( 

771 "Keyword arg %s overriding explicit value in dataId of %s with %s", k, newDataId[k], v 

772 ) 

773 newDataId[k] = v 

774 # No need to retain any values in kwargs now. 

775 kwargs = {} 

776 

777 # If we have some unrecognized dimensions we have to try to connect 

778 # them to records in other dimensions. This is made more complicated 

779 # by some dimensions having records with clashing names. A mitigation 

780 # is that we can tell by this point which dimensions are missing 

781 # for the DatasetType but this does not work for calibrations 

782 # where additional dimensions can be used to constrain the temporal 

783 # axis. 

784 if not_dimensions: 

785 # Search for all dimensions even if we have been given a value 

786 # explicitly. In some cases records are given as well as the 

787 # actual dimension and this should not be an error if they 

788 # match. 

789 mandatoryDimensions = datasetType.dimensions.names # - provided 

790 

791 candidateDimensions: Set[str] = set() 

792 candidateDimensions.update(mandatoryDimensions) 

793 

794 # For calibrations we may well be needing temporal dimensions 

795 # so rather than always including all dimensions in the scan 

796 # restrict things a little. It is still possible for there 

797 # to be confusion over day_obs in visit vs exposure for example. 

798 # If we are not searching calibration collections things may 

799 # fail but they are going to fail anyway because of the 

800 # ambiguity of the dataId... 

801 if datasetType.isCalibration(): 

802 for dim in self.registry.dimensions.getStaticDimensions(): 

803 if dim.temporal: 

804 candidateDimensions.add(str(dim)) 

805 

806 # Look up table for the first association with a dimension 

807 guessedAssociation: Dict[str, Dict[str, Any]] = defaultdict(dict) 

808 

809 # Keep track of whether an item is associated with multiple 

810 # dimensions. 

811 counter: Counter[str] = Counter() 

812 assigned: Dict[str, Set[str]] = defaultdict(set) 

813 

814 # Go through the missing dimensions and associate the 

815 # given names with records within those dimensions 

816 matched_dims = set() 

817 for dimensionName in candidateDimensions: 

818 dimension = self.registry.dimensions.getStaticDimensions()[dimensionName] 

819 fields = dimension.metadata.names | dimension.uniqueKeys.names 

820 for field in not_dimensions: 

821 if field in fields: 

822 guessedAssociation[dimensionName][field] = not_dimensions[field] 

823 counter[dimensionName] += 1 

824 assigned[field].add(dimensionName) 

825 matched_dims.add(field) 

826 

827 # Calculate the fields that matched nothing. 

828 never_found = set(not_dimensions) - matched_dims 

829 

830 if never_found: 

831 raise ValueError(f"Unrecognized keyword args given: {never_found}") 

832 

833 # There is a chance we have allocated a single dataId item 

834 # to multiple dimensions. Need to decide which should be retained. 

835 # For now assume that the most popular alternative wins. 

836 # This means that day_obs with seq_num will result in 

837 # exposure.day_obs and not visit.day_obs 

838 # Also prefer an explicitly missing dimension over an inferred 

839 # temporal dimension. 

840 for fieldName, assignedDimensions in assigned.items(): 

841 if len(assignedDimensions) > 1: 

842 # Pick the most popular (preferring mandatory dimensions) 

843 requiredButMissing = assignedDimensions.intersection(mandatoryDimensions) 

844 if requiredButMissing: 

845 candidateDimensions = requiredButMissing 

846 else: 

847 candidateDimensions = assignedDimensions 

848 

849 # Select the relevant items and get a new restricted 

850 # counter. 

851 theseCounts = {k: v for k, v in counter.items() if k in candidateDimensions} 

852 duplicatesCounter: Counter[str] = Counter() 

853 duplicatesCounter.update(theseCounts) 

854 

855 # Choose the most common. If they are equally common 

856 # we will pick the one that was found first. 

857 # Returns a list of tuples 

858 selected = duplicatesCounter.most_common(1)[0][0] 

859 

860 log.debug( 

861 "Ambiguous dataId entry '%s' associated with multiple dimensions: %s." 

862 " Removed ambiguity by choosing dimension %s.", 

863 fieldName, 

864 ", ".join(assignedDimensions), 

865 selected, 

866 ) 

867 

868 for candidateDimension in assignedDimensions: 

869 if candidateDimension != selected: 

870 del guessedAssociation[candidateDimension][fieldName] 

871 

872 # Update the record look up dict with the new associations 

873 for dimensionName, values in guessedAssociation.items(): 

874 if values: # A dict might now be empty 

875 log.debug("Assigned non-dimension dataId keys to dimension %s: %s", dimensionName, values) 

876 byRecord[dimensionName].update(values) 

877 

878 if byRecord: 

879 # Some record specifiers were found so we need to convert 

880 # them to the Id form 

881 for dimensionName, values in byRecord.items(): 

882 if dimensionName in newDataId: 

883 log.debug( 

884 "DataId specified explicit %s dimension value of %s in addition to" 

885 " general record specifiers for it of %s. Ignoring record information.", 

886 dimensionName, 

887 newDataId[dimensionName], 

888 str(values), 

889 ) 

890 # Get the actual record and compare with these values. 

891 try: 

892 recs = list(self.registry.queryDimensionRecords(dimensionName, dataId=newDataId)) 

893 except LookupError: 

894 raise ValueError( 

895 f"Could not find dimension '{dimensionName}'" 

896 f" with dataId {newDataId} as part of comparing with" 

897 f" record values {byRecord[dimensionName]}" 

898 ) from None 

899 if len(recs) == 1: 

900 errmsg: List[str] = [] 

901 for k, v in values.items(): 

902 if (recval := getattr(recs[0], k)) != v: 

903 errmsg.append(f"{k}({recval} != {v})") 

904 if errmsg: 

905 raise ValueError( 

906 f"Dimension {dimensionName} in dataId has explicit value" 

907 " inconsistent with records: " + ", ".join(errmsg) 

908 ) 

909 else: 

910 # Multiple matches for an explicit dimension 

911 # should never happen but let downstream complain. 

912 pass 

913 continue 

914 

915 # Build up a WHERE expression 

916 bind = {k: v for k, v in values.items()} 

917 where = " AND ".join(f"{dimensionName}.{k} = {k}" for k in bind) 

918 

919 # Hopefully we get a single record that matches 

920 records = set( 

921 self.registry.queryDimensionRecords( 

922 dimensionName, dataId=newDataId, where=where, bind=bind, **kwargs 

923 ) 

924 ) 

925 

926 if len(records) != 1: 

927 if len(records) > 1: 

928 log.debug("Received %d records from constraints of %s", len(records), str(values)) 

929 for r in records: 

930 log.debug("- %s", str(r)) 

931 raise ValueError( 

932 f"DataId specification for dimension {dimensionName} is not" 

933 f" uniquely constrained to a single dataset by {values}." 

934 f" Got {len(records)} results." 

935 ) 

936 raise ValueError( 

937 f"DataId specification for dimension {dimensionName} matched no" 

938 f" records when constrained by {values}" 

939 ) 

940 

941 # Get the primary key from the real dimension object 

942 dimension = self.registry.dimensions.getStaticDimensions()[dimensionName] 

943 if not isinstance(dimension, Dimension): 

944 raise RuntimeError( 

945 f"{dimension.name} is not a true dimension, and cannot be used in data IDs." 

946 ) 

947 newDataId[dimensionName] = getattr(records.pop(), dimension.primaryKey.name) 

948 

949 return newDataId, kwargs 

950 
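# Example (illustrative sketch): the rewriting above is what allows data IDs to
# use dimension-record values instead of primary keys. The repository path,
# collection, and all data ID values are hypothetical.
from lsst.daf.butler import Butler

butler = Butler("/path/to/repo", collections=["HSC/raw/all"])
# Identify an exposure by day_obs/seq_num and a detector by its full name,
# rather than by their integer primary keys.
raw = butler.get(
    "raw",
    {"exposure.day_obs": 20130617, "exposure.seq_num": 34, "detector.full_name": "1_53"},
    instrument="HSC",
)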

951 def _findDatasetRef( 

952 self, 

953 datasetRefOrType: Union[DatasetRef, DatasetType, str], 

954 dataId: Optional[DataId] = None, 

955 *, 

956 collections: Any = None, 

957 allowUnresolved: bool = False, 

958 **kwargs: Any, 

959 ) -> DatasetRef: 

960 """Shared logic for methods that start with a search for a dataset in 

961 the registry. 

962 

963 Parameters 

964 ---------- 

965 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

966 When `DatasetRef` the `dataId` should be `None`. 

967 Otherwise the `DatasetType` or name thereof. 

968 dataId : `dict` or `DataCoordinate`, optional 

969 A `dict` of `Dimension` link name, value pairs that label the 

970 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

971 should be provided as the first argument. 

972 collections : Any, optional 

973 Collections to be searched, overriding ``self.collections``. 

974 Can be any of the types supported by the ``collections`` argument 

975 to butler construction. 

976 allowUnresolved : `bool`, optional 

977 If `True`, return an unresolved `DatasetRef` if finding a resolved 

978 one in the `Registry` fails. Defaults to `False`. 

979 **kwargs 

980 Additional keyword arguments used to augment or construct a 

981 `DataId`. See `DataId` parameters. 

982 

983 Returns 

984 ------- 

985 ref : `DatasetRef` 

986 A reference to the dataset identified by the given arguments. 

987 

988 Raises 

989 ------ 

990 LookupError 

991 Raised if no matching dataset exists in the `Registry` (and 

992 ``allowUnresolved is False``). 

993 ValueError 

994 Raised if a resolved `DatasetRef` was passed as an input, but it 

995 differs from the one found in the registry. 

996 TypeError 

997 Raised if no collections were provided. 

998 """ 

999 datasetType, dataId = self._standardizeArgs(datasetRefOrType, dataId, for_put=False, **kwargs) 

1000 if isinstance(datasetRefOrType, DatasetRef): 

1001 idNumber = datasetRefOrType.id 

1002 else: 

1003 idNumber = None 

1004 timespan: Optional[Timespan] = None 

1005 

1006 dataId, kwargs = self._rewrite_data_id(dataId, datasetType, **kwargs) 

1007 

1008 if datasetType.isCalibration(): 

1009 # Because this is a calibration dataset, first try to 

1010 # standardize the data ID without restricting the dimensions to 

1011 # those of the dataset type requested, because there may be extra 

1012 # dimensions that provide temporal information for a validity-range 

1013 # lookup. 

1014 dataId = DataCoordinate.standardize( 

1015 dataId, universe=self.registry.dimensions, defaults=self.registry.defaults.dataId, **kwargs 

1016 ) 

1017 if dataId.graph.temporal: 

1018 dataId = self.registry.expandDataId(dataId) 

1019 timespan = dataId.timespan 

1020 else: 

1021 # Standardize the data ID to just the dimensions of the dataset 

1022 # type instead of letting registry.findDataset do it, so we get the 

1023 # result even if no dataset is found. 

1024 dataId = DataCoordinate.standardize( 

1025 dataId, graph=datasetType.dimensions, defaults=self.registry.defaults.dataId, **kwargs 

1026 ) 

1027 # Always lookup the DatasetRef, even if one is given, to ensure it is 

1028 # present in the current collection. 

1029 ref = self.registry.findDataset(datasetType, dataId, collections=collections, timespan=timespan) 

1030 if ref is None: 

1031 if allowUnresolved: 

1032 return DatasetRef(datasetType, dataId) 

1033 else: 

1034 if collections is None: 

1035 collections = self.registry.defaults.collections 

1036 raise LookupError( 

1037 f"Dataset {datasetType.name} with data ID {dataId} " 

1038 f"could not be found in collections {collections}." 

1039 ) 

1040 if idNumber is not None and idNumber != ref.id: 

1041 if collections is None: 

1042 collections = self.registry.defaults.collections 

1043 raise ValueError( 

1044 f"DatasetRef.id provided ({idNumber}) does not match " 

1045 f"id ({ref.id}) in registry in collections {collections}." 

1046 ) 

1047 if datasetType != ref.datasetType: 

1048 # If they differ it is because the user explicitly specified 

1049 # a compatible dataset type to this call rather than using the 

1050 # registry definition. The DatasetRef must therefore be recreated 

1051 # using the user definition such that the expected type is 

1052 # returned. 

1053 ref = DatasetRef(datasetType, ref.dataId, run=ref.run, id=ref.id) 

1054 

1055 return ref 

1056 

1057 @transactional 

1058 def putDirect(self, obj: Any, ref: DatasetRef) -> DatasetRef: 

1059 # Docstring inherited. 

1060 (imported_ref,) = self.registry._importDatasets( 

1061 [ref], 

1062 expand=True, 

1063 ) 

1064 if imported_ref.id != ref.getCheckedId(): 

1065 raise RuntimeError("This registry configuration does not support putDirect.") 

1066 self.datastore.put(obj, ref) 

1067 return ref 

1068 

1069 @transactional 

1070 def put( 

1071 self, 

1072 obj: Any, 

1073 datasetRefOrType: Union[DatasetRef, DatasetType, str], 

1074 dataId: Optional[DataId] = None, 

1075 *, 

1076 run: Optional[str] = None, 

1077 **kwargs: Any, 

1078 ) -> DatasetRef: 

1079 """Store and register a dataset. 

1080 

1081 Parameters 

1082 ---------- 

1083 obj : `object` 

1084 The dataset. 

1085 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

1086 When `DatasetRef` is provided, ``dataId`` should be `None`. 

1087 Otherwise the `DatasetType` or name thereof. 

1088 dataId : `dict` or `DataCoordinate` 

1089 A `dict` of `Dimension` link name, value pairs that label the 

1090 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

1091 should be provided as the second argument. 

1092 run : `str`, optional 

1093 The name of the run the dataset should be added to, overriding 

1094 ``self.run``. 

1095 **kwargs 

1096 Additional keyword arguments used to augment or construct a 

1097 `DataCoordinate`. See `DataCoordinate.standardize` 

1098 parameters. 

1099 

1100 Returns 

1101 ------- 

1102 ref : `DatasetRef` 

1103 A reference to the stored dataset, updated with the correct id if 

1104 given. 

1105 

1106 Raises 

1107 ------ 

1108 TypeError 

1109 Raised if the butler is read-only or if no run has been provided. 

1110 """ 

1111 log.debug("Butler put: %s, dataId=%s, run=%s", datasetRefOrType, dataId, run) 

1112 if not self.isWriteable(): 

1113 raise TypeError("Butler is read-only.") 

1114 datasetType, dataId = self._standardizeArgs(datasetRefOrType, dataId, **kwargs) 

1115 if isinstance(datasetRefOrType, DatasetRef) and datasetRefOrType.id is not None: 

1116 raise ValueError("DatasetRef must not be in registry, must have None id") 

1117 

1118 # Handle dimension records in dataId 

1119 dataId, kwargs = self._rewrite_data_id(dataId, datasetType, **kwargs) 

1120 

1121 # Add Registry Dataset entry. 

1122 dataId = self.registry.expandDataId(dataId, graph=datasetType.dimensions, **kwargs) 

1123 

1124 # For an execution butler the datasets will be pre-defined. 

1125 # If the butler is configured that way datasets should only be inserted 

1126 # if they do not already exist in registry. Trying and catching 

1127 # ConflictingDefinitionError will not work because the transaction 

1128 # will be corrupted. Instead, in this mode always check first. 

1129 ref = None 

1130 ref_is_predefined = False 

1131 if self._allow_put_of_predefined_dataset: 

1132 # Get the matching ref for this run. 

1133 ref = self.registry.findDataset(datasetType, collections=run, dataId=dataId) 

1134 

1135 if ref: 

1136 # Must be expanded form for datastore templating 

1137 dataId = self.registry.expandDataId(dataId, graph=datasetType.dimensions) 

1138 ref = ref.expanded(dataId) 

1139 ref_is_predefined = True 

1140 

1141 if not ref: 

1142 (ref,) = self.registry.insertDatasets(datasetType, run=run, dataIds=[dataId]) 

1143 

1144 # If the ref is predefined it is possible that the datastore also 

1145 # has the record. Asking datastore to put it again will result in 

1146 # the artifact being recreated, overwriting previous, then will cause 

1147 # a failure in writing the record which will cause the artifact 

1148 # to be removed. Much safer to ask first before attempting to 

1149 # overwrite. Race conditions should not be an issue for the 

1150 # execution butler environment. 

1151 if ref_is_predefined: 

1152 if self.datastore.knows(ref): 

1153 raise ConflictingDefinitionError(f"Dataset associated with {ref} already exists.") 

1154 

1155 self.datastore.put(obj, ref) 

1156 

1157 return ref 

1158 
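# Example (illustrative sketch): a typical put() call using a dataset type name
# and data ID keywords. The repository path, run, dataset type, and data ID
# values are hypothetical; the object must match the dataset type's
# StorageClass.
from lsst.daf.butler import Butler

butler = Butler("/path/to/repo", run="u/alice/DM-50000/a")
metrics = {"n_sources": 1234}
ref = butler.put(metrics, "summary_metrics", instrument="HSC", visit=903334, detector=42)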

1159 def getDirect(self, ref: DatasetRef, *, parameters: Optional[Dict[str, Any]] = None) -> Any: 

1160 """Retrieve a stored dataset. 

1161 

1162 Unlike `Butler.get`, this method allows datasets outside the Butler's 

1163 collection to be read as long as the `DatasetRef` that identifies them 

1164 can be obtained separately. 

1165 

1166 Parameters 

1167 ---------- 

1168 ref : `DatasetRef` 

1169 Resolved reference to an already stored dataset. 

1170 parameters : `dict` 

1171 Additional StorageClass-defined options to control reading, 

1172 typically used to efficiently read only a subset of the dataset. 

1173 

1174 Returns 

1175 ------- 

1176 obj : `object` 

1177 The dataset. 

1178 """ 

1179 return self.datastore.get(ref, parameters=parameters) 

1180 
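# Example (illustrative sketch): getDirect() reads from a resolved DatasetRef,
# typically obtained from a registry query, bypassing the default collection
# search. The repository path, dataset type, and collection are hypothetical.
from lsst.daf.butler import Butler

butler = Butler("/path/to/repo")
for ref in butler.registry.queryDatasets("calexp", collections=["u/bob/DM-49998"]):
    calexp = butler.getDirect(ref)
    break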

1181 def getDirectDeferred( 

1182 self, ref: DatasetRef, *, parameters: Union[dict, None] = None 

1183 ) -> DeferredDatasetHandle: 

1184 """Create a `DeferredDatasetHandle` which can later retrieve a dataset, 

1185 from a resolved `DatasetRef`. 

1186 

1187 Parameters 

1188 ---------- 

1189 ref : `DatasetRef` 

1190 Resolved reference to an already stored dataset. 

1191 parameters : `dict` 

1192 Additional StorageClass-defined options to control reading, 

1193 typically used to efficiently read only a subset of the dataset. 

1194 

1195 Returns 

1196 ------- 

1197 obj : `DeferredDatasetHandle` 

1198 A handle which can be used to retrieve a dataset at a later time. 

1199 

1200 Raises 

1201 ------ 

1202 AmbiguousDatasetError 

1203 Raised if ``ref.id is None``, i.e. the reference is unresolved. 

1204 """ 

1205 if ref.id is None: 

1206 raise AmbiguousDatasetError( 

1207 f"Dataset of type {ref.datasetType.name} with data ID {ref.dataId} is not resolved." 

1208 ) 

1209 return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters) 

1210 

1211 def getDeferred( 

1212 self, 

1213 datasetRefOrType: Union[DatasetRef, DatasetType, str], 

1214 dataId: Optional[DataId] = None, 

1215 *, 

1216 parameters: Union[dict, None] = None, 

1217 collections: Any = None, 

1218 **kwargs: Any, 

1219 ) -> DeferredDatasetHandle: 

1220 """Create a `DeferredDatasetHandle` which can later retrieve a dataset, 

1221 after an immediate registry lookup. 

1222 

1223 Parameters 

1224 ---------- 

1225 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

1226 When `DatasetRef` the `dataId` should be `None`. 

1227 Otherwise the `DatasetType` or name thereof. 

1228 dataId : `dict` or `DataCoordinate`, optional 

1229 A `dict` of `Dimension` link name, value pairs that label the 

1230 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

1231 should be provided as the first argument. 

1232 parameters : `dict` 

1233 Additional StorageClass-defined options to control reading, 

1234 typically used to efficiently read only a subset of the dataset. 

1235 collections : Any, optional 

1236 Collections to be searched, overriding ``self.collections``. 

1237 Can be any of the types supported by the ``collections`` argument 

1238 to butler construction. 

1239 **kwargs 

1240 Additional keyword arguments used to augment or construct a 

1241 `DataId`. See `DataId` parameters. 

1242 

1243 Returns 

1244 ------- 

1245 obj : `DeferredDatasetHandle` 

1246 A handle which can be used to retrieve a dataset at a later time. 

1247 

1248 Raises 

1249 ------ 

1250 LookupError 

1251 Raised if no matching dataset exists in the `Registry` (and 

1252 ``allowUnresolved is False``). 

1253 ValueError 

1254 Raised if a resolved `DatasetRef` was passed as an input, but it 

1255 differs from the one found in the registry. 

1256 TypeError 

1257 Raised if no collections were provided. 

1258 """ 

1259 ref = self._findDatasetRef(datasetRefOrType, dataId, collections=collections, **kwargs) 

1260 return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters) 

1261 
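# Example (illustrative sketch): getDeferred() resolves the dataset now but
# defers the read until the handle's get() is called. The repository path,
# collection, dataset type, and data ID values are hypothetical.
from lsst.daf.butler import Butler

butler = Butler("/path/to/repo", collections=["HSC/runs/RC2"])
handle = butler.getDeferred("calexp", instrument="HSC", visit=903334, detector=42)
calexp = handle.get()  # the dataset is only read at this point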

1262 def get( 

1263 self, 

1264 datasetRefOrType: Union[DatasetRef, DatasetType, str], 

1265 dataId: Optional[DataId] = None, 

1266 *, 

1267 parameters: Optional[Dict[str, Any]] = None, 

1268 collections: Any = None, 

1269 **kwargs: Any, 

1270 ) -> Any: 

1271 """Retrieve a stored dataset. 

1272 

1273 Parameters 

1274 ---------- 

1275 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

1276 When `DatasetRef` the `dataId` should be `None`. 

1277 Otherwise the `DatasetType` or name thereof. 

1278 dataId : `dict` or `DataCoordinate` 

1279 A `dict` of `Dimension` link name, value pairs that label the 

1280 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

1281 should be provided as the first argument. 

1282 parameters : `dict` 

1283 Additional StorageClass-defined options to control reading, 

1284 typically used to efficiently read only a subset of the dataset. 

1285 collections : Any, optional 

1286 Collections to be searched, overriding ``self.collections``. 

1287 Can be any of the types supported by the ``collections`` argument 

1288 to butler construction. 

1289 **kwargs 

1290 Additional keyword arguments used to augment or construct a 

1291 `DataCoordinate`. See `DataCoordinate.standardize` 

1292 parameters. 

1293 

1294 Returns 

1295 ------- 

1296 obj : `object` 

1297 The dataset. 

1298 

1299 Raises 

1300 ------ 

1301 ValueError 

1302 Raised if a resolved `DatasetRef` was passed as an input, but it 

1303 differs from the one found in the registry. 

1304 LookupError 

1305 Raised if no matching dataset exists in the `Registry`. 

1306 TypeError 

1307 Raised if no collections were provided. 

1308 

1309 Notes 

1310 ----- 

1311 When looking up datasets in a `~CollectionType.CALIBRATION` collection, 

1312 this method requires that the given data ID include temporal dimensions 

1313 beyond the dimensions of the dataset type itself, in order to find the 

1314 dataset with the appropriate validity range. For example, a "bias" 

1315 dataset with native dimensions ``{instrument, detector}`` could be 

1316 fetched with a ``{instrument, detector, exposure}`` data ID, because 

1317 ``exposure`` is a temporal dimension. 

1318 """ 

1319 log.debug("Butler get: %s, dataId=%s, parameters=%s", datasetRefOrType, dataId, parameters) 

1320 ref = self._findDatasetRef(datasetRefOrType, dataId, collections=collections, **kwargs) 

1321 return self.getDirect(ref, parameters=parameters) 

1322 
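# Example (illustrative sketch): looking up a calibration requires a temporal
# dimension (here ``exposure``) beyond the dataset type's own dimensions so the
# correct validity range can be selected, as described in the Notes above. The
# repository path, collection, and data ID values are hypothetical.
from lsst.daf.butler import Butler

butler = Butler("/path/to/repo")
bias = butler.get(
    "bias",
    instrument="HSC",
    detector=42,
    exposure=903334,  # supplies the timespan for the validity-range lookup
    collections=["HSC/calib"],
)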

1323 def getURIs( 

1324 self, 

1325 datasetRefOrType: Union[DatasetRef, DatasetType, str], 

1326 dataId: Optional[DataId] = None, 

1327 *, 

1328 predict: bool = False, 

1329 collections: Any = None, 

1330 run: Optional[str] = None, 

1331 **kwargs: Any, 

1332 ) -> Tuple[Optional[ResourcePath], Dict[str, ResourcePath]]: 

1333 """Returns the URIs associated with the dataset. 

1334 

1335 Parameters 

1336 ---------- 

1337 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

1338 When `DatasetRef` the `dataId` should be `None`. 

1339 Otherwise the `DatasetType` or name thereof. 

1340 dataId : `dict` or `DataCoordinate` 

1341 A `dict` of `Dimension` link name, value pairs that label the 

1342 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

1343 should be provided as the first argument. 

1344 predict : `bool` 

1345 If `True`, allow URIs to be returned of datasets that have not 

1346 been written. 

1347 collections : Any, optional 

1348 Collections to be searched, overriding ``self.collections``. 

1349 Can be any of the types supported by the ``collections`` argument 

1350 to butler construction. 

1351 run : `str`, optional 

1352 Run to use for predictions, overriding ``self.run``. 

1353 **kwargs 

1354 Additional keyword arguments used to augment or construct a 

1355 `DataCoordinate`. See `DataCoordinate.standardize` 

1356 parameters. 

1357 

1358 Returns 

1359 ------- 

1360 primary : `lsst.resources.ResourcePath` 

1361 The URI to the primary artifact associated with this dataset. 

1362 If the dataset was disassembled within the datastore this 

1363 may be `None`. 

1364 components : `dict` 

1365 URIs to any components associated with the dataset artifact. 

1366 Can be empty if there are no components. 

1367 """ 

1368 ref = self._findDatasetRef( 

1369 datasetRefOrType, dataId, allowUnresolved=predict, collections=collections, **kwargs 

1370 ) 

1371 if ref.id is None: # only possible if predict is True 

1372 if run is None: 

1373 run = self.run 

1374 if run is None: 

1375 raise TypeError("Cannot predict location with run=None.") 

1376 # Lie about ID, because we can't guess it, and only 

1377 # Datastore.getURIs() will ever see it (and it doesn't use it). 

1378 ref = ref.resolved(id=0, run=run) 

1379 return self.datastore.getURIs(ref, predict) 

1380 

1381 def getURI( 

1382 self, 

1383 datasetRefOrType: Union[DatasetRef, DatasetType, str], 

1384 dataId: Optional[DataId] = None, 

1385 *, 

1386 predict: bool = False, 

1387 collections: Any = None, 

1388 run: Optional[str] = None, 

1389 **kwargs: Any, 

1390 ) -> ResourcePath: 

1391 """Return the URI to the Dataset. 

1392 

1393 Parameters 

1394 ---------- 

1395 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

1396 When `DatasetRef` the `dataId` should be `None`. 

1397 Otherwise the `DatasetType` or name thereof. 

1398 dataId : `dict` or `DataCoordinate` 

1399 A `dict` of `Dimension` link name, value pairs that label the 

1400 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

1401 should be provided as the first argument. 

1402 predict : `bool` 

1403 If `True`, allow URIs to be returned of datasets that have not 

1404 been written. 

1405 collections : Any, optional 

1406 Collections to be searched, overriding ``self.collections``. 

1407 Can be any of the types supported by the ``collections`` argument 

1408 to butler construction. 

1409 run : `str`, optional 

1410 Run to use for predictions, overriding ``self.run``. 

1411 **kwargs 

1412 Additional keyword arguments used to augment or construct a 

1413 `DataCoordinate`. See `DataCoordinate.standardize` 

1414 parameters. 

1415 

1416 Returns 

1417 ------- 

1418 uri : `lsst.resources.ResourcePath` 

1419 URI pointing to the Dataset within the datastore. If the 

1420 Dataset does not exist in the datastore, and if ``predict`` is 

1421 `True`, the URI will be a prediction and will include a URI 

1422 fragment "#predicted". 

1423 If the datastore does not have entities that relate well 

1424 to the concept of a URI the returned URI string will be 

1425 descriptive. The returned URI is not guaranteed to be obtainable. 

1426 

1427 Raises 

1428 ------ 

1429 LookupError 

1430 Raised if a URI has been requested for a dataset that does not 

1431 exist and guessing is not allowed. 

1432 ValueError 

1433 Raised if a resolved `DatasetRef` was passed as an input, but it 

1434 differs from the one found in the registry. 

1435 TypeError 

1436 Raised if no collections were provided. 

1437 RuntimeError 

1438 Raised if a URI is requested for a dataset that consists of 

1439 multiple artifacts. 

1440 """ 

1441 primary, components = self.getURIs( 

1442 datasetRefOrType, dataId=dataId, predict=predict, collections=collections, run=run, **kwargs 

1443 ) 

1444 

1445 if primary is None or components: 

1446 raise RuntimeError( 

1447 f"Dataset ({datasetRefOrType}) includes distinct URIs for components. " 

1448 "Use Butler.getURIs() instead." 

1449 ) 

1450 return primary 

1451 
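# A minimal sketch for Butler.getURI above, including predict mode for a
# dataset that has not been written yet. Repository path, run, dataset type,
# and data ID values are hypothetical.
from lsst.daf.butler import Butler

butler = Butler("/repo", collections="HSC/runs/example")
data_id = {"instrument": "HSC", "visit": 903334, "detector": 22}
# Existing dataset: raises LookupError if it is not known to the registry.
uri = butler.getURI("calexp", dataId=data_id)
# Predicted location of a dataset that may not exist yet; the returned URI
# carries a "#predicted" fragment.
future_uri = butler.getURI("calexp", dataId=data_id, predict=True, run="u/someone/new-run")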

1452 def retrieveArtifacts( 

1453 self, 

1454 refs: Iterable[DatasetRef], 

1455 destination: ResourcePathExpression, 

1456 transfer: str = "auto", 

1457 preserve_path: bool = True, 

1458 overwrite: bool = False, 

1459 ) -> List[ResourcePath]: 

1460 """Retrieve the artifacts associated with the supplied refs. 

1461 

1462 Parameters 

1463 ---------- 

1464 refs : iterable of `DatasetRef` 

1465 The datasets for which artifacts are to be retrieved. 

1466 A single ref can result in multiple artifacts. The refs must 

1467 be resolved. 

1468 destination : `lsst.resources.ResourcePath` or `str` 

1469 Location to write the artifacts. 

1470 transfer : `str`, optional 

1471 Method to use to transfer the artifacts. Must be one of the options 

1472 supported by `~lsst.resources.ResourcePath.transfer_from()`. 

1473 "move" is not allowed. 

1474 preserve_path : `bool`, optional 

1475 If `True` the full path of the artifact within the datastore 

1476 is preserved. If `False` the final file component of the path 

1477 is used. 

1478 overwrite : `bool`, optional 

1479 If `True` allow transfers to overwrite existing files at the 

1480 destination. 

1481 

1482 Returns 

1483 ------- 

1484 targets : `list` of `lsst.resources.ResourcePath` 

1485 URIs of file artifacts in destination location. Order is not 

1486 preserved. 

1487 

1488 Notes 

1489 ----- 

1490 For non-file datastores the artifacts written to the destination 

1491 may not match the representation inside the datastore. For example 

1492 a hierarchical data structure in a NoSQL database may well be stored 

1493 as a JSON file. 

1494 """ 

1495 return self.datastore.retrieveArtifacts( 

1496 refs, 

1497 ResourcePath(destination), 

1498 transfer=transfer, 

1499 preserve_path=preserve_path, 

1500 overwrite=overwrite, 

1501 ) 

1502 
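# A sketch of Butler.retrieveArtifacts above: copy the file artifacts backing
# a query result out of the datastore. Repository path, collection, and
# dataset type names are hypothetical.
from lsst.daf.butler import Butler

butler = Butler("/repo")
refs = butler.registry.queryDatasets("calexp", collections="HSC/runs/example")
paths = butler.retrieveArtifacts(
    refs, destination="/tmp/calexp_export", transfer="copy", preserve_path=False
)
print(f"Retrieved {len(paths)} artifacts")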

1503 def datasetExists( 

1504 self, 

1505 datasetRefOrType: Union[DatasetRef, DatasetType, str], 

1506 dataId: Optional[DataId] = None, 

1507 *, 

1508 collections: Any = None, 

1509 **kwargs: Any, 

1510 ) -> bool: 

1511 """Return True if the Dataset is actually present in the Datastore. 

1512 

1513 Parameters 

1514 ---------- 

1515 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

1516 When `DatasetRef` the `dataId` should be `None`. 

1517 Otherwise the `DatasetType` or name thereof. 

1518 dataId : `dict` or `DataCoordinate` 

1519 A `dict` of `Dimension` link name, value pairs that label the 

1520 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

1521 should be provided as the first argument. 

1522 collections : Any, optional 

1523 Collections to be searched, overriding ``self.collections``. 

1524 Can be any of the types supported by the ``collections`` argument 

1525 to butler construction. 

1526 **kwargs 

1527 Additional keyword arguments used to augment or construct a 

1528 `DataCoordinate`. See `DataCoordinate.standardize` 

1529 parameters. 

1530 

1531 Raises 

1532 ------ 

1533 LookupError 

1534 Raised if the dataset is not even present in the Registry. 

1535 ValueError 

1536 Raised if a resolved `DatasetRef` was passed as an input, but it 

1537 differs from the one found in the registry. 

1538 TypeError 

1539 Raised if no collections were provided. 

1540 """ 

1541 ref = self._findDatasetRef(datasetRefOrType, dataId, collections=collections, **kwargs) 

1542 return self.datastore.exists(ref) 

1543 
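# A sketch of Butler.datasetExists above. Note that a dataset missing from the
# registry raises LookupError rather than returning False. Names and data ID
# values are hypothetical.
from lsst.daf.butler import Butler

butler = Butler("/repo", collections="HSC/runs/example")
try:
    stored = butler.datasetExists("calexp", instrument="HSC", visit=903334, detector=22)
except LookupError:
    stored = False  # not even registered
print("artifact present in datastore:", stored)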

1544 def removeRuns(self, names: Iterable[str], unstore: bool = True) -> None: 

1545 """Remove one or more `~CollectionType.RUN` collections and the 

1546 datasets within them. 

1547 

1548 Parameters 

1549 ---------- 

1550 names : `Iterable` [ `str` ] 

1551 The names of the collections to remove. 

1552 unstore : `bool`, optional 

1553 If `True` (default), delete datasets from all datastores in which 

1554 they are present, and attempt to roll back the registry deletions if 

1555 datastore deletions fail (which may not always be possible). If 

1556 `False`, datastore records for these datasets are still removed, 

1557 but any artifacts (e.g. files) will not be. 

1558 

1559 Raises 

1560 ------ 

1561 TypeError 

1562 Raised if one or more collections are not of type 

1563 `~CollectionType.RUN`. 

1564 """ 

1565 if not self.isWriteable(): 

1566 raise TypeError("Butler is read-only.") 

1567 names = list(names) 

1568 refs: List[DatasetRef] = [] 

1569 for name in names: 

1570 collectionType = self.registry.getCollectionType(name) 

1571 if collectionType is not CollectionType.RUN: 

1572 raise TypeError(f"The collection type of '{name}' is {collectionType.name}, not RUN.") 

1573 refs.extend(self.registry.queryDatasets(..., collections=name, findFirst=True)) 

1574 with self.registry.transaction(): 

1575 if unstore: 

1576 self.datastore.trash(refs) 

1577 else: 

1578 self.datastore.forget(refs) 

1579 for name in names: 

1580 self.registry.removeCollection(name) 

1581 if unstore: 

1582 # Point of no return for removing artifacts 

1583 self.datastore.emptyTrash() 

1584 
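# A sketch of Butler.removeRuns above: delete two obsolete RUN collections and
# the artifacts of the datasets they contain. Repository path and collection
# names are hypothetical.
from lsst.daf.butler import Butler

butler = Butler("/repo", writeable=True)
butler.removeRuns(["u/someone/old-run-1", "u/someone/old-run-2"], unstore=True)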

1585 def pruneCollection( 

1586 self, name: str, purge: bool = False, unstore: bool = False, unlink: Optional[List[str]] = None 

1587 ) -> None: 

1588 """Remove a collection and possibly prune datasets within it. 

1589 

1590 Parameters 

1591 ---------- 

1592 name : `str` 

1593 Name of the collection to remove. If this is a 

1594 `~CollectionType.TAGGED` or `~CollectionType.CHAINED` collection, 

1595 datasets within the collection are not modified unless ``unstore`` 

1596 is `True`. If this is a `~CollectionType.RUN` collection, 

1597 ``purge`` and ``unstore`` must be `True`, and all datasets in it 

1598 are fully removed from the data repository. 

1599 purge : `bool`, optional 

1600 If `True`, permit `~CollectionType.RUN` collections to be removed, 

1601 fully removing datasets within them. Requires ``unstore=True`` as 

1602 well as an added precaution against accidental deletion. Must be 

1603 `False` (default) if the collection is not a ``RUN``. 

1604 unstore : `bool`, optional 

1605 If `True`, remove all datasets in the collection from all 

1606 datastores in which they appear. 

1607 unlink : `list` [ `str` ], optional 

1608 Before removing the given collection, unlink it from these 

1609 parent collections. 

1610 

1611 Raises 

1612 ------ 

1613 TypeError 

1614 Raised if the butler is read-only or arguments are mutually 

1615 inconsistent. 

1616 """ 

1617 # See pruneDatasets comments for more information about the logic here; 

1618 # the cases are almost the same, but here we can rely on Registry to 

1619 # take care of everything but Datastore deletion when we remove the 

1620 # collection. 

1621 if not self.isWriteable(): 

1622 raise TypeError("Butler is read-only.") 

1623 collectionType = self.registry.getCollectionType(name) 

1624 if purge and not unstore: 

1625 raise PurgeWithoutUnstorePruneCollectionsError() 

1626 if collectionType is CollectionType.RUN and not purge: 

1627 raise RunWithoutPurgePruneCollectionsError(collectionType) 

1628 if collectionType is not CollectionType.RUN and purge: 

1629 raise PurgeUnsupportedPruneCollectionsError(collectionType) 

1630 

1631 def remove(child: str, parent: str) -> None: 

1632 """Remove a child collection from a parent collection.""" 

1633 # Remove child from parent. 

1634 chain = list(self.registry.getCollectionChain(parent)) 

1635 try: 

1636 chain.remove(child) 

1637 except ValueError as e: 

1638 raise RuntimeError(f"{child} is not a child of {parent}") from e 

1639 self.registry.setCollectionChain(parent, chain) 

1640 

1641 with self.registry.transaction(): 

1642 if unlink: 

1643 for parent in unlink: 

1644 remove(name, parent) 

1645 if unstore: 

1646 refs = self.registry.queryDatasets(..., collections=name, findFirst=True) 

1647 self.datastore.trash(refs) 

1648 self.registry.removeCollection(name) 

1649 

1650 if unstore: 

1651 # Point of no return for removing artifacts 

1652 self.datastore.emptyTrash() 

1653 
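# A sketch of Butler.pruneCollection above, covering the two common cases
# described in the docstring. Repository path and collection names are
# hypothetical.
from lsst.daf.butler import Butler

butler = Butler("/repo", writeable=True)
# Drop a TAGGED collection; the datasets themselves stay stored.
butler.pruneCollection("u/someone/tagged-selection")
# Fully remove a RUN collection, its datasets, and their artifacts.
butler.pruneCollection("u/someone/scratch-run", purge=True, unstore=True)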

1654 def pruneDatasets( 

1655 self, 

1656 refs: Iterable[DatasetRef], 

1657 *, 

1658 disassociate: bool = True, 

1659 unstore: bool = False, 

1660 tags: Iterable[str] = (), 

1661 purge: bool = False, 

1662 run: Optional[str] = None, 

1663 ) -> None: 

1664 """Remove one or more datasets from a collection and/or storage. 

1665 

1666 Parameters 

1667 ---------- 

1668 refs : `~collections.abc.Iterable` of `DatasetRef` 

1669 Datasets to prune. These must be "resolved" references (not just 

1670 a `DatasetType` and data ID). 

1671 disassociate : `bool`, optional 

1672 Disassociate pruned datasets from ``tags``, or from all collections 

1673 if ``purge=True``. 

1674 unstore : `bool`, optional 

1675 If `True` (`False` is default) remove these datasets from all 

1676 datastores known to this butler. Note that this will make it 

1677 impossible to retrieve these datasets even via other collections. 

1678 Datasets that are already not stored are ignored by this option. 

1679 tags : `Iterable` [ `str` ], optional 

1680 `~CollectionType.TAGGED` collections to disassociate the datasets 

1681 from. Ignored if ``disassociate`` is `False` or ``purge`` is 

1682 `True`. 

1683 purge : `bool`, optional 

1684 If `True` (`False` is default), completely remove the dataset from 

1685 the `Registry`. To prevent accidental deletions, ``purge`` may 

1686 only be `True` if all of the following conditions are met: 

1687 

1688 - All given datasets are in the given run; 

1689 - ``disassociate`` is `True`; 

1690 - ``unstore`` is `True`. 

1691 

1692 This mode may remove provenance information from datasets other 

1693 than those provided, and should be used with extreme care. 

1694 

1695 Raises 

1696 ------ 

1697 TypeError 

1698 Raised if the butler is read-only, if no collection was provided, 

1699 or the conditions for ``purge=True`` were not met. 

1700 """ 

1701 if not self.isWriteable(): 

1702 raise TypeError("Butler is read-only.") 

1703 if purge: 

1704 if not disassociate: 

1705 raise TypeError("Cannot pass purge=True without disassociate=True.") 

1706 if not unstore: 

1707 raise TypeError("Cannot pass purge=True without unstore=True.") 

1708 elif disassociate: 

1709 tags = tuple(tags) 

1710 if not tags: 

1711 raise TypeError("No tags provided but disassociate=True.") 

1712 for tag in tags: 

1713 collectionType = self.registry.getCollectionType(tag) 

1714 if collectionType is not CollectionType.TAGGED: 

1715 raise TypeError( 

1716 f"Cannot disassociate from collection '{tag}' " 

1717 f"of non-TAGGED type {collectionType.name}." 

1718 ) 

1719 # Transform possibly-single-pass iterable into something we can iterate 

1720 # over multiple times. 

1721 refs = list(refs) 

1722 # Pruning a component of a DatasetRef makes no sense since registry 

1723 # doesn't know about components and datastore might not store 

1724 # components in a separate file 

1725 for ref in refs: 

1726 if ref.datasetType.component(): 

1727 raise ValueError(f"Can not prune a component of a dataset (ref={ref})") 

1728 # We don't need an unreliable Datastore transaction for this, because 

1729 # we've been extra careful to ensure that Datastore.trash only involves 

1730 # mutating the Registry (it can _look_ at Datastore-specific things, 

1731 # but shouldn't change them), and hence all operations here are 

1732 # Registry operations. 

1733 with self.registry.transaction(): 

1734 if unstore: 

1735 self.datastore.trash(refs) 

1736 if purge: 

1737 self.registry.removeDatasets(refs) 

1738 elif disassociate: 

1739 assert tags, "Guaranteed by earlier logic in this function." 

1740 for tag in tags: 

1741 self.registry.disassociate(tag, refs) 

1742 # We've exited the Registry transaction, and apparently committed. 

1743 # (if there was an exception, everything rolled back, and it's as if 

1744 # nothing happened - and we never get here). 

1745 # Datastore artifacts are not yet gone, but they're clearly marked 

1746 # as trash, so if we fail to delete now because of (e.g.) filesystem 

1747 # problems we can try again later, and if manual administrative 

1748 # intervention is required, it's pretty clear what that should entail: 

1749 # deleting everything on disk and in private Datastore tables that is 

1750 # in the dataset_location_trash table. 

1751 if unstore: 

1752 # Point of no return for removing artifacts 

1753 self.datastore.emptyTrash() 

1754 
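# A sketch of Butler.pruneDatasets above: fully delete the datasets found by a
# query. Repository path, collection, and dataset type names are hypothetical.
from lsst.daf.butler import Butler

butler = Butler("/repo", writeable=True)
refs = list(
    butler.registry.queryDatasets("calexp", collections="u/someone/scratch-run", findFirst=True)
)
# purge=True requires unstore=True (and disassociate, which defaults to True);
# the run named here is the one containing the datasets being purged.
butler.pruneDatasets(refs, purge=True, unstore=True, run="u/someone/scratch-run")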

1755 @transactional 

1756 def ingest( 

1757 self, 

1758 *datasets: FileDataset, 

1759 transfer: Optional[str] = "auto", 

1760 run: Optional[str] = None, 

1761 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE, 

1762 record_validation_info: bool = True, 

1763 ) -> None: 

1764 """Store and register one or more datasets that already exist on disk. 

1765 

1766 Parameters 

1767 ---------- 

1768 datasets : `FileDataset` 

1769 Each positional argument is a struct containing information about 

1770 a file to be ingested, including its URI (either absolute or 

1771 relative to the datastore root, if applicable), a `DatasetRef`, 

1772 and optionally a formatter class or its fully-qualified string 

1773 name. If a formatter is not provided, the formatter that would be 

1774 used for `put` is assumed. On successful return, all 

1775 `FileDataset.ref` attributes will have their `DatasetRef.id` 

1776 attribute populated and all `FileDataset.formatter` attributes will 

1777 be set to the formatter class used. `FileDataset.path` attributes 

1778 may be modified to put paths in whatever the datastore considers a 

1779 standardized form. 

1780 transfer : `str`, optional 

1781 If not `None`, must be one of 'auto', 'move', 'copy', 'direct', 

1782 'split', 'hardlink', 'relsymlink' or 'symlink', indicating how to 

1783 transfer the file. 

1784 run : `str`, optional 

1785 The name of the run ingested datasets should be added to, 

1786 overriding ``self.run``. 

1787 idGenerationMode : `DatasetIdGenEnum`, optional 

1788 Specifies option for generating dataset IDs. By default unique IDs 

1789 are generated for each inserted dataset. 

1790 record_validation_info : `bool`, optional 

1791 If `True`, the default, the datastore can record validation 

1792 information associated with the file. If `False` the datastore 

1793 will not attempt to track any information such as checksums 

1794 or file sizes. This can be useful if such information is tracked 

1795 in an external system or if the file is to be compressed in place. 

1796 It is up to the datastore whether this parameter is relevant. 

1797 

1798 Raises 

1799 ------ 

1800 TypeError 

1801 Raised if the butler is read-only or if no run was provided. 

1802 NotImplementedError 

1803 Raised if the `Datastore` does not support the given transfer mode. 

1804 DatasetTypeNotSupportedError 

1805 Raised if one or more files to be ingested have a dataset type that 

1806 is not supported by the `Datastore`. 

1807 FileNotFoundError 

1808 Raised if one of the given files does not exist. 

1809 FileExistsError 

1810 Raised if transfer is not `None` but the (internal) location the 

1811 file would be moved to is already occupied. 

1812 

1813 Notes 

1814 ----- 

1815 This operation is not fully exception safe: if a database operation 

1816 fails, the given `FileDataset` instances may be only partially updated. 

1817 

1818 It is atomic in terms of database operations (they will either all 

1819 succeed or all fail), provided the database engine implements 

1820 transactions correctly. It will attempt to be atomic in terms of 

1821 filesystem operations as well, but this cannot be implemented 

1822 rigorously for most datastores. 

1823 """ 

1824 if not self.isWriteable(): 

1825 raise TypeError("Butler is read-only.") 

1826 progress = Progress("lsst.daf.butler.Butler.ingest", level=logging.DEBUG) 

1827 # Reorganize the inputs so they're grouped by DatasetType and then 

1828 # data ID. We also include a list of DatasetRefs for each FileDataset 

1829 # to hold the resolved DatasetRefs returned by the Registry, before 

1830 # it's safe to swap them into FileDataset.refs. 

1831 # Some type annotation aliases to make that clearer: 

1832 GroupForType = Dict[DataCoordinate, Tuple[FileDataset, List[DatasetRef]]] 

1833 GroupedData = MutableMapping[DatasetType, GroupForType] 

1834 # The actual data structure: 

1835 groupedData: GroupedData = defaultdict(dict) 

1836 # And the nested loop that populates it: 

1837 for dataset in progress.wrap(datasets, desc="Grouping by dataset type"): 

1838 # This list intentionally shared across the inner loop, since it's 

1839 # associated with `dataset`. 

1840 resolvedRefs: List[DatasetRef] = [] 

1841 

1842 # Somewhere to store pre-existing refs if we have an 

1843 # execution butler. 

1844 existingRefs: List[DatasetRef] = [] 

1845 

1846 for ref in dataset.refs: 

1847 if ref.dataId in groupedData[ref.datasetType]: 

1848 raise ConflictingDefinitionError( 

1849 f"Ingest conflict. Dataset {dataset.path} has same" 

1850 " DataId as other ingest dataset" 

1851 f" {groupedData[ref.datasetType][ref.dataId][0].path} " 

1852 f" ({ref.dataId})" 

1853 ) 

1854 if self._allow_put_of_predefined_dataset: 

1855 existing_ref = self.registry.findDataset( 

1856 ref.datasetType, dataId=ref.dataId, collections=run 

1857 ) 

1858 if existing_ref: 

1859 if self.datastore.knows(existing_ref): 

1860 raise ConflictingDefinitionError( 

1861 f"Dataset associated with path {dataset.path}" 

1862 f" already exists as {existing_ref}." 

1863 ) 

1864 # Store this ref elsewhere since it already exists 

1865 # and we do not want to remake it but we do want 

1866 # to store it in the datastore. 

1867 existingRefs.append(existing_ref) 

1868 

1869 # Nothing else to do until we have finished 

1870 # iterating. 

1871 continue 

1872 

1873 groupedData[ref.datasetType][ref.dataId] = (dataset, resolvedRefs) 

1874 

1875 if existingRefs: 

1876 

1877 if len(dataset.refs) != len(existingRefs): 

1878 # Keeping track of partially pre-existing datasets is hard 

1879 # and should generally never happen. For now don't allow 

1880 # it. 

1881 raise ConflictingDefinitionError( 

1882 f"For dataset {dataset.path} some dataIds already exist" 

1883 " in registry but others do not. This is not supported." 

1884 ) 

1885 

1886 # Attach the resolved refs if we found them. 

1887 dataset.refs = existingRefs 

1888 

1889 # Now we can bulk-insert into Registry for each DatasetType. 

1890 for datasetType, groupForType in progress.iter_item_chunks( 

1891 groupedData.items(), desc="Bulk-inserting datasets by type" 

1892 ): 

1893 refs = self.registry.insertDatasets( 

1894 datasetType, 

1895 dataIds=groupForType.keys(), 

1896 run=run, 

1897 expand=self.datastore.needs_expanded_data_ids(transfer, datasetType), 

1898 idGenerationMode=idGenerationMode, 

1899 ) 

1900 # Append those resolved DatasetRefs to the new lists we set up for 

1901 # them. 

1902 for ref, (_, resolvedRefs) in zip(refs, groupForType.values()): 

1903 resolvedRefs.append(ref) 

1904 

1905 # Go back to the original FileDatasets to replace their refs with the 

1906 # new resolved ones. 

1907 for groupForType in progress.iter_chunks( 

1908 groupedData.values(), desc="Reassociating resolved dataset refs with files" 

1909 ): 

1910 for dataset, resolvedRefs in groupForType.values(): 

1911 dataset.refs = resolvedRefs 

1912 

1913 # Bulk-insert everything into Datastore. 

1914 self.datastore.ingest(*datasets, transfer=transfer, record_validation_info=record_validation_info) 

1915 
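# A sketch of Butler.ingest above: register and store a file that already
# exists on disk. The run, dataset type, data ID, and file path are
# hypothetical placeholders.
from lsst.daf.butler import Butler, DatasetRef, FileDataset

butler = Butler("/repo", run="u/someone/ingest-demo")
dataset_type = butler.registry.getDatasetType("raw")
ref = DatasetRef(dataset_type, {"instrument": "HSC", "exposure": 903334, "detector": 22})
butler.ingest(FileDataset(path="/data/incoming/raw.fits", refs=[ref]), transfer="copy")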

1916 @contextlib.contextmanager 

1917 def export( 

1918 self, 

1919 *, 

1920 directory: Optional[str] = None, 

1921 filename: Optional[str] = None, 

1922 format: Optional[str] = None, 

1923 transfer: Optional[str] = None, 

1924 ) -> Iterator[RepoExportContext]: 

1925 """Export datasets from the repository represented by this `Butler`. 

1926 

1927 This method is a context manager that returns a helper object 

1928 (`RepoExportContext`) that is used to indicate what information from 

1929 the repository should be exported. 

1930 

1931 Parameters 

1932 ---------- 

1933 directory : `str`, optional 

1934 Directory dataset files should be written to if ``transfer`` is not 

1935 `None`. 

1936 filename : `str`, optional 

1937 Name for the file that will include database information associated 

1938 with the exported datasets. If this is not an absolute path and 

1939 ``directory`` is not `None`, it will be written to ``directory`` 

1940 instead of the current working directory. Defaults to 

1941 "export.{format}". 

1942 format : `str`, optional 

1943 File format for the database information file. If `None`, the 

1944 extension of ``filename`` will be used. 

1945 transfer : `str`, optional 

1946 Transfer mode passed to `Datastore.export`. 

1947 

1948 Raises 

1949 ------ 

1950 TypeError 

1951 Raised if the set of arguments passed is inconsistent. 

1952 

1953 Examples 

1954 -------- 

1955 Typically the `Registry.queryDataIds` and `Registry.queryDatasets` 

1956 methods are used to provide the iterables over data IDs and/or datasets 

1957 to be exported:: 

1958 

1959 with butler.export(filename="exports.yaml") as export: 

1960 # Export all flats, but none of the dimension element rows 

1961 # (i.e. data ID information) associated with them. 

1962 export.saveDatasets(butler.registry.queryDatasets("flat"), 

1963 elements=()) 

1964 # Export all datasets that start with "deepCoadd_" and all of 

1965 # their associated data ID information. 

1966 export.saveDatasets(butler.registry.queryDatasets("deepCoadd_*")) 

1967 """ 

1968 if directory is None and transfer is not None: 

1969 raise TypeError("Cannot transfer without providing a directory.") 

1970 if transfer == "move": 

1971 raise TypeError("Transfer may not be 'move': export is read-only") 

1972 if format is None: 

1973 if filename is None: 

1974 raise TypeError("At least one of 'filename' or 'format' must be provided.") 

1975 else: 

1976 _, format = os.path.splitext(filename) 

1977 elif filename is None: 

1978 filename = f"export.{format}" 

1979 if directory is not None: 

1980 filename = os.path.join(directory, filename) 

1981 BackendClass = get_class_of(self._config["repo_transfer_formats"][format]["export"]) 

1982 with open(filename, "w") as stream: 

1983 backend = BackendClass(stream) 

1984 try: 

1985 helper = RepoExportContext( 

1986 self.registry, self.datastore, backend=backend, directory=directory, transfer=transfer 

1987 ) 

1988 yield helper 

1989 except BaseException: 

1990 raise 

1991 else: 

1992 helper._finish() 

1993 
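# A sketch of Butler.export above: write an export file plus copies of the
# dataset artifacts for later import into another repository. Paths, the
# collection, and the dataset type name are hypothetical.
from lsst.daf.butler import Butler

butler = Butler("/repo")
with butler.export(directory="/tmp/export", filename="export.yaml", transfer="copy") as export:
    export.saveDatasets(butler.registry.queryDatasets("flat", collections="HSC/calib"))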

1994 def import_( 

1995 self, 

1996 *, 

1997 directory: Optional[str] = None, 

1998 filename: Union[str, TextIO, None] = None, 

1999 format: Optional[str] = None, 

2000 transfer: Optional[str] = None, 

2001 skip_dimensions: Optional[Set] = None, 

2002 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE, 

2003 reuseIds: bool = False, 

2004 ) -> None: 

2005 """Import datasets into this repository that were exported from a 

2006 different butler repository via `~lsst.daf.butler.Butler.export`. 

2007 

2008 Parameters 

2009 ---------- 

2010 directory : `str`, optional 

2011 Directory containing dataset files to import from. If `None`, 

2012 ``filename`` and all dataset file paths specified therein must 

2013 be absolute. 

2014 filename : `str` or `TextIO`, optional 

2015 A stream or name of file that contains database information 

2016 associated with the exported datasets, typically generated by 

2017 `~lsst.daf.butler.Butler.export`. If this is a string (name) and 

2018 is not an absolute path, does not exist in the current working 

2019 directory, and ``directory`` is not `None`, it is assumed to be in 

2020 ``directory``. Defaults to "export.{format}". 

2021 format : `str`, optional 

2022 File format for ``filename``. If `None`, the extension of 

2023 ``filename`` will be used. 

2024 transfer : `str`, optional 

2025 Transfer mode passed to `~lsst.daf.butler.Datastore.ingest`. 

2026 skip_dimensions : `set`, optional 

2027 Names of dimensions that should be skipped and not imported. 

2028 idGenerationMode : `DatasetIdGenEnum`, optional 

2029 Specifies option for generating dataset IDs when IDs are not 

2030 provided or their type does not match backend type. By default 

2031 unique IDs are generated for each inserted dataset. 

2032 reuseIds : `bool`, optional 

2033 If `True` then forces re-use of imported dataset IDs for integer 

2034 IDs, which are normally generated as auto-incremented; an exception 

2035 will be raised if imported IDs clash with existing ones. This 

2036 option has no effect on the use of globally-unique IDs which are 

2037 always re-used (or generated if integer IDs are being imported). 

2038 

2039 Raises 

2040 ------ 

2041 TypeError 

2042 Raised if the set of arguments passed is inconsistent, or if the 

2043 butler is read-only. 

2044 """ 

2045 if not self.isWriteable(): 

2046 raise TypeError("Butler is read-only.") 

2047 if format is None: 

2048 if filename is None: 

2049 raise TypeError("At least one of 'filename' or 'format' must be provided.") 

2050 else: 

2051 _, format = os.path.splitext(filename) # type: ignore 

2052 elif filename is None: 

2053 filename = f"export.{format}" 

2054 if isinstance(filename, str) and directory is not None and not os.path.exists(filename): 

2055 filename = os.path.join(directory, filename) 

2056 BackendClass = get_class_of(self._config["repo_transfer_formats"][format]["import"]) 

2057 

2058 def doImport(importStream: TextIO) -> None: 

2059 backend = BackendClass(importStream, self.registry) 

2060 backend.register() 

2061 with self.transaction(): 

2062 backend.load( 

2063 self.datastore, 

2064 directory=directory, 

2065 transfer=transfer, 

2066 skip_dimensions=skip_dimensions, 

2067 idGenerationMode=idGenerationMode, 

2068 reuseIds=reuseIds, 

2069 ) 

2070 

2071 if isinstance(filename, str): 

2072 with open(filename, "r") as stream: 

2073 doImport(stream) 

2074 else: 

2075 doImport(filename) 

2076 
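# A sketch of Butler.import_ above, consuming an export file such as the one
# produced in the previous sketch. Paths are hypothetical.
from lsst.daf.butler import Butler

butler = Butler("/other-repo", writeable=True)
butler.import_(directory="/tmp/export", filename="export.yaml", transfer="symlink")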

2077 def transfer_from( 

2078 self, 

2079 source_butler: Butler, 

2080 source_refs: Iterable[DatasetRef], 

2081 transfer: str = "auto", 

2082 id_gen_map: Optional[Dict[str, DatasetIdGenEnum]] = None, 

2083 skip_missing: bool = True, 

2084 register_dataset_types: bool = False, 

2085 ) -> List[DatasetRef]: 

2086 """Transfer datasets to this Butler from a run in another Butler. 

2087 

2088 Parameters 

2089 ---------- 

2090 source_butler : `Butler` 

2091 Butler from which the datasets are to be transferred. 

2092 source_refs : iterable of `DatasetRef` 

2093 Datasets defined in the source butler that should be transferred to 

2094 this butler. 

2095 transfer : `str`, optional 

2096 Transfer mode passed to `~lsst.daf.butler.Datastore.transfer_from`. 

2097 id_gen_map : `dict` of [`str`, `DatasetIdGenEnum`], optional 

2098 A mapping of dataset type to ID generation mode. Only used if 

2099 the source butler is using integer IDs. Should not be used 

2100 if this receiving butler uses integer IDs. If not provided, 

2101 dataset import always uses `DatasetIdGenEnum.UNIQUE`. 

2102 skip_missing : `bool` 

2103 If `True`, datasets with no datastore artifact associated with 

2104 them are not transferred. If `False` a registry entry will be 

2105 created even if no datastore record is created (and so will 

2106 look equivalent to the dataset being unstored). 

2107 register_dataset_types : `bool` 

2108 If `True` any missing dataset types are registered. Otherwise 

2109 an exception is raised. 

2110 

2111 Returns 

2112 ------- 

2113 refs : `list` of `DatasetRef` 

2114 The refs added to this Butler. 

2115 

2116 Notes 

2117 ----- 

2118 Requires that any dimension definitions are already present in the 

2119 receiving Butler. The datastore artifact has to exist for a transfer 

2120 to be made but non-existence is not an error. 

2121 

2122 Datasets that already exist in this run will be skipped. 

2123 

2124 The datasets are imported as part of a transaction, although 

2125 dataset types are registered before the transaction is started. 

2126 This means that it is possible for a dataset type to be registered 

2127 even though transfer has failed. 

2128 """ 

2129 if not self.isWriteable(): 

2130 raise TypeError("Butler is read-only.") 

2131 progress = Progress("lsst.daf.butler.Butler.transfer_from", level=VERBOSE) 

2132 

2133 # Will iterate through the refs multiple times so need to convert 

2134 # to a list if this isn't a collection. 

2135 if not isinstance(source_refs, collections.abc.Collection): 

2136 source_refs = list(source_refs) 

2137 

2138 original_count = len(source_refs) 

2139 log.info("Transferring %d datasets into %s", original_count, str(self)) 

2140 

2141 if id_gen_map is None: 

2142 id_gen_map = {} 

2143 

2144 # In some situations the datastore artifact may be missing 

2145 # and we do not want that registry entry to be imported. 

2146 # Asking datastore is not sufficient, the records may have been 

2147 # purged, we have to ask for the (predicted) URI and check 

2148 # existence explicitly. Execution butler is set up exactly like 

2149 # this with no datastore records. 

2150 artifact_existence: Dict[ResourcePath, bool] = {} 

2151 if skip_missing: 

2152 dataset_existence = source_butler.datastore.mexists( 

2153 source_refs, artifact_existence=artifact_existence 

2154 ) 

2155 source_refs = [ref for ref, exists in dataset_existence.items() if exists] 

2156 filtered_count = len(source_refs) 

2157 log.verbose( 

2158 "%d datasets removed because the artifact does not exist. Now have %d.", 

2159 original_count - filtered_count, 

2160 filtered_count, 

2161 ) 

2162 

2163 # Importing requires that we group the refs by dataset type and run 

2164 # before doing the import. 

2165 source_dataset_types = set() 

2166 grouped_refs = defaultdict(list) 

2167 grouped_indices = defaultdict(list) 

2168 for i, ref in enumerate(source_refs): 

2169 grouped_refs[ref.datasetType, ref.run].append(ref) 

2170 grouped_indices[ref.datasetType, ref.run].append(i) 

2171 source_dataset_types.add(ref.datasetType) 

2172 

2173 # Check to see if the dataset type in the source butler has 

2174 # the same definition in the target butler and register missing 

2175 # ones if requested. Registration must happen outside a transaction. 

2176 newly_registered_dataset_types = set() 

2177 for datasetType in source_dataset_types: 

2178 if register_dataset_types: 

2179 # Let this raise immediately if inconsistent. Continuing 

2180 # on to find additional inconsistent dataset types 

2181 # might result in additional unwanted dataset types being 

2182 # registered. 

2183 if self.registry.registerDatasetType(datasetType): 

2184 newly_registered_dataset_types.add(datasetType) 

2185 else: 

2186 # If the dataset type is missing, let it fail immediately. 

2187 target_dataset_type = self.registry.getDatasetType(datasetType.name) 

2188 if target_dataset_type != datasetType: 

2189 raise ConflictingDefinitionError( 

2190 "Source butler dataset type differs from definition" 

2191 f" in target butler: {datasetType} !=" 

2192 f" {target_dataset_type}" 

2193 ) 

2194 if newly_registered_dataset_types: 

2195 # We may have registered some even if there were inconsistencies 

2196 # but should let people know (or else remove them again). 

2197 log.log( 

2198 VERBOSE, 

2199 "Registered the following dataset types in the target Butler: %s", 

2200 ", ".join(d.name for d in newly_registered_dataset_types), 

2201 ) 

2202 else: 

2203 log.log(VERBOSE, "All required dataset types are known to the target Butler") 

2204 

2205 # The returned refs should be identical for UUIDs. 

2206 # For now must also support integers and so need to retain the 

2207 # newly-created refs from this registry. 

2208 # Pre-size it so we can assign refs into the correct slots 

2209 transferred_refs_tmp: List[Optional[DatasetRef]] = [None] * len(source_refs) 

2210 default_id_gen = DatasetIdGenEnum.UNIQUE 

2211 

2212 handled_collections: Set[str] = set() 

2213 

2214 # Do all the importing in a single transaction. 

2215 with self.transaction(): 

2216 for (datasetType, run), refs_to_import in progress.iter_item_chunks( 

2217 grouped_refs.items(), desc="Importing to registry by run and dataset type" 

2218 ): 

2219 if run not in handled_collections: 

2220 run_doc = source_butler.registry.getCollectionDocumentation(run) 

2221 registered = self.registry.registerRun(run, doc=run_doc) 

2222 handled_collections.add(run) 

2223 if registered: 

2224 log.log(VERBOSE, "Creating output run %s", run) 

2225 

2226 id_generation_mode = default_id_gen 

2227 if isinstance(refs_to_import[0].id, int): 

2228 # ID generation mode might need to be overridden when 

2229 # targeting UUID 

2230 id_generation_mode = id_gen_map.get(datasetType.name, default_id_gen) 

2231 

2232 n_refs = len(refs_to_import) 

2233 log.verbose( 

2234 "Importing %d ref%s of dataset type %s into run %s", 

2235 n_refs, 

2236 "" if n_refs == 1 else "s", 

2237 datasetType.name, 

2238 run, 

2239 ) 

2240 

2241 # No way to know if this butler's registry uses UUID. 

2242 # We have to trust the caller on this. If it fails they will 

2243 # have to change their approach. We can't catch the exception 

2244 # and retry with unique because that will mess up the 

2245 # transaction handling. We aren't allowed to ask the registry 

2246 # manager what type of ID it is using. 

2247 imported_refs = self.registry._importDatasets( 

2248 refs_to_import, idGenerationMode=id_generation_mode, expand=False 

2249 ) 

2250 

2251 # Map them into the correct slots to match the initial order 

2252 for i, ref in zip(grouped_indices[datasetType, run], imported_refs): 

2253 transferred_refs_tmp[i] = ref 

2254 

2255 # Mypy insists that we might have None in here so we have to make 

2256 # that explicit by assigning to a new variable and filtering out 

2257 # something that won't be there. 

2258 transferred_refs = [ref for ref in transferred_refs_tmp if ref is not None] 

2259 

2260 # Check consistency 

2261 assert len(source_refs) == len(transferred_refs), "Different number of refs imported than given" 

2262 

2263 log.verbose("Imported %d datasets into destination butler", len(transferred_refs)) 

2264 

2265 # The transferred refs need to be reordered to match the original 

2266 # ordering given by the caller. Without this the datastore transfer 

2267 # will be broken. 

2268 

2269 # Ask the datastore to transfer. The datastore has to check that 

2270 # the source datastore is compatible with the target datastore. 

2271 self.datastore.transfer_from( 

2272 source_butler.datastore, 

2273 source_refs, 

2274 local_refs=transferred_refs, 

2275 transfer=transfer, 

2276 artifact_existence=artifact_existence, 

2277 ) 

2278 

2279 return transferred_refs 

2280 
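# A sketch of Butler.transfer_from above: copy selected datasets (registry
# entries plus artifacts) from one repository into another. Repository paths,
# the collection, and the dataset type name are hypothetical.
from lsst.daf.butler import Butler

source = Butler("/repo/source")
target = Butler("/repo/target", writeable=True)
refs = source.registry.queryDatasets("calexp", collections="HSC/runs/example")
transferred = target.transfer_from(
    source, refs, transfer="copy", register_dataset_types=True
)
print(f"Transferred {len(transferred)} datasets")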

2281 def validateConfiguration( 

2282 self, 

2283 logFailures: bool = False, 

2284 datasetTypeNames: Optional[Iterable[str]] = None, 

2285 ignore: Optional[Iterable[str]] = None, 

2286 ) -> None: 

2287 """Validate butler configuration. 

2288 

2289 Checks that each `DatasetType` can be stored in the `Datastore`. 

2290 

2291 Parameters 

2292 ---------- 

2293 logFailures : `bool`, optional 

2294 If `True`, output a log message for every validation error 

2295 detected. 

2296 datasetTypeNames : iterable of `str`, optional 

2297 The `DatasetType` names that should be checked. This allows 

2298 only a subset to be selected. 

2299 ignore : iterable of `str`, optional 

2300 Names of DatasetTypes to skip over. This can be used to skip 

2301 known problems. If a named `DatasetType` corresponds to a 

2302 composite, all components of that `DatasetType` will also be 

2303 ignored. 

2304 

2305 Raises 

2306 ------ 

2307 ButlerValidationError 

2308 Raised if there is some inconsistency with how this Butler 

2309 is configured. 

2310 """ 

2311 if datasetTypeNames: 

2312 datasetTypes = [self.registry.getDatasetType(name) for name in datasetTypeNames] 

2313 else: 

2314 datasetTypes = list(self.registry.queryDatasetTypes()) 

2315 

2316 # filter out anything from the ignore list 

2317 if ignore: 

2318 ignore = set(ignore) 

2319 datasetTypes = [ 

2320 e for e in datasetTypes if e.name not in ignore and e.nameAndComponent()[0] not in ignore 

2321 ] 

2322 else: 

2323 ignore = set() 

2324 

2325 # Find all the registered instruments 

2326 instruments = set(record.name for record in self.registry.queryDimensionRecords("instrument")) 

2327 

2328 # For each datasetType that has an instrument dimension, create 

2329 # a DatasetRef for each defined instrument 

2330 datasetRefs = [] 

2331 

2332 for datasetType in datasetTypes: 

2333 if "instrument" in datasetType.dimensions: 

2334 for instrument in instruments: 

2335 datasetRef = DatasetRef( 

2336 datasetType, {"instrument": instrument}, conform=False # type: ignore 

2337 ) 

2338 datasetRefs.append(datasetRef) 

2339 

2340 entities: List[Union[DatasetType, DatasetRef]] = [] 

2341 entities.extend(datasetTypes) 

2342 entities.extend(datasetRefs) 

2343 

2344 datastoreErrorStr = None 

2345 try: 

2346 self.datastore.validateConfiguration(entities, logFailures=logFailures) 

2347 except ValidationError as e: 

2348 datastoreErrorStr = str(e) 

2349 

2350 # Also check that the LookupKeys used by the datastores match 

2351 # registry and storage class definitions 

2352 keys = self.datastore.getLookupKeys() 

2353 

2354 failedNames = set() 

2355 failedDataId = set() 

2356 for key in keys: 

2357 if key.name is not None: 

2358 if key.name in ignore: 

2359 continue 

2360 

2361 # skip if specific datasetType names were requested and this 

2362 # name does not match 

2363 if datasetTypeNames and key.name not in datasetTypeNames: 

2364 continue 

2365 

2366 # See if it is a StorageClass or a DatasetType 

2367 if key.name in self.storageClasses: 

2368 pass 

2369 else: 

2370 try: 

2371 self.registry.getDatasetType(key.name) 

2372 except KeyError: 

2373 if logFailures: 

2374 log.critical("Key '%s' does not correspond to a DatasetType or StorageClass", key) 

2375 failedNames.add(key) 

2376 else: 

2377 # Dimensions are checked for consistency when the Butler 

2378 # is created and rendezvoused with a universe. 

2379 pass 

2380 

2381 # Check that the instrument is a valid instrument 

2382 # Currently only support instrument so check for that 

2383 if key.dataId: 

2384 dataIdKeys = set(key.dataId) 

2385 if {"instrument"} != dataIdKeys: 

2386 if logFailures: 

2387 log.critical("Key '%s' has unsupported DataId override", key) 

2388 failedDataId.add(key) 

2389 elif key.dataId["instrument"] not in instruments: 

2390 if logFailures: 

2391 log.critical("Key '%s' has unknown instrument", key) 

2392 failedDataId.add(key) 

2393 

2394 messages = [] 

2395 

2396 if datastoreErrorStr: 

2397 messages.append(datastoreErrorStr) 

2398 

2399 for failed, msg in ( 

2400 (failedNames, "Keys without corresponding DatasetType or StorageClass entry: "), 

2401 (failedDataId, "Keys with bad DataId entries: "), 

2402 ): 

2403 if failed: 

2404 msg += ", ".join(str(k) for k in failed) 

2405 messages.append(msg) 

2406 

2407 if messages: 

2408 raise ValidationError(";\n".join(messages)) 

2409 
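# A sketch of Butler.validateConfiguration above: check that the datastore
# configuration can handle the registered dataset types, skipping a known
# problem case. The repository path and ignored name are hypothetical.
from lsst.daf.butler import Butler

butler = Butler("/repo")
butler.validateConfiguration(logFailures=True, ignore=["raw"])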

2410 @property 

2411 def collections(self) -> CollectionSearch: 

2412 """The collections to search by default, in order (`CollectionSearch`). 

2413 

2414 This is an alias for ``self.registry.defaults.collections``. It cannot 

2415 be set directly in isolation, but all defaults may be changed together 

2416 by assigning a new `RegistryDefaults` instance to 

2417 ``self.registry.defaults``. 

2418 """ 

2419 return self.registry.defaults.collections 

2420 

2421 @property 

2422 def run(self) -> Optional[str]: 

2423 """Name of the run this butler writes outputs to by default (`str` or 

2424 `None`). 

2425 

2426 This is an alias for ``self.registry.defaults.run``. It cannot be set 

2427 directly in isolation, but all defaults may be changed together by 

2428 assigning a new `RegistryDefaults` instance to 

2429 ``self.registry.defaults``. 

2430 """ 

2431 return self.registry.defaults.run 

2432 

2433 @property 

2434 def dimensions(self) -> DimensionUniverse: 

2435 # Docstring inherited. 

2436 return self.registry.dimensions 

2437 

2438 registry: Registry 

2439 """The object that manages dataset metadata and relationships (`Registry`). 

2440 

2441 Most operations that don't involve reading or writing butler datasets are 

2442 accessible only via `Registry` methods. 

2443 """ 

2444 

2445 datastore: Datastore 

2446 """The object that manages actual dataset storage (`Datastore`). 

2447 

2448 Direct user access to the datastore should rarely be necessary; the primary 

2449 exception is the case where a `Datastore` implementation provides extra 

2450 functionality beyond what the base class defines. 

2451 """ 

2452 

2453 storageClasses: StorageClassFactory 

2454 """An object that maps known storage class names to objects that fully 

2455 describe them (`StorageClassFactory`). 

2456 """ 

2457 

2458 _allow_put_of_predefined_dataset: bool 

2459 """Allow a put to succeed even if there is already a registry entry for it 

2460 but not a datastore record. (`bool`)."""