# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""
Butler top level classes.
"""
from __future__ import annotations

__all__ = (
    "Butler",
    "ButlerValidationError",
    "PruneCollectionsArgsError",
    "PurgeWithoutUnstorePruneCollectionsError",
    "RunWithoutPurgePruneCollectionsError",
    "PurgeUnsupportedPruneCollectionsError",
)


from collections import defaultdict
import contextlib
import logging
import numbers
import os
from typing import (
    Any,
    ClassVar,
    Counter,
    Dict,
    Iterable,
    Iterator,
    List,
    MutableMapping,
    Optional,
    Set,
    TextIO,
    Tuple,
    Type,
    Union,
)

try:
    import boto3
except ImportError:
    boto3 = None

from lsst.utils import doImport
from .core import (
    AmbiguousDatasetError,
    ButlerURI,
    Config,
    ConfigSubset,
    DataCoordinate,
    DataId,
    DataIdValue,
    DatasetRef,
    DatasetType,
    Datastore,
    Dimension,
    DimensionConfig,
    FileDataset,
    Progress,
    StorageClassFactory,
    Timespan,
    ValidationError,
)
from .core.repoRelocation import BUTLER_ROOT_TAG
from .core.utils import transactional, getClassOf
from ._deferredDatasetHandle import DeferredDatasetHandle
from ._butlerConfig import ButlerConfig
from .registry import Registry, RegistryConfig, RegistryDefaults, CollectionType, ConflictingDefinitionError
from .registry.interfaces import DatasetIdGenEnum
from .registry.wildcards import CollectionSearch
from .transfers import RepoExportContext

log = logging.getLogger(__name__)


class ButlerValidationError(ValidationError):
    """There is a problem with the Butler configuration."""
    pass


class PruneCollectionsArgsError(TypeError):
    """Base class for errors relating to Butler.pruneCollections input
    arguments.
    """
    pass


class PurgeWithoutUnstorePruneCollectionsError(PruneCollectionsArgsError):
    """Raised when purge and unstore are both required to be True, and
    purge is True but unstore is False.
    """

    def __init__(self) -> None:
        super().__init__("Cannot pass purge=True without unstore=True.")


class RunWithoutPurgePruneCollectionsError(PruneCollectionsArgsError):
    """Raised when pruning a RUN collection but purge is False."""

    def __init__(self, collectionType: CollectionType):
        self.collectionType = collectionType
        super().__init__(f"Cannot prune RUN collection {self.collectionType.name} without purge=True.")


class PurgeUnsupportedPruneCollectionsError(PruneCollectionsArgsError):
    """Raised when purge is True but is not supported for the given
    collection."""

    def __init__(self, collectionType: CollectionType):
        self.collectionType = collectionType
        super().__init__(
            f"Cannot prune {self.collectionType} collection {self.collectionType.name} with purge=True.")

class Butler:
    """Main entry point for the data access system.

    Parameters
    ----------
    config : `ButlerConfig`, `Config` or `str`, optional
        Configuration. Anything acceptable to the `ButlerConfig` constructor.
        If a directory path is given the configuration will be read from a
        ``butler.yaml`` file in that location. If `None` is given default
        values will be used.
    butler : `Butler`, optional
        If provided, construct a new Butler that uses the same registry and
        datastore as the given one, but with the given collection and run.
        Incompatible with the ``config``, ``searchPaths``, and ``writeable``
        arguments.
    collections : `str` or `Iterable` [ `str` ], optional
        An expression specifying the collections to be searched (in order)
        when reading datasets. This may be a `str` collection name or an
        iterable thereof. See :ref:`daf_butler_collection_expressions` for
        more information. These collections are not registered automatically;
        they must already exist before any method uses them, though they may
        be registered after the `Butler` is initialized.
    run : `str`, optional
        Name of the `~CollectionType.RUN` collection new datasets should be
        inserted into. If ``collections`` is `None` and ``run`` is not `None`,
        ``collections`` will be set to ``[run]``. If not `None`, this
        collection will automatically be registered. If this is not set (and
        ``writeable`` is not set either), a read-only butler will be created.
    searchPaths : `list` of `str`, optional
        Directory paths to search when calculating the full Butler
        configuration. Not used if the supplied config is already a
        `ButlerConfig`.
    writeable : `bool`, optional
        Explicitly sets whether the butler supports write operations. If not
        provided, a read-write butler is created if ``run`` is not `None`.
    inferDefaults : `bool`, optional
        If `True` (default) infer default data ID values from the values
        present in the datasets in ``collections``: if all collections have
        the same value (or no value) for a governor dimension, that value
        will be the default for that dimension. Nonexistent collections are
        ignored. If a default value is provided explicitly for a governor
        dimension via ``**kwargs``, no default will be inferred for that
        dimension.
    **kwargs : `str`
        Default data ID key-value pairs. These may only identify "governor"
        dimensions like ``instrument`` and ``skymap``.

    Examples
    --------
    While there are many ways to control exactly how a `Butler` interacts with
    the collections in its `Registry`, the most common cases are still simple.

    For a read-only `Butler` that searches one collection, do::

        butler = Butler("/path/to/repo", collections=["u/alice/DM-50000"])

    For a read-write `Butler` that writes to and reads from a
    `~CollectionType.RUN` collection::

        butler = Butler("/path/to/repo", run="u/alice/DM-50000/a")

    The `Butler` passed to a ``PipelineTask`` is often much more complex,
    because we want to write to one `~CollectionType.RUN` collection but read
    from several others (as well)::

        butler = Butler("/path/to/repo", run="u/alice/DM-50000/a",
                        collections=["u/alice/DM-50000/a",
                                     "u/bob/DM-49998",
                                     "HSC/defaults"])

    This butler will `put` new datasets to the run ``u/alice/DM-50000/a``.
    Datasets will be read first from that run (since it appears first in the
    chain), and then from ``u/bob/DM-49998`` and finally ``HSC/defaults``.

    Finally, one can always create a `Butler` with no collections::

        butler = Butler("/path/to/repo", writeable=True)

    This can be extremely useful when you just want to use
    ``butler.registry``, e.g. for inserting dimension data or managing
    collections, or when the collections you want to use with the butler are
    not consistent. Passing ``writeable`` explicitly here is only necessary
    if you want to be able to make changes to the repo; usually the value for
    ``writeable`` can be guessed from the collection arguments provided, but
    it defaults to `False` when there are no collection arguments.
    """

    def __init__(self, config: Union[Config, str, None] = None, *,
                 butler: Optional[Butler] = None,
                 collections: Any = None,
                 run: Optional[str] = None,
                 searchPaths: Optional[List[str]] = None,
                 writeable: Optional[bool] = None,
                 inferDefaults: bool = True,
                 **kwargs: str,
                 ):
        defaults = RegistryDefaults(collections=collections, run=run, infer=inferDefaults, **kwargs)
        # Load registry, datastore, etc. from config or existing butler.
        if butler is not None:
            if config is not None or searchPaths is not None or writeable is not None:
                raise TypeError("Cannot pass 'config', 'searchPaths', or 'writeable' "
                                "arguments with 'butler' argument.")
            self.registry = butler.registry.copy(defaults)
            self.datastore = butler.datastore
            self.storageClasses = butler.storageClasses
            self._config: ButlerConfig = butler._config
        else:
            self._config = ButlerConfig(config, searchPaths=searchPaths)
            if "root" in self._config:
                butlerRoot = self._config["root"]
            else:
                butlerRoot = self._config.configDir
            if writeable is None:
                writeable = run is not None
            self.registry = Registry.fromConfig(self._config, butlerRoot=butlerRoot, writeable=writeable,
                                                defaults=defaults)
            self.datastore = Datastore.fromConfig(self._config, self.registry.getDatastoreBridgeManager(),
                                                  butlerRoot=butlerRoot)
            self.storageClasses = StorageClassFactory()
            self.storageClasses.addFromConfig(self._config)
        if "run" in self._config or "collection" in self._config:
            raise ValueError("Passing a run or collection via configuration is no longer supported.")

    GENERATION: ClassVar[int] = 3
    """This is a Generation 3 Butler.

    This attribute may be removed in the future, once the Generation 2 Butler
    interface has been fully retired; it should only be used in transitional
    code.
    """

    @staticmethod
    def makeRepo(root: str, config: Union[Config, str, None] = None,
                 dimensionConfig: Union[Config, str, None] = None, standalone: bool = False,
                 searchPaths: Optional[List[str]] = None, forceConfigRoot: bool = True,
                 outfile: Optional[str] = None, overwrite: bool = False) -> Config:
        """Create an empty data repository by adding a butler.yaml config
        to a repository root directory.

        Parameters
        ----------
        root : `str` or `ButlerURI`
            Path or URI to the root location of the new repository. Will be
            created if it does not exist.
        config : `Config` or `str`, optional
            Configuration to write to the repository, after setting any
            root-dependent Registry or Datastore config options. Can not
            be a `ButlerConfig` or a `ConfigSubset`. If `None`, default
            configuration will be used. Root-dependent config options
            specified in this config are overwritten if ``forceConfigRoot``
            is `True`.
        dimensionConfig : `Config` or `str`, optional
            Configuration for dimensions, will be used to initialize registry
            database.
        standalone : `bool`
            If True, write all expanded defaults, not just customized or
            repository-specific settings.
            This (mostly) decouples the repository from the default
            configuration, insulating it from changes to the defaults (which
            may be good or bad, depending on the nature of the changes).
            Future *additions* to the defaults will still be picked up when
            initializing `Butlers` to repos created with ``standalone=True``.
        searchPaths : `list` of `str`, optional
            Directory paths to search when calculating the full butler
            configuration.
        forceConfigRoot : `bool`, optional
            If `False`, any values present in the supplied ``config`` that
            would normally be reset are not overridden and will appear
            directly in the output config. This allows non-standard overrides
            of the root directory for a datastore or registry to be given.
            If this parameter is `True` the values for ``root`` will be
            forced into the resulting config if appropriate.
        outfile : `str`, optional
            If not-`None`, the output configuration will be written to this
            location rather than into the repository itself. Can be a URI
            string. Can refer to a directory that will be used to write
            ``butler.yaml``.
        overwrite : `bool`, optional
            Create a new configuration file even if one already exists
            in the specified output location. Default is to raise
            an exception.

        Returns
        -------
        config : `Config`
            The updated `Config` instance written to the repo.

        Raises
        ------
        ValueError
            Raised if a ButlerConfig or ConfigSubset is passed instead of a
            regular Config (as these subclasses would make it impossible to
            support ``standalone=False``).
        FileExistsError
            Raised if the output config file already exists.
        os.error
            Raised if the directory does not exist, exists but is not a
            directory, or cannot be created.

        Notes
        -----
        Note that when ``standalone=False`` (the default), the configuration
        search path (see `ConfigSubset.defaultSearchPaths`) that was used to
        construct the repository should also be used to construct any Butlers
        to avoid configuration inconsistencies.
        """
        if isinstance(config, (ButlerConfig, ConfigSubset)):
            raise ValueError("makeRepo must be passed a regular Config without defaults applied.")

        # Ensure that the root of the repository exists or can be made
        uri = ButlerURI(root, forceDirectory=True)
        uri.mkdir()

        config = Config(config)

        # If we are creating a new repo from scratch with relative roots,
        # do not propagate an explicit root from the config file
        if "root" in config:
            del config["root"]

        full = ButlerConfig(config, searchPaths=searchPaths)  # this applies defaults
        datastoreClass: Type[Datastore] = doImport(full["datastore", "cls"])
        datastoreClass.setConfigRoot(BUTLER_ROOT_TAG, config, full, overwrite=forceConfigRoot)

        # if key exists in given config, parse it, otherwise parse the defaults
        # in the expanded config
        if config.get(("registry", "db")):
            registryConfig = RegistryConfig(config)
        else:
            registryConfig = RegistryConfig(full)
        defaultDatabaseUri = registryConfig.makeDefaultDatabaseUri(BUTLER_ROOT_TAG)
        if defaultDatabaseUri is not None:
            Config.updateParameters(RegistryConfig, config, full,
                                    toUpdate={"db": defaultDatabaseUri},
                                    overwrite=forceConfigRoot)
        else:
            Config.updateParameters(RegistryConfig, config, full, toCopy=("db",),
                                    overwrite=forceConfigRoot)

        if standalone:
            config.merge(full)
        else:
            # Always expand the registry.managers section into the per-repo
            # config, because after the database schema is created, it's not
            # allowed to change anymore. Note that in the standalone=True
            # branch, _everything_ in the config is expanded, so there's no
            # need to special case this.
            Config.updateParameters(RegistryConfig, config, full, toCopy=("managers",), overwrite=False)
        configURI: Union[str, ButlerURI]
        if outfile is not None:
            # When writing to a separate location we must include
            # the root of the butler repo in the config else it won't know
            # where to look.
            config["root"] = uri.geturl()
            configURI = outfile
        else:
            configURI = uri
        config.dumpToUri(configURI, overwrite=overwrite)

        # Create Registry and populate tables
        registryConfig = RegistryConfig(config.get("registry"))
        dimensionConfig = DimensionConfig(dimensionConfig)
        Registry.createFromConfig(registryConfig, dimensionConfig=dimensionConfig, butlerRoot=root)

        return config
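    # A minimal usage sketch for `makeRepo` followed by construction of a
    # writeable `Butler` on the new repository. The path and run name are
    # hypothetical placeholders, not values taken from this module.
    #
    #     from lsst.daf.butler import Butler
    #
    #     Butler.makeRepo("/tmp/example_repo")
    #     butler = Butler("/tmp/example_repo", run="demo/run1")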

    @classmethod
    def _unpickle(cls, config: ButlerConfig, collections: Optional[CollectionSearch], run: Optional[str],
                  defaultDataId: Dict[str, str], writeable: bool) -> Butler:
        """Callable used to unpickle a Butler.

        We prefer not to use ``Butler.__init__`` directly so we can force some
        of its many arguments to be keyword-only (note that ``__reduce__``
        can only invoke callables with positional arguments).

        Parameters
        ----------
        config : `ButlerConfig`
            Butler configuration, already coerced into a true `ButlerConfig`
            instance (and hence after any search paths for overrides have been
            utilized).
        collections : `CollectionSearch`
            Names of the default collections to read from.
        run : `str`, optional
            Name of the default `~CollectionType.RUN` collection to write to.
        defaultDataId : `dict` [ `str`, `str` ]
            Default data ID values.
        writeable : `bool`
            Whether the Butler should support write operations.

        Returns
        -------
        butler : `Butler`
            A new `Butler` instance.
        """
        # MyPy doesn't recognize that the kwargs below are totally valid; it
        # seems to think ``**defaultDataId`` is a _positional_ argument!
        return cls(config=config, collections=collections, run=run, writeable=writeable,
                   **defaultDataId)  # type: ignore

    def __reduce__(self) -> tuple:
        """Support pickling.
        """
        return (Butler._unpickle, (self._config, self.collections, self.run,
                                   self.registry.defaults.dataId.byName(),
                                   self.registry.isWriteable()))

    def __str__(self) -> str:
        return "Butler(collections={}, run={}, datastore='{}', registry='{}')".format(
            self.collections, self.run, self.datastore, self.registry)

    def isWriteable(self) -> bool:
        """Return `True` if this `Butler` supports write operations.
        """
        return self.registry.isWriteable()

    @contextlib.contextmanager
    def transaction(self) -> Iterator[None]:
        """Context manager supporting `Butler` transactions.

        Transactions can be nested.
        """
        with self.registry.transaction():
            with self.datastore.transaction():
                yield
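    # A hedged sketch of how the transaction context manager is typically
    # used: if any operation inside the block raises, registry and datastore
    # changes are rolled back together. ``butler``, ``catalog``, ``metadata``,
    # the dataset type names and ``dataId`` are illustrative placeholders.
    #
    #     with butler.transaction():
    #         butler.put(catalog, "srcCatalog", dataId)
    #         butler.put(metadata, "srcCatalog_metadata", dataId)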

    def _standardizeArgs(self, datasetRefOrType: Union[DatasetRef, DatasetType, str],
                         dataId: Optional[DataId] = None, **kwds: Any
                         ) -> Tuple[DatasetType, Optional[DataId]]:
        """Standardize the arguments passed to several Butler APIs.

        Parameters
        ----------
        datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
            When `DatasetRef` the `dataId` should be `None`.
            Otherwise the `DatasetType` or name thereof.
        dataId : `dict` or `DataCoordinate`
            A `dict` of `Dimension` link name, value pairs that label the
            `DatasetRef` within a Collection. When `None`, a `DatasetRef`
            should be provided as the second argument.
        kwds
            Additional keyword arguments used to augment or construct a
            `DataCoordinate`. See `DataCoordinate.standardize`
            parameters.

        Returns
        -------
        datasetType : `DatasetType`
            A `DatasetType` instance extracted from ``datasetRefOrType``.
        dataId : `dict` or `DataId`, optional
            Argument that can be used (along with ``kwds``) to construct a
            `DataId`.

        Notes
        -----
        Butler APIs that conceptually need a DatasetRef also allow passing a
        `DatasetType` (or the name of one) and a `DataId` (or a dict and
        keyword arguments that can be used to construct one) separately. This
        method accepts those arguments and always returns a true `DatasetType`
        and a `DataId` or `dict`.

        Standardization of `dict` vs `DataId` is best handled by passing the
        returned ``dataId`` (and ``kwds``) to `Registry` APIs, which are
        generally similarly flexible.
        """
        externalDatasetType: Optional[DatasetType] = None
        internalDatasetType: Optional[DatasetType] = None
        if isinstance(datasetRefOrType, DatasetRef):
            if dataId is not None or kwds:
                raise ValueError("DatasetRef given, cannot use dataId as well")
            externalDatasetType = datasetRefOrType.datasetType
            dataId = datasetRefOrType.dataId
        else:
            # Don't check whether DataId is provided, because Registry APIs
            # can usually construct a better error message when it wasn't.
            if isinstance(datasetRefOrType, DatasetType):
                externalDatasetType = datasetRefOrType
            else:
                internalDatasetType = self.registry.getDatasetType(datasetRefOrType)

        # Check that they are self-consistent
        if externalDatasetType is not None:
            internalDatasetType = self.registry.getDatasetType(externalDatasetType.name)
            if externalDatasetType != internalDatasetType:
                raise ValueError(f"Supplied dataset type ({externalDatasetType}) inconsistent with "
                                 f"registry definition ({internalDatasetType})")

        assert internalDatasetType is not None
        return internalDatasetType, dataId

    def _findDatasetRef(self, datasetRefOrType: Union[DatasetRef, DatasetType, str],
                        dataId: Optional[DataId] = None, *,
                        collections: Any = None,
                        allowUnresolved: bool = False,
                        **kwds: Any) -> DatasetRef:
        """Shared logic for methods that start with a search for a dataset in
        the registry.

        Parameters
        ----------
        datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
            When `DatasetRef` the `dataId` should be `None`.
            Otherwise the `DatasetType` or name thereof.
        dataId : `dict` or `DataCoordinate`, optional
            A `dict` of `Dimension` link name, value pairs that label the
            `DatasetRef` within a Collection. When `None`, a `DatasetRef`
            should be provided as the first argument.
        collections : Any, optional
            Collections to be searched, overriding ``self.collections``.
            Can be any of the types supported by the ``collections`` argument
            to butler construction.
        allowUnresolved : `bool`, optional
            If `True`, return an unresolved `DatasetRef` if finding a resolved
            one in the `Registry` fails. Defaults to `False`.
        kwds
            Additional keyword arguments used to augment or construct a
            `DataId`. See `DataId` parameters.

        Returns
        -------
        ref : `DatasetRef`
            A reference to the dataset identified by the given arguments.

        Raises
        ------
        LookupError
            Raised if no matching dataset exists in the `Registry` (and
            ``allowUnresolved is False``).
        ValueError
            Raised if a resolved `DatasetRef` was passed as an input, but it
            differs from the one found in the registry.
        TypeError
            Raised if no collections were provided.
        """
        datasetType, dataId = self._standardizeArgs(datasetRefOrType, dataId, **kwds)
        if isinstance(datasetRefOrType, DatasetRef):
            idNumber = datasetRefOrType.id
        else:
            idNumber = None
        timespan: Optional[Timespan] = None

        # Process dimension records that are using record information
        # rather than ids
        newDataId: Dict[str, DataIdValue] = {}
        byRecord: Dict[str, Dict[str, Any]] = defaultdict(dict)

        # If the entire dataId comes from keyword parameters we do not need
        # to do anything here because they can't be of the form
        # exposure.obs_id, since a "." is not allowed in a keyword parameter.
        if dataId:
            for k, v in dataId.items():
                # If we have a Dimension we do not need to do anything
                # because it cannot be a compound key.
                if isinstance(k, str) and "." in k:
                    # Someone is using a more human-readable dataId
                    dimensionName, record = k.split(".", 1)
                    byRecord[dimensionName][record] = v
                elif isinstance(k, Dimension):
                    newDataId[k.name] = v
                else:
                    newDataId[k] = v

        # Go through the updated dataId and check the type in case someone is
        # using an alternate key. We have already filtered out the compound
        # "dimension.record" keys.
        not_dimensions = {}

        # Will need to look in the dataId and the keyword arguments
        # and will remove them if they need to be fixed or are unrecognized.
        for dataIdDict in (newDataId, kwds):
            # Use a list so we can adjust the dict safely in the loop
            for dimensionName in list(dataIdDict):
                value = dataIdDict[dimensionName]
                try:
                    dimension = self.registry.dimensions.getStaticDimensions()[dimensionName]
                except KeyError:
                    # This is not a real dimension
                    not_dimensions[dimensionName] = value
                    del dataIdDict[dimensionName]
                    continue

                # Convert an integral type to an explicit int to simplify
                # comparisons here
                if isinstance(value, numbers.Integral):
                    value = int(value)

                if not isinstance(value, dimension.primaryKey.getPythonType()):
                    for alternate in dimension.alternateKeys:
                        if isinstance(value, alternate.getPythonType()):
                            byRecord[dimensionName][alternate.name] = value
                            del dataIdDict[dimensionName]
                            log.debug("Converting dimension %s to %s.%s=%s",
                                      dimensionName, dimensionName, alternate.name, value)
                            break
                    else:
                        log.warning("Type mismatch found for value '%r' provided for dimension %s. "
                                    "Could not find matching alternative (primary key has type %s) "
                                    "so attempting to use as-is.",
                                    value, dimensionName, dimension.primaryKey.getPythonType())

        # If we have some unrecognized dimensions we have to try to connect
        # them to records in other dimensions. This is made more complicated
        # by some dimensions having records with clashing names. A mitigation
        # is that we can tell by this point which dimensions are missing
        # for the DatasetType, but this does not work for calibrations
        # where additional dimensions can be used to constrain the temporal
        # axis.
        if not_dimensions:
            # Calculate missing dimensions
            provided = set(newDataId) | set(kwds) | set(byRecord)
            missingDimensions = datasetType.dimensions.names - provided

            # For calibrations we may well be needing temporal dimensions
            # so rather than always including all dimensions in the scan
            # restrict things a little. It is still possible for there
            # to be confusion over day_obs in visit vs exposure for example.
            # If we are not searching calibration collections things may
            # fail but they are going to fail anyway because of the
            # ambiguousness of the dataId...
            candidateDimensions: Set[str] = set()
            candidateDimensions.update(missingDimensions)
            if datasetType.isCalibration():
                for dim in self.registry.dimensions.getStaticDimensions():
                    if dim.temporal:
                        candidateDimensions.add(str(dim))

            # Look up table for the first association with a dimension
            guessedAssociation: Dict[str, Dict[str, Any]] = defaultdict(dict)

            # Keep track of whether an item is associated with multiple
            # dimensions.
            counter: Counter[str] = Counter()
            assigned: Dict[str, Set[str]] = defaultdict(set)

            # Go through the missing dimensions and associate the
            # given names with records within those dimensions
            for dimensionName in candidateDimensions:
                dimension = self.registry.dimensions.getStaticDimensions()[dimensionName]
                fields = dimension.metadata.names | dimension.uniqueKeys.names
                for field in not_dimensions:
                    if field in fields:
                        guessedAssociation[dimensionName][field] = not_dimensions[field]
                        counter[dimensionName] += 1
                        assigned[field].add(dimensionName)

            # There is a chance we have allocated a single dataId item
            # to multiple dimensions. Need to decide which should be retained.
            # For now assume that the most popular alternative wins.
            # This means that day_obs with seq_num will result in
            # exposure.day_obs and not visit.day_obs.
            # Also prefer an explicitly missing dimension over an inferred
            # temporal dimension.
            for fieldName, assignedDimensions in assigned.items():
                if len(assignedDimensions) > 1:
                    # Pick the most popular (preferring mandatory dimensions)
                    requiredButMissing = assignedDimensions.intersection(missingDimensions)
                    if requiredButMissing:
                        candidateDimensions = requiredButMissing
                    else:
                        candidateDimensions = assignedDimensions

                    # Select the relevant items and get a new restricted
                    # counter.
                    theseCounts = {k: v for k, v in counter.items() if k in candidateDimensions}
                    duplicatesCounter: Counter[str] = Counter()
                    duplicatesCounter.update(theseCounts)

                    # Choose the most common. If they are equally common
                    # we will pick the one that was found first
                    # (most_common returns a list of (key, count) tuples).
                    selected = duplicatesCounter.most_common(1)[0][0]

                    log.debug("Ambiguous dataId entry '%s' associated with multiple dimensions: %s."
                              " Removed ambiguity by choosing dimension %s.",
                              fieldName, ", ".join(assignedDimensions), selected)

                    for candidateDimension in assignedDimensions:
                        if candidateDimension != selected:
                            del guessedAssociation[candidateDimension][fieldName]

            # Update the record look up dict with the new associations
            for dimensionName, values in guessedAssociation.items():
                if values:  # A dict might now be empty
                    log.debug("Assigned non-dimension dataId keys to dimension %s: %s",
                              dimensionName, values)
                    byRecord[dimensionName].update(values)

        if byRecord:
            # Some record specifiers were found so we need to convert
            # them to the Id form
            for dimensionName, values in byRecord.items():
                if dimensionName in newDataId:
                    log.warning("DataId specified explicit %s dimension value of %s in addition to"
                                " general record specifiers for it of %s. Ignoring record information.",
                                dimensionName, newDataId[dimensionName], str(values))
                    continue

                # Build up a WHERE expression -- use single quotes
                def quote(s: Any) -> str:
                    if isinstance(s, str):
                        return f"'{s}'"
                    else:
                        return s

                where = " AND ".join(f"{dimensionName}.{k} = {quote(v)}"
                                     for k, v in values.items())

                # Hopefully we get a single record that matches
                records = set(self.registry.queryDimensionRecords(dimensionName, dataId=newDataId,
                                                                  where=where, **kwds))

                if len(records) != 1:
                    if len(records) > 1:
                        log.debug("Received %d records from constraints of %s", len(records), str(values))
                        for r in records:
                            log.debug("- %s", str(r))
                        raise RuntimeError(f"DataId specification for dimension {dimensionName} is not"
                                           f" uniquely constrained to a single dataset by {values}."
                                           f" Got {len(records)} results.")
                    raise RuntimeError(f"DataId specification for dimension {dimensionName} matched no"
                                       f" records when constrained by {values}")

                # Get the primary key from the real dimension object
                dimension = self.registry.dimensions.getStaticDimensions()[dimensionName]
                if not isinstance(dimension, Dimension):
                    raise RuntimeError(
                        f"{dimension.name} is not a true dimension, and cannot be used in data IDs."
                    )
                newDataId[dimensionName] = getattr(records.pop(), dimension.primaryKey.name)

            # We have modified the dataId so need to switch to it
            dataId = newDataId

        if datasetType.isCalibration():
            # Because this is a calibration dataset, first try to standardize
            # the data ID without restricting the dimensions to those of the
            # dataset type requested, because there may be extra dimensions
            # that provide temporal information for a validity-range lookup.
            dataId = DataCoordinate.standardize(dataId, universe=self.registry.dimensions,
                                                defaults=self.registry.defaults.dataId, **kwds)
            if dataId.graph.temporal:
                dataId = self.registry.expandDataId(dataId)
                timespan = dataId.timespan
        else:
            # Standardize the data ID to just the dimensions of the dataset
            # type instead of letting registry.findDataset do it, so we get the
            # result even if no dataset is found.
            dataId = DataCoordinate.standardize(dataId, graph=datasetType.dimensions,
                                                defaults=self.registry.defaults.dataId, **kwds)
        # Always lookup the DatasetRef, even if one is given, to ensure it is
        # present in the current collection.
        ref = self.registry.findDataset(datasetType, dataId, collections=collections, timespan=timespan)
        if ref is None:
            if allowUnresolved:
                return DatasetRef(datasetType, dataId)
            else:
                if collections is None:
                    collections = self.registry.defaults.collections
                raise LookupError(f"Dataset {datasetType.name} with data ID {dataId} "
                                  f"could not be found in collections {collections}.")
        if idNumber is not None and idNumber != ref.id:
            if collections is None:
                collections = self.registry.defaults.collections
            raise ValueError(f"DatasetRef.id provided ({idNumber}) does not match "
                             f"id ({ref.id}) in registry in collections {collections}.")
        return ref

    @transactional
    def put(self, obj: Any, datasetRefOrType: Union[DatasetRef, DatasetType, str],
            dataId: Optional[DataId] = None, *,
            run: Optional[str] = None,
            **kwds: Any) -> DatasetRef:
        """Store and register a dataset.

        Parameters
        ----------
        obj : `object`
            The dataset.
        datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
            When `DatasetRef` is provided, ``dataId`` should be `None`.
            Otherwise the `DatasetType` or name thereof.
        dataId : `dict` or `DataCoordinate`
            A `dict` of `Dimension` link name, value pairs that label the
            `DatasetRef` within a Collection. When `None`, a `DatasetRef`
            should be provided as the second argument.
        run : `str`, optional
            The name of the run the dataset should be added to, overriding
            ``self.run``.
        kwds
            Additional keyword arguments used to augment or construct a
            `DataCoordinate`. See `DataCoordinate.standardize`
            parameters.

        Returns
        -------
        ref : `DatasetRef`
            A reference to the stored dataset, updated with the correct id if
            given.

        Raises
        ------
        TypeError
            Raised if the butler is read-only or if no run has been provided.
        """
        log.debug("Butler put: %s, dataId=%s, run=%s", datasetRefOrType, dataId, run)
        if not self.isWriteable():
            raise TypeError("Butler is read-only.")
        datasetType, dataId = self._standardizeArgs(datasetRefOrType, dataId, **kwds)
        if isinstance(datasetRefOrType, DatasetRef) and datasetRefOrType.id is not None:
            raise ValueError("DatasetRef must not be in registry, must have None id")

        # Add Registry Dataset entry.
        dataId = self.registry.expandDataId(dataId, graph=datasetType.dimensions, **kwds)
        ref, = self.registry.insertDatasets(datasetType, run=run, dataIds=[dataId])

        # Add Datastore entry.
        self.datastore.put(obj, ref)

        return ref
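    # A hedged usage sketch for `put`: the dataset type name, data ID keys
    # and the ``exposure`` object are illustrative placeholders only.
    #
    #     butler = Butler("/path/to/repo", run="u/alice/example")
    #     ref = butler.put(exposure, "calexp", instrument="HSC",
    #                      visit=903334, detector=42)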

    def getDirect(self, ref: DatasetRef, *, parameters: Optional[Dict[str, Any]] = None) -> Any:
        """Retrieve a stored dataset.

        Unlike `Butler.get`, this method allows datasets outside the Butler's
        collection to be read as long as the `DatasetRef` that identifies them
        can be obtained separately.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to an already stored dataset.
        parameters : `dict`
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the dataset.

        Returns
        -------
        obj : `object`
            The dataset.
        """
        return self.datastore.get(ref, parameters=parameters)
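    # A minimal sketch of `getDirect` with resolved references obtained from
    # a registry query; the dataset type and collection names are
    # hypothetical.
    #
    #     for ref in butler.registry.queryDatasets("calexp",
    #                                              collections="u/alice/example"):
    #         obj = butler.getDirect(ref)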

    def getDirectDeferred(self, ref: DatasetRef, *,
                          parameters: Union[dict, None] = None) -> DeferredDatasetHandle:
        """Create a `DeferredDatasetHandle` which can later retrieve a dataset
        from a resolved `DatasetRef`.

        Parameters
        ----------
        ref : `DatasetRef`
            Resolved reference to an already stored dataset.
        parameters : `dict`
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the dataset.

        Returns
        -------
        obj : `DeferredDatasetHandle`
            A handle which can be used to retrieve a dataset at a later time.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``ref.id is None``, i.e. the reference is unresolved.
        """
        if ref.id is None:
            raise AmbiguousDatasetError(
                f"Dataset of type {ref.datasetType.name} with data ID {ref.dataId} is not resolved."
            )
        return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters)

    def getDeferred(self, datasetRefOrType: Union[DatasetRef, DatasetType, str],
                    dataId: Optional[DataId] = None, *,
                    parameters: Union[dict, None] = None,
                    collections: Any = None,
                    **kwds: Any) -> DeferredDatasetHandle:
        """Create a `DeferredDatasetHandle` which can later retrieve a dataset,
        after an immediate registry lookup.

        Parameters
        ----------
        datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
            When `DatasetRef` the `dataId` should be `None`.
            Otherwise the `DatasetType` or name thereof.
        dataId : `dict` or `DataCoordinate`, optional
            A `dict` of `Dimension` link name, value pairs that label the
            `DatasetRef` within a Collection. When `None`, a `DatasetRef`
            should be provided as the first argument.
        parameters : `dict`
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the dataset.
        collections : Any, optional
            Collections to be searched, overriding ``self.collections``.
            Can be any of the types supported by the ``collections`` argument
            to butler construction.
        kwds
            Additional keyword arguments used to augment or construct a
            `DataId`. See `DataId` parameters.

        Returns
        -------
        obj : `DeferredDatasetHandle`
            A handle which can be used to retrieve a dataset at a later time.

        Raises
        ------
        LookupError
            Raised if no matching dataset exists in the `Registry`.
        ValueError
            Raised if a resolved `DatasetRef` was passed as an input, but it
            differs from the one found in the registry.
        TypeError
            Raised if no collections were provided.
        """
        ref = self._findDatasetRef(datasetRefOrType, dataId, collections=collections, **kwds)
        return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters)
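    # A hedged sketch of deferred retrieval: the lookup happens immediately,
    # but the (possibly large) dataset is only read when ``get`` is called on
    # the handle. Names and data ID values are placeholders.
    #
    #     handle = butler.getDeferred("calexp", instrument="HSC",
    #                                 visit=903334, detector=42)
    #     ...
    #     image = handle.get()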

    def get(self, datasetRefOrType: Union[DatasetRef, DatasetType, str],
            dataId: Optional[DataId] = None, *,
            parameters: Optional[Dict[str, Any]] = None,
            collections: Any = None,
            **kwds: Any) -> Any:
        """Retrieve a stored dataset.

        Parameters
        ----------
        datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
            When `DatasetRef` the `dataId` should be `None`.
            Otherwise the `DatasetType` or name thereof.
        dataId : `dict` or `DataCoordinate`
            A `dict` of `Dimension` link name, value pairs that label the
            `DatasetRef` within a Collection. When `None`, a `DatasetRef`
            should be provided as the first argument.
        parameters : `dict`
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the dataset.
        collections : Any, optional
            Collections to be searched, overriding ``self.collections``.
            Can be any of the types supported by the ``collections`` argument
            to butler construction.
        kwds
            Additional keyword arguments used to augment or construct a
            `DataCoordinate`. See `DataCoordinate.standardize`
            parameters.

        Returns
        -------
        obj : `object`
            The dataset.

        Raises
        ------
        ValueError
            Raised if a resolved `DatasetRef` was passed as an input, but it
            differs from the one found in the registry.
        LookupError
            Raised if no matching dataset exists in the `Registry`.
        TypeError
            Raised if no collections were provided.

        Notes
        -----
        When looking up datasets in a `~CollectionType.CALIBRATION` collection,
        this method requires that the given data ID include temporal dimensions
        beyond the dimensions of the dataset type itself, in order to find the
        dataset with the appropriate validity range. For example, a "bias"
        dataset with native dimensions ``{instrument, detector}`` could be
        fetched with a ``{instrument, detector, exposure}`` data ID, because
        ``exposure`` is a temporal dimension.
        """
        log.debug("Butler get: %s, dataId=%s, parameters=%s", datasetRefOrType, dataId, parameters)
        ref = self._findDatasetRef(datasetRefOrType, dataId, collections=collections, **kwds)
        return self.getDirect(ref, parameters=parameters)
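    # A hedged sketch of the calibration lookup described in the Notes above:
    # a "bias" with dimensions {instrument, detector} is fetched with an
    # ``exposure`` value so the validity range can be resolved. All values
    # and the collection name are placeholders.
    #
    #     bias = butler.get("bias", instrument="HSC", detector=42,
    #                       exposure=903334, collections="HSC/calib")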

    def getURIs(self, datasetRefOrType: Union[DatasetRef, DatasetType, str],
                dataId: Optional[DataId] = None, *,
                predict: bool = False,
                collections: Any = None,
                run: Optional[str] = None,
                **kwds: Any) -> Tuple[Optional[ButlerURI], Dict[str, ButlerURI]]:
        """Return the URIs associated with the dataset.

        Parameters
        ----------
        datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
            When `DatasetRef` the `dataId` should be `None`.
            Otherwise the `DatasetType` or name thereof.
        dataId : `dict` or `DataCoordinate`
            A `dict` of `Dimension` link name, value pairs that label the
            `DatasetRef` within a Collection. When `None`, a `DatasetRef`
            should be provided as the first argument.
        predict : `bool`
            If `True`, allow URIs to be returned for datasets that have not
            been written.
        collections : Any, optional
            Collections to be searched, overriding ``self.collections``.
            Can be any of the types supported by the ``collections`` argument
            to butler construction.
        run : `str`, optional
            Run to use for predictions, overriding ``self.run``.
        kwds
            Additional keyword arguments used to augment or construct a
            `DataCoordinate`. See `DataCoordinate.standardize`
            parameters.

        Returns
        -------
        primary : `ButlerURI`
            The URI to the primary artifact associated with this dataset.
            If the dataset was disassembled within the datastore this
            may be `None`.
        components : `dict`
            URIs to any components associated with the dataset artifact.
            Can be empty if there are no components.
        """
        ref = self._findDatasetRef(datasetRefOrType, dataId, allowUnresolved=predict,
                                   collections=collections, **kwds)
        if ref.id is None:  # only possible if predict is True
            if run is None:
                run = self.run
                if run is None:
                    raise TypeError("Cannot predict location with run=None.")
            # Lie about ID, because we can't guess it, and only
            # Datastore.getURIs() will ever see it (and it doesn't use it).
            ref = ref.resolved(id=0, run=run)
        return self.datastore.getURIs(ref, predict)

    def getURI(self, datasetRefOrType: Union[DatasetRef, DatasetType, str],
               dataId: Optional[DataId] = None, *,
               predict: bool = False,
               collections: Any = None,
               run: Optional[str] = None,
               **kwds: Any) -> ButlerURI:
        """Return the URI to the Dataset.

        Parameters
        ----------
        datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
            When `DatasetRef` the `dataId` should be `None`.
            Otherwise the `DatasetType` or name thereof.
        dataId : `dict` or `DataCoordinate`
            A `dict` of `Dimension` link name, value pairs that label the
            `DatasetRef` within a Collection. When `None`, a `DatasetRef`
            should be provided as the first argument.
        predict : `bool`
            If `True`, allow URIs to be returned for datasets that have not
            been written.
        collections : Any, optional
            Collections to be searched, overriding ``self.collections``.
            Can be any of the types supported by the ``collections`` argument
            to butler construction.
        run : `str`, optional
            Run to use for predictions, overriding ``self.run``.
        kwds
            Additional keyword arguments used to augment or construct a
            `DataCoordinate`. See `DataCoordinate.standardize`
            parameters.

        Returns
        -------
        uri : `ButlerURI`
            URI pointing to the Dataset within the datastore. If the
            Dataset does not exist in the datastore, and if ``predict`` is
            `True`, the URI will be a prediction and will include a URI
            fragment "#predicted".
            If the datastore does not have entities that relate well
            to the concept of a URI the returned URI string will be
            descriptive. The returned URI is not guaranteed to be obtainable.

        Raises
        ------
        LookupError
            A URI has been requested for a dataset that does not exist and
            guessing is not allowed.
        ValueError
            Raised if a resolved `DatasetRef` was passed as an input, but it
            differs from the one found in the registry.
        TypeError
            Raised if no collections were provided.
        RuntimeError
            Raised if a URI is requested for a dataset that consists of
            multiple artifacts.
        """
        primary, components = self.getURIs(datasetRefOrType, dataId=dataId, predict=predict,
                                           collections=collections, run=run, **kwds)

        if primary is None or components:
            raise RuntimeError(f"Dataset ({datasetRefOrType}) includes distinct URIs for components. "
                               "Use Butler.getURIs() instead.")
        return primary
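    # A hedged sketch of `getURI`, including the ``predict`` mode for a
    # dataset that has not been written yet. All names and values are
    # placeholders.
    #
    #     uri = butler.getURI("calexp", instrument="HSC", visit=903334,
    #                         detector=42)
    #     future_uri = butler.getURI("calexp", instrument="HSC", visit=903335,
    #                                detector=42, predict=True,
    #                                run="u/alice/example")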

    def datasetExists(self, datasetRefOrType: Union[DatasetRef, DatasetType, str],
                      dataId: Optional[DataId] = None, *,
                      collections: Any = None,
                      **kwds: Any) -> bool:
        """Return True if the Dataset is actually present in the Datastore.

        Parameters
        ----------
        datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
            When `DatasetRef` the `dataId` should be `None`.
            Otherwise the `DatasetType` or name thereof.
        dataId : `dict` or `DataCoordinate`
            A `dict` of `Dimension` link name, value pairs that label the
            `DatasetRef` within a Collection. When `None`, a `DatasetRef`
            should be provided as the first argument.
        collections : Any, optional
            Collections to be searched, overriding ``self.collections``.
            Can be any of the types supported by the ``collections`` argument
            to butler construction.
        kwds
            Additional keyword arguments used to augment or construct a
            `DataCoordinate`. See `DataCoordinate.standardize`
            parameters.

        Raises
        ------
        LookupError
            Raised if the dataset is not even present in the Registry.
        ValueError
            Raised if a resolved `DatasetRef` was passed as an input, but it
            differs from the one found in the registry.
        TypeError
            Raised if no collections were provided.
        """
        ref = self._findDatasetRef(datasetRefOrType, dataId, collections=collections, **kwds)
        return self.datastore.exists(ref)
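    # A hedged sketch of `datasetExists`; note that a dataset missing from
    # the registry raises `LookupError` rather than returning `False`. Names
    # and values are placeholders.
    #
    #     if butler.datasetExists("calexp", instrument="HSC", visit=903334,
    #                             detector=42):
    #         calexp = butler.get("calexp", instrument="HSC", visit=903334,
    #                             detector=42)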

    def removeRuns(self, names: Iterable[str], unstore: bool = True) -> None:
        """Remove one or more `~CollectionType.RUN` collections and the
        datasets within them.

        Parameters
        ----------
        names : `Iterable` [ `str` ]
            The names of the collections to remove.
        unstore : `bool`, optional
            If `True` (default), delete datasets from all datastores in which
            they are present, and attempt to roll back the registry deletions
            if datastore deletions fail (which may not always be possible).
            If `False`, datastore records for these datasets are still
            removed, but any artifacts (e.g. files) will not be.

        Raises
        ------
        TypeError
            Raised if one or more collections are not of type
            `~CollectionType.RUN`.
        """
        if not self.isWriteable():
            raise TypeError("Butler is read-only.")
        names = list(names)
        refs: List[DatasetRef] = []
        for name in names:
            collectionType = self.registry.getCollectionType(name)
            if collectionType is not CollectionType.RUN:
                raise TypeError(f"The collection type of '{name}' is {collectionType.name}, not RUN.")
            refs.extend(self.registry.queryDatasets(..., collections=name, findFirst=True))
        with self.registry.transaction():
            if unstore:
                for ref in refs:
                    if self.datastore.exists(ref):
                        self.datastore.trash(ref)
            else:
                self.datastore.forget(refs)
            for name in names:
                self.registry.removeCollection(name)
        if unstore:
            # Point of no return for removing artifacts
            self.datastore.emptyTrash()
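    # A hedged sketch of `removeRuns`: the run names are placeholders, and
    # ``unstore=True`` (the default) also deletes the underlying artifacts.
    #
    #     butler = Butler("/path/to/repo", writeable=True)
    #     butler.removeRuns(["u/alice/scratch/run1", "u/alice/scratch/run2"])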

    def pruneCollection(self, name: str, purge: bool = False, unstore: bool = False,
                        unlink: Optional[List[str]] = None) -> None:
        """Remove a collection and possibly prune datasets within it.

        Parameters
        ----------
        name : `str`
            Name of the collection to remove. If this is a
            `~CollectionType.TAGGED` or `~CollectionType.CHAINED` collection,
            datasets within the collection are not modified unless ``unstore``
            is `True`. If this is a `~CollectionType.RUN` collection,
            ``purge`` and ``unstore`` must be `True`, and all datasets in it
            are fully removed from the data repository.
        purge : `bool`, optional
            If `True`, permit `~CollectionType.RUN` collections to be removed,
            fully removing datasets within them. Requires ``unstore=True`` as
            well as an added precaution against accidental deletion. Must be
            `False` (default) if the collection is not a ``RUN``.
        unstore : `bool`, optional
            If `True`, remove all datasets in the collection from all
            datastores in which they appear.
        unlink : `list` [ `str` ], optional
            Before removing the given collection, unlink it from these
            parent collections.

        Raises
        ------
        TypeError
            Raised if the butler is read-only or arguments are mutually
            inconsistent.
        """

        # See pruneDatasets comments for more information about the logic
        # here; the cases are almost the same, but here we can rely on
        # Registry to take care of everything but Datastore deletion when we
        # remove the collection.
        if not self.isWriteable():
            raise TypeError("Butler is read-only.")
        collectionType = self.registry.getCollectionType(name)
        if purge and not unstore:
            raise PurgeWithoutUnstorePruneCollectionsError()
        if collectionType is CollectionType.RUN and not purge:
            raise RunWithoutPurgePruneCollectionsError(collectionType)
        if collectionType is not CollectionType.RUN and purge:
            raise PurgeUnsupportedPruneCollectionsError(collectionType)

        def remove(child: str, parent: str) -> None:
            """Remove a child collection from a parent collection."""
            # Remove child from parent.
            chain = list(self.registry.getCollectionChain(parent))
            try:
                chain.remove(name)
            except ValueError as e:
                raise RuntimeError(f"{name} is not a child of {parent}") from e
            self.registry.setCollectionChain(parent, chain)

        with self.registry.transaction():
            if unlink:
                for parent in unlink:
                    remove(name, parent)
            if unstore:
                for ref in self.registry.queryDatasets(..., collections=name, findFirst=True):
                    if self.datastore.exists(ref):
                        self.datastore.trash(ref)
            self.registry.removeCollection(name)
        if unstore:
            # Point of no return for removing artifacts
            self.datastore.emptyTrash()

    def pruneDatasets(self, refs: Iterable[DatasetRef], *,
                      disassociate: bool = True,
                      unstore: bool = False,
                      tags: Iterable[str] = (),
                      purge: bool = False,
                      run: Optional[str] = None) -> None:
        """Remove one or more datasets from a collection and/or storage.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetRef`
            Datasets to prune. These must be "resolved" references (not just
            a `DatasetType` and data ID).
        disassociate : `bool`, optional
            Disassociate pruned datasets from ``tags``, or from all collections
            if ``purge=True``.
        unstore : `bool`, optional
            If `True` (`False` is default) remove these datasets from all
            datastores known to this butler. Note that this will make it
            impossible to retrieve these datasets even via other collections.
            Datasets that are already not stored are ignored by this option.
        tags : `Iterable` [ `str` ], optional
            `~CollectionType.TAGGED` collections to disassociate the datasets
            from. Ignored if ``disassociate`` is `False` or ``purge`` is
            `True`.
        purge : `bool`, optional
            If `True` (`False` is default), completely remove the dataset from
            the `Registry`. To prevent accidental deletions, ``purge`` may
            only be `True` if all of the following conditions are met:

            - All given datasets are in the given run;
            - ``disassociate`` is `True`;
            - ``unstore`` is `True`.

            This mode may remove provenance information from datasets other
            than those provided, and should be used with extreme care.

        Raises
        ------
        TypeError
            Raised if the butler is read-only, if no collection was provided,
            or the conditions for ``purge=True`` were not met.
        """
        if not self.isWriteable():
            raise TypeError("Butler is read-only.")
        if purge:
            if not disassociate:
                raise TypeError("Cannot pass purge=True without disassociate=True.")
            if not unstore:
                raise TypeError("Cannot pass purge=True without unstore=True.")
        elif disassociate:
            tags = tuple(tags)
            if not tags:
                raise TypeError("No tags provided but disassociate=True.")
            for tag in tags:
                collectionType = self.registry.getCollectionType(tag)
                if collectionType is not CollectionType.TAGGED:
                    raise TypeError(f"Cannot disassociate from collection '{tag}' "
                                    f"of non-TAGGED type {collectionType.name}.")
        # Transform possibly-single-pass iterable into something we can iterate
        # over multiple times.
        refs = list(refs)
        # Pruning a component of a DatasetRef makes no sense since registry
        # doesn't know about components and datastore might not store
        # components in a separate file
        for ref in refs:
            if ref.datasetType.component():
                raise ValueError(f"Can not prune a component of a dataset (ref={ref})")
        # We don't need an unreliable Datastore transaction for this, because
        # we've been extra careful to ensure that Datastore.trash only involves
        # mutating the Registry (it can _look_ at Datastore-specific things,
        # but shouldn't change them), and hence all operations here are
        # Registry operations.
        with self.registry.transaction():
            if unstore:
                for ref in refs:
                    # There is a difference between a concrete composite
                    # and virtual composite. In a virtual composite the
                    # datastore is never given the top level DatasetRef. In
                    # the concrete composite the datastore knows all the
                    # refs and will clean up itself if asked to remove the
                    # parent ref. We can not check configuration for this
                    # since we can not trust that the configuration is the
                    # same. We therefore have to ask if the ref exists or
                    # not. This is consistent with the fact that we want
                    # to ignore already-removed-from-datastore datasets
                    # anyway.
                    if self.datastore.exists(ref):
                        self.datastore.trash(ref)
            if purge:
                self.registry.removeDatasets(refs)
            elif disassociate:
                assert tags, "Guaranteed by earlier logic in this function."
                for tag in tags:
                    self.registry.disassociate(tag, refs)
        # We've exited the Registry transaction, and apparently committed.
        # (if there was an exception, everything rolled back, and it's as if
        # nothing happened - and we never get here).
        # Datastore artifacts are not yet gone, but they're clearly marked
        # as trash, so if we fail to delete now because of (e.g.) filesystem
        # problems we can try again later, and if manual administrative
        # intervention is required, it's pretty clear what that should entail:
        # deleting everything on disk and in private Datastore tables that is
        # in the dataset_location_trash table.
        if unstore:
            # Point of no return for removing artifacts
            self.datastore.emptyTrash()

1385 @transactional 

1386 def ingest(self, *datasets: FileDataset, transfer: Optional[str] = "auto", run: Optional[str] = None, 

1387 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE, 

1388 ) -> None: 

1389 """Store and register one or more datasets that already exist on disk. 

1390 

1391 Parameters 

1392 ---------- 

1393 datasets : `FileDataset` 

1394 Each positional argument is a struct containing information about 

1395 a file to be ingested, including its path (either absolute or 

1396 relative to the datastore root, if applicable), a `DatasetRef`, 

1397 and optionally a formatter class or its fully-qualified string 

1398 name. If a formatter is not provided, the formatter that would be 

1399 used for `put` is assumed. On successful return, all 

1400 `FileDataset.ref` attributes will have their `DatasetRef.id` 

1401 attribute populated and all `FileDataset.formatter` attributes will 

1402 be set to the formatter class used. `FileDataset.path` attributes 

1403 may be modified to put paths in whatever the datastore considers a 

1404 standardized form. 

1405 transfer : `str`, optional 

1406 If not `None`, must be one of 'auto', 'move', 'copy', 'direct', 

1407 'hardlink', 'relsymlink' or 'symlink', indicating how to transfer 

1408 the file. 

1409 run : `str`, optional 

1410 The name of the run ingested datasets should be added to, 

1411 overriding ``self.run``. 

1412 idGenerationMode : `DatasetIdGenEnum`, optional 

1413 Specifies the option for generating dataset IDs. By default, unique 

1414 IDs are generated for each inserted dataset. 

1415 

1416 Raises 

1417 ------ 

1418 TypeError 

1419 Raised if the butler is read-only or if no run was provided. 

1420 NotImplementedError 

1421 Raised if the `Datastore` does not support the given transfer mode. 

1422 DatasetTypeNotSupportedError 

1423 Raised if one or more files to be ingested have a dataset type that 

1424 is not supported by the `Datastore`. 

1425 FileNotFoundError 

1426 Raised if one of the given files does not exist. 

1427 FileExistsError 

1428 Raised if transfer is not `None` but the (internal) location the 

1429 file would be moved to is already occupied. 

1430 

1431 Notes 

1432 ----- 

1433 This operation is not fully exception safe: if a database operation 

1434 fails, the given `FileDataset` instances may be only partially updated. 

1435 

1436 It is atomic in terms of database operations (they will either all 

1437 succeed or all fail), provided that the database engine implements 

1438 transactions correctly. It will attempt to be atomic in terms of 

1439 filesystem operations as well, but this cannot be implemented 

1440 rigorously for most datastores. 
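
Examples
--------
A minimal sketch, assuming ``datasetType`` and ``dataId`` have already
been obtained from the registry and that the file exists on disk; the
path and run name are illustrative::

    # The path and run name are illustrative; ``datasetType`` and
    # ``dataId`` are assumed to exist already.
    ref = DatasetRef(datasetType, dataId)
    dataset = FileDataset(path="/data/exposure.fits", refs=[ref])
    butler.ingest(dataset, transfer="copy", run="my_run")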

1441 """ 

1442 if not self.isWriteable(): 

1443 raise TypeError("Butler is read-only.") 

1444 progress = Progress("lsst.daf.butler.Butler.ingest", level=logging.DEBUG) 

1445 # Reorganize the inputs so they're grouped by DatasetType and then 

1446 # data ID. We also include a list of DatasetRefs for each FileDataset 

1447 # to hold the resolved DatasetRefs returned by the Registry, before 

1448 # it's safe to swap them into FileDataset.refs. 

1449 # Some type annotation aliases to make that clearer: 

1450 GroupForType = Dict[DataCoordinate, Tuple[FileDataset, List[DatasetRef]]] 

1451 GroupedData = MutableMapping[DatasetType, GroupForType] 

1452 # The actual data structure: 

1453 groupedData: GroupedData = defaultdict(dict) 

1454 # And the nested loop that populates it: 

1455 for dataset in progress.wrap(datasets, desc="Grouping by dataset type"): 

1456 # This list is intentionally shared across the inner loop, since it's 

1457 # associated with `dataset`. 

1458 resolvedRefs: List[DatasetRef] = [] 

1459 for ref in dataset.refs: 

1460 if ref.dataId in groupedData[ref.datasetType]: 

1461 raise ConflictingDefinitionError(f"Ingest conflict. Dataset {dataset.path} has the same" 

1462 " DataId as another ingest dataset" 

1463 f" {groupedData[ref.datasetType][ref.dataId][0].path}" 

1464 f" ({ref.dataId}).") 

1465 groupedData[ref.datasetType][ref.dataId] = (dataset, resolvedRefs) 

1466 

1467 # Now we can bulk-insert into Registry for each DatasetType. 

1468 allResolvedRefs: List[DatasetRef] = [] 

1469 for datasetType, groupForType in progress.iter_item_chunks(groupedData.items(), 

1470 desc="Bulk-inserting datasets by type"): 

1471 refs = self.registry.insertDatasets( 

1472 datasetType, 

1473 dataIds=groupForType.keys(), 

1474 run=run, 

1475 expand=self.datastore.needs_expanded_data_ids(transfer, datasetType), 

1476 idGenerationMode=idGenerationMode, 

1477 ) 

1478 # Append those resolved DatasetRefs to the new lists we set up for 

1479 # them. 

1480 for ref, (_, resolvedRefs) in zip(refs, groupForType.values()): 

1481 resolvedRefs.append(ref) 

1482 

1483 # Go back to the original FileDatasets to replace their refs with the 

1484 # new resolved ones, and also build a big list of all refs. 

1485 allResolvedRefs = [] 

1486 for groupForType in progress.iter_chunks(groupedData.values(), 

1487 desc="Reassociating resolved dataset refs with files"): 

1488 for dataset, resolvedRefs in groupForType.values(): 

1489 dataset.refs = resolvedRefs 

1490 allResolvedRefs.extend(resolvedRefs) 

1491 

1492 # Bulk-insert everything into Datastore. 

1493 self.datastore.ingest(*datasets, transfer=transfer) 

1494 

1495 @contextlib.contextmanager 

1496 def export(self, *, directory: Optional[str] = None, 

1497 filename: Optional[str] = None, 

1498 format: Optional[str] = None, 

1499 transfer: Optional[str] = None) -> Iterator[RepoExportContext]: 

1500 """Export datasets from the repository represented by this `Butler`. 

1501 

1502 This method is a context manager that returns a helper object 

1503 (`RepoExportContext`) that is used to indicate what information from 

1504 the repository should be exported. 

1505 

1506 Parameters 

1507 ---------- 

1508 directory : `str`, optional 

1509 Directory dataset files should be written to if ``transfer`` is not 

1510 `None`. 

1511 filename : `str`, optional 

1512 Name for the file that will include database information associated 

1513 with the exported datasets. If this is not an absolute path and 

1514 ``directory`` is not `None`, it will be written to ``directory`` 

1515 instead of the current working directory. Defaults to 

1516 "export.{format}". 

1517 format : `str`, optional 

1518 File format for the database information file. If `None`, the 

1519 extension of ``filename`` will be used. 

1520 transfer : `str`, optional 

1521 Transfer mode passed to `Datastore.export`. 

1522 

1523 Raises 

1524 ------ 

1525 TypeError 

1526 Raised if the set of arguments passed is inconsistent. 

1527 

1528 Examples 

1529 -------- 

1530 Typically the `Registry.queryDataIds` and `Registry.queryDatasets` 

1531 methods are used to provide the iterables over data IDs and/or datasets 

1532 to be exported:: 

1533 

1534 with butler.export(filename="exports.yaml") as export: 

1535 # Export all flats, but none of the dimension element rows 

1536 # (i.e. data ID information) associated with them. 

1537 export.saveDatasets(butler.registry.queryDatasets("flat"), 

1538 elements=()) 

1539 # Export all datasets that start with "deepCoadd_" and all of 

1540 # their associated data ID information. 

1541 export.saveDatasets(butler.registry.queryDatasets("deepCoadd_*")) 

1542 """ 

1543 if directory is None and transfer is not None: 

1544 raise TypeError("Cannot transfer without providing a directory.") 

1545 if transfer == "move": 

1546 raise TypeError("Transfer may not be 'move': export is read-only") 

1547 if format is None: 

1548 if filename is None: 

1549 raise TypeError("At least one of 'filename' or 'format' must be provided.") 

1550 else: 

1551 _, format = os.path.splitext(filename) 

1552 elif filename is None: 

1553 filename = f"export.{format}" 

1554 if directory is not None: 

1555 filename = os.path.join(directory, filename) 

1556 BackendClass = getClassOf(self._config["repo_transfer_formats"][format]["export"]) 

1557 with open(filename, 'w') as stream: 

1558 backend = BackendClass(stream) 

1559 try: 

1560 helper = RepoExportContext(self.registry, self.datastore, backend=backend, 

1561 directory=directory, transfer=transfer) 

1562 yield helper 

1563 except BaseException: 

1564 raise 

1565 else: 

1566 helper._finish() 

1567 

1568 def import_(self, *, directory: Optional[str] = None, 

1569 filename: Union[str, TextIO, None] = None, 

1570 format: Optional[str] = None, 

1571 transfer: Optional[str] = None, 

1572 skip_dimensions: Optional[Set] = None, 

1573 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE, 

1574 reuseIds: bool = False) -> None: 

1575 """Import datasets into this repository that were exported from a 

1576 different butler repository via `~lsst.daf.butler.Butler.export`. 

1577 

1578 Parameters 

1579 ---------- 

1580 directory : `str`, optional 

1581 Directory containing dataset files to import from. If `None`, 

1582 ``filename`` and all dataset file paths specified therein must 

1583 be absolute. 

1584 filename : `str` or `TextIO`, optional 

1585 A stream or name of file that contains database information 

1586 associated with the exported datasets, typically generated by 

1587 `~lsst.daf.butler.Butler.export`. If this is a string (name) and 

1588 is not an absolute path, does not exist in the current working 

1589 directory, and ``directory`` is not `None`, it is assumed to be in 

1590 ``directory``. Defaults to "export.{format}". 

1591 format : `str`, optional 

1592 File format for ``filename``. If `None`, the extension of 

1593 ``filename`` will be used. 

1594 transfer : `str`, optional 

1595 Transfer mode passed to `~lsst.daf.butler.Datastore.ingest`. 

1596 skip_dimensions : `set`, optional 

1597 Names of dimensions that should be skipped and not imported. 

1598 idGenerationMode : `DatasetIdGenEnum`, optional 

1599 Specifies the option for generating dataset IDs when IDs are not 

1600 provided or their type does not match the backend type. By default, 

1601 unique IDs are generated for each inserted dataset. 

1602 reuseIds : `bool`, optional 

1603 If `True`, forces re-use of imported dataset IDs for integer IDs, 

1604 which are normally generated as auto-incremented; an exception 

1605 will be raised if imported IDs clash with existing ones. This 

1606 option has no effect on globally-unique IDs, which are always 

1607 re-used (or generated if integer IDs are being imported). 

1608 

1609 Raises 

1610 ------ 

1611 TypeError 

1612 Raised if the set of arguments passed is inconsistent, or if the 

1613 butler is read-only. 
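
Examples
--------
A minimal sketch that re-imports the output of a previous `export`
call; the directory and file names are illustrative::

    # The directory and file names are illustrative.
    butler.import_(directory="exports", filename="exports.yaml",
                   transfer="auto")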

1614 """ 

1615 if not self.isWriteable(): 

1616 raise TypeError("Butler is read-only.") 

1617 if format is None: 

1618 if filename is None: 

1619 raise TypeError("At least one of 'filename' or 'format' must be provided.") 

1620 else: 

1621 _, format = os.path.splitext(filename) # type: ignore 

1622 elif filename is None: 

1623 filename = f"export.{format}" 

1624 if isinstance(filename, str) and directory is not None and not os.path.exists(filename): 

1625 filename = os.path.join(directory, filename) 

1626 BackendClass = getClassOf(self._config["repo_transfer_formats"][format]["import"]) 

1627 

1628 def doImport(importStream: TextIO) -> None: 

1629 backend = BackendClass(importStream, self.registry) 

1630 backend.register() 

1631 with self.transaction(): 

1632 backend.load(self.datastore, directory=directory, transfer=transfer, 

1633 skip_dimensions=skip_dimensions, idGenerationMode=idGenerationMode, 

1634 reuseIds=reuseIds) 

1635 

1636 if isinstance(filename, str): 

1637 with open(filename, "r") as stream: 

1638 doImport(stream) 

1639 else: 

1640 doImport(filename) 

1641 

1642 def validateConfiguration(self, logFailures: bool = False, 

1643 datasetTypeNames: Optional[Iterable[str]] = None, 

1644 ignore: Optional[Iterable[str]] = None) -> None: 

1645 """Validate butler configuration. 

1646 

1647 Checks that each `DatasetType` can be stored in the `Datastore`. 

1648 

1649 Parameters 

1650 ---------- 

1651 logFailures : `bool`, optional 

1652 If `True`, output a log message for every validation error 

1653 detected. 

1654 datasetTypeNames : iterable of `str`, optional 

1655 The `DatasetType` names that should be checked. This allows 

1656 only a subset to be selected. 

1657 ignore : iterable of `str`, optional 

1658 Names of DatasetTypes to skip over. This can be used to skip 

1659 known problems. If a named `DatasetType` corresponds to a 

1660 composite, all components of that `DatasetType` will also be 

1661 ignored. 

1662 

1663 Raises 

1664 ------ 

1665 ButlerValidationError 

1666 Raised if there is some inconsistency with how this Butler 

1667 is configured. 
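
Examples
--------
A minimal sketch; the dataset type name is illustrative::

    from lsst.daf.butler import ValidationError

    # "raw" is an illustrative dataset type name.
    try:
        butler.validateConfiguration(logFailures=True,
                                     datasetTypeNames=["raw"])
    except ValidationError as err:
        print(f"Butler configuration problems:\n{err}")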

1668 """ 

1669 if datasetTypeNames: 

1670 datasetTypes = [self.registry.getDatasetType(name) for name in datasetTypeNames] 

1671 else: 

1672 datasetTypes = list(self.registry.queryDatasetTypes()) 

1673 

1674 # filter out anything from the ignore list 

1675 if ignore: 

1676 ignore = set(ignore) 

1677 datasetTypes = [e for e in datasetTypes 

1678 if e.name not in ignore and e.nameAndComponent()[0] not in ignore] 

1679 else: 

1680 ignore = set() 

1681 

1682 # Find all the registered instruments 

1683 instruments = set( 

1684 record.name for record in self.registry.queryDimensionRecords("instrument") 

1685 ) 

1686 

1687 # For each datasetType that has an instrument dimension, create 

1688 # a DatasetRef for each defined instrument 

1689 datasetRefs = [] 

1690 

1691 for datasetType in datasetTypes: 

1692 if "instrument" in datasetType.dimensions: 

1693 for instrument in instruments: 

1694 datasetRef = DatasetRef(datasetType, {"instrument": instrument}, # type: ignore 

1695 conform=False) 

1696 datasetRefs.append(datasetRef) 

1697 

1698 entities: List[Union[DatasetType, DatasetRef]] = [] 

1699 entities.extend(datasetTypes) 

1700 entities.extend(datasetRefs) 

1701 

1702 datastoreErrorStr = None 

1703 try: 

1704 self.datastore.validateConfiguration(entities, logFailures=logFailures) 

1705 except ValidationError as e: 

1706 datastoreErrorStr = str(e) 

1707 

1708 # Also check that the LookupKeys used by the datastores match 

1709 # registry and storage class definitions 

1710 keys = self.datastore.getLookupKeys() 

1711 

1712 failedNames = set() 

1713 failedDataId = set() 

1714 for key in keys: 

1715 if key.name is not None: 

1716 if key.name in ignore: 

1717 continue 

1718 

1719 # skip if specific datasetType names were requested and this 

1720 # name does not match 

1721 if datasetTypeNames and key.name not in datasetTypeNames: 

1722 continue 

1723 

1724 # See if it is a StorageClass or a DatasetType 

1725 if key.name in self.storageClasses: 

1726 pass 

1727 else: 

1728 try: 

1729 self.registry.getDatasetType(key.name) 

1730 except KeyError: 

1731 if logFailures: 

1732 log.critical("Key '%s' does not correspond to a DatasetType or StorageClass", key) 

1733 failedNames.add(key) 

1734 else: 

1735 # Dimensions are checked for consistency when the Butler 

1736 # is created and rendezvoused with a universe. 

1737 pass 

1738 

1739 # Check that any DataId override only uses "instrument" (the only 

1740 # supported key) and that the named instrument is registered. 

1741 if key.dataId: 

1742 dataIdKeys = set(key.dataId) 

1743 if set(["instrument"]) != dataIdKeys: 

1744 if logFailures: 

1745 log.critical("Key '%s' has unsupported DataId override", key) 

1746 failedDataId.add(key) 

1747 elif key.dataId["instrument"] not in instruments: 

1748 if logFailures: 

1749 log.critical("Key '%s' has unknown instrument", key) 

1750 failedDataId.add(key) 

1751 

1752 messages = [] 

1753 

1754 if datastoreErrorStr: 

1755 messages.append(datastoreErrorStr) 

1756 

1757 for failed, msg in ((failedNames, "Keys without corresponding DatasetType or StorageClass entry: "), 

1758 (failedDataId, "Keys with bad DataId entries: ")): 

1759 if failed: 

1760 msg += ", ".join(str(k) for k in failed) 

1761 messages.append(msg) 

1762 

1763 if messages: 

1764 raise ValidationError(";\n".join(messages)) 

1765 

1766 @property 

1767 def collections(self) -> CollectionSearch: 

1768 """The collections to search by default, in order (`CollectionSearch`). 

1769 

1770 This is an alias for ``self.registry.defaults.collections``. It cannot 

1771 be set directly in isolation, but all defaults may be changed together 

1772 by assigning a new `RegistryDefaults` instance to 

1773 ``self.registry.defaults``. 
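
For example, a sketch of replacing all of the defaults at once (the
collection and run names are illustrative)::

    from lsst.daf.butler.registry import RegistryDefaults

    # "my_run" is an illustrative collection/run name.
    butler.registry.defaults = RegistryDefaults(collections=["my_run"],
                                                run="my_run")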

1774 """ 

1775 return self.registry.defaults.collections 

1776 

1777 @property 

1778 def run(self) -> Optional[str]: 

1779 """Name of the run this butler writes outputs to by default (`str` or 

1780 `None`). 

1781 

1782 This is an alias for ``self.registry.defaults.run``. It cannot be set 

1783 directly in isolation, but all defaults may be changed together by 

1784 assigning a new `RegistryDefaults` instance to 

1785 ``self.registry.defaults``. 

1786 """ 

1787 return self.registry.defaults.run 

1788 

1789 registry: Registry 

1790 """The object that manages dataset metadata and relationships (`Registry`). 

1791 

1792 Most operations that don't involve reading or writing butler datasets are 

1793 accessible only via `Registry` methods. 

1794 """ 

1795 

1796 datastore: Datastore 

1797 """The object that manages actual dataset storage (`Datastore`). 

1798 

1799 Direct user access to the datastore should rarely be necessary; the primary 

1800 exception is the case where a `Datastore` implementation provides extra 

1801 functionality beyond what the base class defines. 

1802 """ 

1803 

1804 storageClasses: StorageClassFactory 

1805 """An object that maps known storage class names to objects that fully 

1806 describe them (`StorageClassFactory`). 

1807 """