Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22""" 

23Butler top level classes. 

24""" 

25from __future__ import annotations 

26 

27__all__ = ( 

28 "Butler", 

29 "ButlerValidationError", 

30 "PruneCollectionsArgsError", 

31 "PurgeWithoutUnstorePruneCollectionsError", 

32 "RunWithoutPurgePruneCollectionsError", 

33 "PurgeUnsupportedPruneCollectionsError", 

34) 

35 

36 

37from collections import defaultdict 

38import contextlib 

39import logging 

40import numbers 

41import os 

42from typing import ( 

43 Any, 

44 ClassVar, 

45 Counter, 

46 Dict, 

47 Iterable, 

48 Iterator, 

49 List, 

50 Mapping, 

51 MutableMapping, 

52 Optional, 

53 Set, 

54 TextIO, 

55 Tuple, 

56 Type, 

57 Union, 

58) 

59 

60try: 

61 import boto3 

62except ImportError: 

63 boto3 = None 

64 

65from lsst.utils import doImport 

66from .core import ( 

67 AmbiguousDatasetError, 

68 ButlerURI, 

69 Config, 

70 ConfigSubset, 

71 DataCoordinate, 

72 DataId, 

73 DataIdValue, 

74 DatasetRef, 

75 DatasetType, 

76 Datastore, 

77 Dimension, 

78 DimensionConfig, 

79 FileDataset, 

80 StorageClassFactory, 

81 Timespan, 

82 ValidationError, 

83) 

84from .core.repoRelocation import BUTLER_ROOT_TAG 

85from .core.utils import transactional, getClassOf 

86from ._deferredDatasetHandle import DeferredDatasetHandle 

87from ._butlerConfig import ButlerConfig 

88from .registry import Registry, RegistryConfig, CollectionType 

89from .registry.wildcards import CollectionSearch 

90from .transfers import RepoExportContext 

91 

92log = logging.getLogger(__name__) 

93 

94 

95class ButlerValidationError(ValidationError): 

96 """There is a problem with the Butler configuration.""" 

97 pass 

98 

99 

100class PruneCollectionsArgsError(TypeError): 

101 """Base class for errors relating to Butler.pruneCollections input 

102 arguments. 

103 """ 

104 pass 

105 

106 

107class PurgeWithoutUnstorePruneCollectionsError(PruneCollectionsArgsError): 

108 """Raised when purge and unstore are both required to be True, and 

109 purge is True but unstore is False. 

110 """ 

111 

112 def __init__(self) -> None: 

113 super().__init__("Cannot pass purge=True without unstore=True.") 

114 

115 

116class RunWithoutPurgePruneCollectionsError(PruneCollectionsArgsError): 

117 """Raised when pruning a RUN collection but purge is False.""" 

118 

119 def __init__(self, collectionType: CollectionType): 

120 self.collectionType = collectionType 

121 super().__init__(f"Cannot prune RUN collection {self.collectionType.name} without purge=True.") 

122 

123 

124class PurgeUnsupportedPruneCollectionsError(PruneCollectionsArgsError): 

125 """Raised when purge is True but is not supported for the given 

126 collection.""" 

127 

128 def __init__(self, collectionType: CollectionType): 

129 self.collectionType = collectionType 

130 super().__init__( 

131 f"Cannot prune {self.collectionType} collection {self.collectionType.name} with purge=True.") 

132 

133 

134class Butler: 

135 """Main entry point for the data access system. 

136 

137 Parameters 

138 ---------- 

139 config : `ButlerConfig`, `Config` or `str`, optional. 

140 Configuration. Anything acceptable to the 

141 `ButlerConfig` constructor. If a directory path 

142 is given the configuration will be read from a ``butler.yaml`` file in 

143 that location. If `None` is given default values will be used. 

144 butler : `Butler`, optional. 

145 If provided, construct a new Butler that uses the same registry and 

146 datastore as the given one, but with the given collection and run. 

147 Incompatible with the ``config``, ``searchPaths``, and ``writeable`` 

148 arguments. 

149 collections : `Any`, optional 

150 An expression specifying the collections to be searched (in order) when 

151 reading datasets, and optionally dataset type restrictions on them. 

152 This may be: 

153 - a `str` collection name; 

154 - a tuple of (collection name, *dataset type restriction*); 

155 - an iterable of either of the above; 

156 - a mapping from `str` to *dataset type restriction*. 

157 

158 See :ref:`daf_butler_collection_expressions` for more information, 

159 including the definition of a *dataset type restriction*. All 

160 collections must either already exist or be specified to be created 

161 by other arguments. 

162 run : `str`, optional 

163 Name of the run datasets should be output to. If the run 

164 does not exist, it will be created. If ``collections`` is `None`, it 

165 will be set to ``[run]``. If this is not set (and ``writeable`` is 

166 not set either), a read-only butler will be created. 

167 tags : `Iterable` [ `str` ], optional 

168 A list of `~CollectionType.TAGGED` collections that datasets should be 

169 associated with in `put` or `ingest` and disassociated from in 

170 `pruneDatasets`. If any of these collections does not exist, it will 

171 be created. 

172 chains : `Mapping` [ `str`, `Iterable` [ `str` ] ], optional 

173 A mapping from the names of new `~CollectionType.CHAINED` collections 

174 to an expression identifying their child collections (which takes the 

175 same form as the ``collections`` argument. Chains may be nested only 

176 if children precede their parents in this mapping. 

177 searchPaths : `list` of `str`, optional 

178 Directory paths to search when calculating the full Butler 

179 configuration. Not used if the supplied config is already a 

180 `ButlerConfig`. 

181 writeable : `bool`, optional 

182 Explicitly sets whether the butler supports write operations. If not 

183 provided, a read-write butler is created if any of ``run``, ``tags``, 

184 or ``chains`` is non-empty. 

185 

186 Examples 

187 -------- 

188 While there are many ways to control exactly how a `Butler` interacts with 

189 the collections in its `Registry`, the most common cases are still simple. 

190 

191 For a read-only `Butler` that searches one collection, do:: 

192 

193 butler = Butler("/path/to/repo", collections=["u/alice/DM-50000"]) 

194 

195 For a read-write `Butler` that writes to and reads from a 

196 `~CollectionType.RUN` collection:: 

197 

198 butler = Butler("/path/to/repo", run="u/alice/DM-50000/a") 

199 

200 The `Butler` passed to a ``PipelineTask`` is often much more complex, 

201 because we want to write to one `~CollectionType.RUN` collection but read 

202 from several others (as well), while defining a new 

203 `~CollectionType.CHAINED` collection that combines them all:: 

204 

205 butler = Butler("/path/to/repo", run="u/alice/DM-50000/a", 

206 collections=["u/alice/DM-50000"], 

207 chains={ 

208 "u/alice/DM-50000": ["u/alice/DM-50000/a", 

209 "u/bob/DM-49998", 

210 "raw/hsc"] 

211 }) 

212 

213 This butler will `put` new datasets to the run ``u/alice/DM-50000/a``, but 

214 they'll also be available from the chained collection ``u/alice/DM-50000``. 

215 Datasets will be read first from that run (since it appears first in the 

216 chain), and then from ``u/bob/DM-49998`` and finally ``raw/hsc``. 

217 If ``u/alice/DM-50000`` had already been defined, the ``chain`` argument 

218 would be unnecessary. We could also construct a butler that performs 

219 exactly the same `put` and `get` operations without actually creating a 

220 chained collection, just by passing multiple items is ``collections``:: 

221 

222 butler = Butler("/path/to/repo", run="u/alice/DM-50000/a", 

223 collections=["u/alice/DM-50000/a", 

224 "u/bob/DM-49998", 

225 "raw/hsc"]) 

226 

227 Finally, one can always create a `Butler` with no collections:: 

228 

229 butler = Butler("/path/to/repo", writeable=True) 

230 

231 This can be extremely useful when you just want to use ``butler.registry``, 

232 e.g. for inserting dimension data or managing collections, or when the 

233 collections you want to use with the butler are not consistent. 

234 Passing ``writeable`` explicitly here is only necessary if you want to be 

235 able to make changes to the repo - usually the value for ``writeable`` is 

236 can be guessed from the collection arguments provided, but it defaults to 

237 `False` when there are not collection arguments. 

238 """ 

239 def __init__(self, config: Union[Config, str, None] = None, *, 

240 butler: Optional[Butler] = None, 

241 collections: Any = None, 

242 run: Optional[str] = None, 

243 tags: Iterable[str] = (), 

244 chains: Optional[Mapping[str, Any]] = None, 

245 searchPaths: Optional[List[str]] = None, 

246 writeable: Optional[bool] = None, 

247 ): 

248 # Transform any single-pass iterator into an actual sequence so we 

249 # can see if its empty 

250 self.tags = tuple(tags) 

251 # Load registry, datastore, etc. from config or existing butler. 

252 if butler is not None: 

253 if config is not None or searchPaths is not None or writeable is not None: 

254 raise TypeError("Cannot pass 'config', 'searchPaths', or 'writeable' " 

255 "arguments with 'butler' argument.") 

256 self.registry = butler.registry 

257 self.datastore = butler.datastore 

258 self.storageClasses = butler.storageClasses 

259 self._config: ButlerConfig = butler._config 

260 else: 

261 self._config = ButlerConfig(config, searchPaths=searchPaths) 

262 if "root" in self._config: 

263 butlerRoot = self._config["root"] 

264 else: 

265 butlerRoot = self._config.configDir 

266 if writeable is None: 

267 writeable = run is not None or chains is not None or bool(self.tags) 

268 self.registry = Registry.fromConfig(self._config, butlerRoot=butlerRoot, writeable=writeable) 

269 self.datastore = Datastore.fromConfig(self._config, self.registry.getDatastoreBridgeManager(), 

270 butlerRoot=butlerRoot) 

271 self.storageClasses = StorageClassFactory() 

272 self.storageClasses.addFromConfig(self._config) 

273 # Check the many collection arguments for consistency and create any 

274 # needed collections that don't exist. 

275 if collections is None: 

276 if run is not None: 

277 collections = (run,) 

278 else: 

279 collections = () 

280 self.collections = CollectionSearch.fromExpression(collections) 

281 if chains is None: 

282 chains = {} 

283 self.run = run 

284 if "run" in self._config or "collection" in self._config: 

285 raise ValueError("Passing a run or collection via configuration is no longer supported.") 

286 if self.run is not None: 

287 self.registry.registerCollection(self.run, type=CollectionType.RUN) 

288 for tag in self.tags: 

289 self.registry.registerCollection(tag, type=CollectionType.TAGGED) 

290 for parent, children in chains.items(): 

291 self.registry.registerCollection(parent, type=CollectionType.CHAINED) 

292 self.registry.setCollectionChain(parent, children) 

293 

294 GENERATION: ClassVar[int] = 3 

295 """This is a Generation 3 Butler. 

296 

297 This attribute may be removed in the future, once the Generation 2 Butler 

298 interface has been fully retired; it should only be used in transitional 

299 code. 

300 """ 

301 

302 @staticmethod 

303 def makeRepo(root: str, config: Union[Config, str, None] = None, 

304 dimensionConfig: Union[Config, str, None] = None, standalone: bool = False, 

305 searchPaths: Optional[List[str]] = None, forceConfigRoot: bool = True, 

306 outfile: Optional[str] = None, overwrite: bool = False) -> Config: 

307 """Create an empty data repository by adding a butler.yaml config 

308 to a repository root directory. 

309 

310 Parameters 

311 ---------- 

312 root : `str` or `ButlerURI` 

313 Path or URI to the root location of the new repository. Will be 

314 created if it does not exist. 

315 config : `Config` or `str`, optional 

316 Configuration to write to the repository, after setting any 

317 root-dependent Registry or Datastore config options. Can not 

318 be a `ButlerConfig` or a `ConfigSubset`. If `None`, default 

319 configuration will be used. Root-dependent config options 

320 specified in this config are overwritten if ``forceConfigRoot`` 

321 is `True`. 

322 dimensionConfig : `Config` or `str`, optional 

323 Configuration for dimensions, will be used to initialize registry 

324 database. 

325 standalone : `bool` 

326 If True, write all expanded defaults, not just customized or 

327 repository-specific settings. 

328 This (mostly) decouples the repository from the default 

329 configuration, insulating it from changes to the defaults (which 

330 may be good or bad, depending on the nature of the changes). 

331 Future *additions* to the defaults will still be picked up when 

332 initializing `Butlers` to repos created with ``standalone=True``. 

333 searchPaths : `list` of `str`, optional 

334 Directory paths to search when calculating the full butler 

335 configuration. 

336 forceConfigRoot : `bool`, optional 

337 If `False`, any values present in the supplied ``config`` that 

338 would normally be reset are not overridden and will appear 

339 directly in the output config. This allows non-standard overrides 

340 of the root directory for a datastore or registry to be given. 

341 If this parameter is `True` the values for ``root`` will be 

342 forced into the resulting config if appropriate. 

343 outfile : `str`, optional 

344 If not-`None`, the output configuration will be written to this 

345 location rather than into the repository itself. Can be a URI 

346 string. Can refer to a directory that will be used to write 

347 ``butler.yaml``. 

348 overwrite : `bool`, optional 

349 Create a new configuration file even if one already exists 

350 in the specified output location. Default is to raise 

351 an exception. 

352 

353 Returns 

354 ------- 

355 config : `Config` 

356 The updated `Config` instance written to the repo. 

357 

358 Raises 

359 ------ 

360 ValueError 

361 Raised if a ButlerConfig or ConfigSubset is passed instead of a 

362 regular Config (as these subclasses would make it impossible to 

363 support ``standalone=False``). 

364 FileExistsError 

365 Raised if the output config file already exists. 

366 os.error 

367 Raised if the directory does not exist, exists but is not a 

368 directory, or cannot be created. 

369 

370 Notes 

371 ----- 

372 Note that when ``standalone=False`` (the default), the configuration 

373 search path (see `ConfigSubset.defaultSearchPaths`) that was used to 

374 construct the repository should also be used to construct any Butlers 

375 to avoid configuration inconsistencies. 

376 """ 

377 if isinstance(config, (ButlerConfig, ConfigSubset)): 

378 raise ValueError("makeRepo must be passed a regular Config without defaults applied.") 

379 

380 # Ensure that the root of the repository exists or can be made 

381 uri = ButlerURI(root, forceDirectory=True) 

382 uri.mkdir() 

383 

384 config = Config(config) 

385 

386 # If we are creating a new repo from scratch with relative roots, 

387 # do not propagate an explicit root from the config file 

388 if "root" in config: 

389 del config["root"] 

390 

391 full = ButlerConfig(config, searchPaths=searchPaths) # this applies defaults 

392 datastoreClass: Type[Datastore] = doImport(full["datastore", "cls"]) 

393 datastoreClass.setConfigRoot(BUTLER_ROOT_TAG, config, full, overwrite=forceConfigRoot) 

394 

395 # if key exists in given config, parse it, otherwise parse the defaults 

396 # in the expanded config 

397 if config.get(("registry", "db")): 

398 registryConfig = RegistryConfig(config) 

399 else: 

400 registryConfig = RegistryConfig(full) 

401 defaultDatabaseUri = registryConfig.makeDefaultDatabaseUri(BUTLER_ROOT_TAG) 

402 if defaultDatabaseUri is not None: 

403 Config.updateParameters(RegistryConfig, config, full, 

404 toUpdate={"db": defaultDatabaseUri}, 

405 overwrite=forceConfigRoot) 

406 else: 

407 Config.updateParameters(RegistryConfig, config, full, toCopy=("db",), 

408 overwrite=forceConfigRoot) 

409 

410 if standalone: 

411 config.merge(full) 

412 else: 

413 # Always expand the registry.managers section into the per-repo 

414 # config, because after the database schema is created, it's not 

415 # allowed to change anymore. Note that in the standalone=True 

416 # branch, _everything_ in the config is expanded, so there's no 

417 # need to special case this. 

418 Config.updateParameters(RegistryConfig, config, full, toCopy=("managers",), overwrite=False) 

419 configURI: Union[str, ButlerURI] 

420 if outfile is not None: 

421 # When writing to a separate location we must include 

422 # the root of the butler repo in the config else it won't know 

423 # where to look. 

424 config["root"] = uri.geturl() 

425 configURI = outfile 

426 else: 

427 configURI = uri 

428 config.dumpToUri(configURI, overwrite=overwrite) 

429 

430 # Create Registry and populate tables 

431 registryConfig = RegistryConfig(config.get("registry")) 

432 dimensionConfig = DimensionConfig(dimensionConfig) 

433 Registry.createFromConfig(registryConfig, dimensionConfig=dimensionConfig, butlerRoot=root) 

434 

435 return config 

436 

437 @classmethod 

438 def _unpickle(cls, config: ButlerConfig, collections: Optional[CollectionSearch], run: Optional[str], 

439 tags: Tuple[str, ...], writeable: bool) -> Butler: 

440 """Callable used to unpickle a Butler. 

441 

442 We prefer not to use ``Butler.__init__`` directly so we can force some 

443 of its many arguments to be keyword-only (note that ``__reduce__`` 

444 can only invoke callables with positional arguments). 

445 

446 Parameters 

447 ---------- 

448 config : `ButlerConfig` 

449 Butler configuration, already coerced into a true `ButlerConfig` 

450 instance (and hence after any search paths for overrides have been 

451 utilized). 

452 collections : `CollectionSearch` 

453 Names of collections to read from. 

454 run : `str`, optional 

455 Name of `~CollectionType.RUN` collection to write to. 

456 tags : `tuple` [`str`] 

457 Names of `~CollectionType.TAGGED` collections to associate with. 

458 writeable : `bool` 

459 Whether the Butler should support write operations. 

460 

461 Returns 

462 ------- 

463 butler : `Butler` 

464 A new `Butler` instance. 

465 """ 

466 return cls(config=config, collections=collections, run=run, tags=tags, writeable=writeable) 

467 

468 def __reduce__(self) -> tuple: 

469 """Support pickling. 

470 """ 

471 return (Butler._unpickle, (self._config, self.collections, self.run, self.tags, 

472 self.registry.isWriteable())) 

473 

474 def __str__(self) -> str: 

475 return "Butler(collections={}, run={}, tags={}, datastore='{}', registry='{}')".format( 

476 self.collections, self.run, self.tags, self.datastore, self.registry) 

477 

478 def isWriteable(self) -> bool: 

479 """Return `True` if this `Butler` supports write operations. 

480 """ 

481 return self.registry.isWriteable() 

482 

483 @contextlib.contextmanager 

484 def transaction(self) -> Iterator[None]: 

485 """Context manager supporting `Butler` transactions. 

486 

487 Transactions can be nested. 

488 """ 

489 with self.registry.transaction(): 

490 with self.datastore.transaction(): 

491 yield 

492 

493 def _standardizeArgs(self, datasetRefOrType: Union[DatasetRef, DatasetType, str], 

494 dataId: Optional[DataId] = None, **kwds: Any 

495 ) -> Tuple[DatasetType, Optional[DataId]]: 

496 """Standardize the arguments passed to several Butler APIs. 

497 

498 Parameters 

499 ---------- 

500 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

501 When `DatasetRef` the `dataId` should be `None`. 

502 Otherwise the `DatasetType` or name thereof. 

503 dataId : `dict` or `DataCoordinate` 

504 A `dict` of `Dimension` link name, value pairs that label the 

505 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

506 should be provided as the second argument. 

507 kwds 

508 Additional keyword arguments used to augment or construct a 

509 `DataCoordinate`. See `DataCoordinate.standardize` 

510 parameters. 

511 

512 Returns 

513 ------- 

514 datasetType : `DatasetType` 

515 A `DatasetType` instance extracted from ``datasetRefOrType``. 

516 dataId : `dict` or `DataId`, optional 

517 Argument that can be used (along with ``kwds``) to construct a 

518 `DataId`. 

519 

520 Notes 

521 ----- 

522 Butler APIs that conceptually need a DatasetRef also allow passing a 

523 `DatasetType` (or the name of one) and a `DataId` (or a dict and 

524 keyword arguments that can be used to construct one) separately. This 

525 method accepts those arguments and always returns a true `DatasetType` 

526 and a `DataId` or `dict`. 

527 

528 Standardization of `dict` vs `DataId` is best handled by passing the 

529 returned ``dataId`` (and ``kwds``) to `Registry` APIs, which are 

530 generally similarly flexible. 

531 """ 

532 externalDatasetType: Optional[DatasetType] = None 

533 internalDatasetType: Optional[DatasetType] = None 

534 if isinstance(datasetRefOrType, DatasetRef): 

535 if dataId is not None or kwds: 

536 raise ValueError("DatasetRef given, cannot use dataId as well") 

537 externalDatasetType = datasetRefOrType.datasetType 

538 dataId = datasetRefOrType.dataId 

539 else: 

540 # Don't check whether DataId is provided, because Registry APIs 

541 # can usually construct a better error message when it wasn't. 

542 if isinstance(datasetRefOrType, DatasetType): 

543 externalDatasetType = datasetRefOrType 

544 else: 

545 internalDatasetType = self.registry.getDatasetType(datasetRefOrType) 

546 

547 # Check that they are self-consistent 

548 if externalDatasetType is not None: 

549 internalDatasetType = self.registry.getDatasetType(externalDatasetType.name) 

550 if externalDatasetType != internalDatasetType: 

551 raise ValueError(f"Supplied dataset type ({externalDatasetType}) inconsistent with " 

552 f"registry definition ({internalDatasetType})") 

553 

554 assert internalDatasetType is not None 

555 return internalDatasetType, dataId 

556 

557 def _findDatasetRef(self, datasetRefOrType: Union[DatasetRef, DatasetType, str], 

558 dataId: Optional[DataId] = None, *, 

559 collections: Any = None, 

560 allowUnresolved: bool = False, 

561 **kwds: Any) -> DatasetRef: 

562 """Shared logic for methods that start with a search for a dataset in 

563 the registry. 

564 

565 Parameters 

566 ---------- 

567 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

568 When `DatasetRef` the `dataId` should be `None`. 

569 Otherwise the `DatasetType` or name thereof. 

570 dataId : `dict` or `DataCoordinate`, optional 

571 A `dict` of `Dimension` link name, value pairs that label the 

572 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

573 should be provided as the first argument. 

574 collections : Any, optional 

575 Collections to be searched, overriding ``self.collections``. 

576 Can be any of the types supported by the ``collections`` argument 

577 to butler construction. 

578 allowUnresolved : `bool`, optional 

579 If `True`, return an unresolved `DatasetRef` if finding a resolved 

580 one in the `Registry` fails. Defaults to `False`. 

581 kwds 

582 Additional keyword arguments used to augment or construct a 

583 `DataId`. See `DataId` parameters. 

584 

585 Returns 

586 ------- 

587 ref : `DatasetRef` 

588 A reference to the dataset identified by the given arguments. 

589 

590 Raises 

591 ------ 

592 LookupError 

593 Raised if no matching dataset exists in the `Registry` (and 

594 ``allowUnresolved is False``). 

595 ValueError 

596 Raised if a resolved `DatasetRef` was passed as an input, but it 

597 differs from the one found in the registry. 

598 TypeError 

599 Raised if no collections were provided. 

600 """ 

601 datasetType, dataId = self._standardizeArgs(datasetRefOrType, dataId, **kwds) 

602 if isinstance(datasetRefOrType, DatasetRef): 

603 idNumber = datasetRefOrType.id 

604 else: 

605 idNumber = None 

606 timespan: Optional[Timespan] = None 

607 

608 # Process dimension records that are using record information 

609 # rather than ids 

610 newDataId: Dict[str, DataIdValue] = {} 

611 byRecord: Dict[str, Dict[str, Any]] = defaultdict(dict) 

612 

613 # if all the dataId comes from keyword parameters we do not need 

614 # to do anything here because they can't be of the form 

615 # exposure.obs_id because a "." is not allowed in a keyword parameter. 

616 if dataId: 

617 for k, v in dataId.items(): 

618 # If we have a Dimension we do not need to do anything 

619 # because it cannot be a compound key. 

620 if isinstance(k, str) and "." in k: 

621 # Someone is using a more human-readable dataId 

622 dimensionName, record = k.split(".", 1) 

623 byRecord[dimensionName][record] = v 

624 elif isinstance(k, Dimension): 

625 newDataId[k.name] = v 

626 else: 

627 newDataId[k] = v 

628 

629 # Go through the updated dataId and check the type in case someone is 

630 # using an alternate key. We have already filtered out the compound 

631 # keys dimensions.record format. 

632 not_dimensions = {} 

633 

634 # Will need to look in the dataId and the keyword arguments 

635 # and will remove them if they need to be fixed or are unrecognized. 

636 for dataIdDict in (newDataId, kwds): 

637 # Use a list so we can adjust the dict safely in the loop 

638 for dimensionName in list(dataIdDict): 

639 value = dataIdDict[dimensionName] 

640 try: 

641 dimension = self.registry.dimensions.getStaticDimensions()[dimensionName] 

642 except KeyError: 

643 # This is not a real dimension 

644 not_dimensions[dimensionName] = value 

645 del dataIdDict[dimensionName] 

646 continue 

647 

648 # Convert an integral type to an explicit int to simplify 

649 # comparisons here 

650 if isinstance(value, numbers.Integral): 

651 value = int(value) 

652 

653 if not isinstance(value, dimension.primaryKey.getPythonType()): 

654 for alternate in dimension.alternateKeys: 

655 if isinstance(value, alternate.getPythonType()): 

656 byRecord[dimensionName][alternate.name] = value 

657 del dataIdDict[dimensionName] 

658 log.debug("Converting dimension %s to %s.%s=%s", 

659 dimensionName, dimensionName, alternate.name, value) 

660 break 

661 else: 

662 log.warning("Type mismatch found for value '%r' provided for dimension %s. " 

663 "Could not find matching alternative (primary key has type %s) " 

664 "so attempting to use as-is.", 

665 value, dimensionName, dimension.primaryKey.getPythonType()) 

666 

667 # If we have some unrecognized dimensions we have to try to connect 

668 # them to records in other dimensions. This is made more complicated 

669 # by some dimensions having records with clashing names. A mitigation 

670 # is that we can tell by this point which dimensions are missing 

671 # for the DatasetType but this does not work for calibrations 

672 # where additional dimensions can be used to constrain the temporal 

673 # axis. 

674 if not_dimensions: 

675 # Calculate missing dimensions 

676 provided = set(newDataId) | set(kwds) | set(byRecord) 

677 missingDimensions = datasetType.dimensions.names - provided 

678 

679 # For calibrations we may well be needing temporal dimensions 

680 # so rather than always including all dimensions in the scan 

681 # restrict things a little. It is still possible for there 

682 # to be confusion over day_obs in visit vs exposure for example. 

683 # If we are not searching calibration collections things may 

684 # fail but they are going to fail anyway because of the 

685 # ambiguousness of the dataId... 

686 candidateDimensions: Set[str] = set() 

687 candidateDimensions.update(missingDimensions) 

688 if datasetType.isCalibration(): 

689 for dim in self.registry.dimensions.getStaticDimensions(): 

690 if dim.temporal: 

691 candidateDimensions.add(str(dim)) 

692 

693 # Look up table for the first association with a dimension 

694 guessedAssociation: Dict[str, Dict[str, Any]] = defaultdict(dict) 

695 

696 # Keep track of whether an item is associated with multiple 

697 # dimensions. 

698 counter: Counter[str] = Counter() 

699 assigned: Dict[str, Set[str]] = defaultdict(set) 

700 

701 # Go through the missing dimensions and associate the 

702 # given names with records within those dimensions 

703 for dimensionName in candidateDimensions: 

704 dimension = self.registry.dimensions.getStaticDimensions()[dimensionName] 

705 fields = dimension.metadata.names | dimension.uniqueKeys.names 

706 for field in not_dimensions: 

707 if field in fields: 

708 guessedAssociation[dimensionName][field] = not_dimensions[field] 

709 counter[dimensionName] += 1 

710 assigned[field].add(dimensionName) 

711 

712 # There is a chance we have allocated a single dataId item 

713 # to multiple dimensions. Need to decide which should be retained. 

714 # For now assume that the most popular alternative wins. 

715 # This means that day_obs with seq_num will result in 

716 # exposure.day_obs and not visit.day_obs 

717 # Also prefer an explicitly missing dimension over an inferred 

718 # temporal dimension. 

719 for fieldName, assignedDimensions in assigned.items(): 

720 if len(assignedDimensions) > 1: 

721 # Pick the most popular (preferring mandatory dimensions) 

722 requiredButMissing = assignedDimensions.intersection(missingDimensions) 

723 if requiredButMissing: 

724 candidateDimensions = requiredButMissing 

725 else: 

726 candidateDimensions = assignedDimensions 

727 

728 # Select the relevant items and get a new restricted 

729 # counter. 

730 theseCounts = {k: v for k, v in counter.items() if k in candidateDimensions} 

731 duplicatesCounter: Counter[str] = Counter() 

732 duplicatesCounter.update(theseCounts) 

733 

734 # Choose the most common. If they are equally common 

735 # we will pick the one that was found first. 

736 # Returns a list of tuples 

737 selected = duplicatesCounter.most_common(1)[0][0] 

738 

739 log.debug("Ambiguous dataId entry '%s' associated with multiple dimensions: %s." 

740 " Removed ambiguity by choosing dimension %s.", 

741 fieldName, ", ".join(assignedDimensions), selected) 

742 

743 for candidateDimension in assignedDimensions: 

744 if candidateDimension != selected: 

745 del guessedAssociation[candidateDimension][fieldName] 

746 

747 # Update the record look up dict with the new associations 

748 for dimensionName, values in guessedAssociation.items(): 

749 if values: # A dict might now be empty 

750 log.debug("Assigned non-dimension dataId keys to dimension %s: %s", 

751 dimensionName, values) 

752 byRecord[dimensionName].update(values) 

753 

754 if byRecord: 

755 # Some record specifiers were found so we need to convert 

756 # them to the Id form 

757 for dimensionName, values in byRecord.items(): 

758 if dimensionName in newDataId: 

759 log.warning("DataId specified explicit %s dimension value of %s in addition to" 

760 " general record specifiers for it of %s. Ignoring record information.", 

761 dimensionName, newDataId[dimensionName], str(values)) 

762 continue 

763 

764 # Build up a WHERE expression -- use single quotes 

765 def quote(s: Any) -> str: 

766 if isinstance(s, str): 

767 return f"'{s}'" 

768 else: 

769 return s 

770 

771 where = " AND ".join(f"{dimensionName}.{k} = {quote(v)}" 

772 for k, v in values.items()) 

773 

774 # Hopefully we get a single record that matches 

775 records = set(self.registry.queryDimensionRecords(dimensionName, dataId=newDataId, 

776 where=where, **kwds)) 

777 

778 if len(records) != 1: 

779 if len(records) > 1: 

780 log.debug("Received %d records from constraints of %s", len(records), str(values)) 

781 for r in records: 

782 log.debug("- %s", str(r)) 

783 raise RuntimeError(f"DataId specification for dimension {dimensionName} is not" 

784 f" uniquely constrained to a single dataset by {values}." 

785 f" Got {len(records)} results.") 

786 raise RuntimeError(f"DataId specification for dimension {dimensionName} matched no" 

787 f" records when constrained by {values}") 

788 

789 # Get the primary key from the real dimension object 

790 dimension = self.registry.dimensions.getStaticDimensions()[dimensionName] 

791 if not isinstance(dimension, Dimension): 

792 raise RuntimeError( 

793 f"{dimension.name} is not a true dimension, and cannot be used in data IDs." 

794 ) 

795 newDataId[dimensionName] = getattr(records.pop(), dimension.primaryKey.name) 

796 

797 # We have modified the dataId so need to switch to it 

798 dataId = newDataId 

799 

800 if datasetType.isCalibration(): 

801 # Because this is a calibration dataset, first try to make a 

802 # standardize the data ID without restricting the dimensions to 

803 # those of the dataset type requested, because there may be extra 

804 # dimensions that provide temporal information for a validity-range 

805 # lookup. 

806 dataId = DataCoordinate.standardize(dataId, universe=self.registry.dimensions, **kwds) 

807 if dataId.graph.temporal: 

808 dataId = self.registry.expandDataId(dataId) 

809 timespan = dataId.timespan 

810 else: 

811 # Standardize the data ID to just the dimensions of the dataset 

812 # type instead of letting registry.findDataset do it, so we get the 

813 # result even if no dataset is found. 

814 dataId = DataCoordinate.standardize(dataId, graph=datasetType.dimensions, **kwds) 

815 if collections is None: 

816 collections = self.collections 

817 if not collections: 

818 raise TypeError("No input collections provided.") 

819 else: 

820 collections = CollectionSearch.fromExpression(collections) 

821 # Always lookup the DatasetRef, even if one is given, to ensure it is 

822 # present in the current collection. 

823 ref = self.registry.findDataset(datasetType, dataId, collections=collections, timespan=timespan) 

824 if ref is None: 

825 if allowUnresolved: 

826 return DatasetRef(datasetType, dataId) 

827 else: 

828 raise LookupError(f"Dataset {datasetType.name} with data ID {dataId} " 

829 f"could not be found in collections {collections}.") 

830 if idNumber is not None and idNumber != ref.id: 

831 raise ValueError(f"DatasetRef.id provided ({idNumber}) does not match " 

832 f"id ({ref.id}) in registry in collections {collections}.") 

833 return ref 

834 

835 @transactional 

836 def put(self, obj: Any, datasetRefOrType: Union[DatasetRef, DatasetType, str], 

837 dataId: Optional[DataId] = None, *, 

838 run: Optional[str] = None, 

839 tags: Optional[Iterable[str]] = None, 

840 **kwds: Any) -> DatasetRef: 

841 """Store and register a dataset. 

842 

843 Parameters 

844 ---------- 

845 obj : `object` 

846 The dataset. 

847 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

848 When `DatasetRef` is provided, ``dataId`` should be `None`. 

849 Otherwise the `DatasetType` or name thereof. 

850 dataId : `dict` or `DataCoordinate` 

851 A `dict` of `Dimension` link name, value pairs that label the 

852 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

853 should be provided as the second argument. 

854 run : `str`, optional 

855 The name of the run the dataset should be added to, overriding 

856 ``self.run``. 

857 tags : `Iterable` [ `str` ], optional 

858 The names of a `~CollectionType.TAGGED` collections to associate 

859 the dataset with, overriding ``self.tags``. These collections 

860 must have already been added to the `Registry`. 

861 kwds 

862 Additional keyword arguments used to augment or construct a 

863 `DataCoordinate`. See `DataCoordinate.standardize` 

864 parameters. 

865 

866 Returns 

867 ------- 

868 ref : `DatasetRef` 

869 A reference to the stored dataset, updated with the correct id if 

870 given. 

871 

872 Raises 

873 ------ 

874 TypeError 

875 Raised if the butler is read-only or if no run has been provided. 

876 """ 

877 log.debug("Butler put: %s, dataId=%s, run=%s", datasetRefOrType, dataId, run) 

878 if not self.isWriteable(): 

879 raise TypeError("Butler is read-only.") 

880 datasetType, dataId = self._standardizeArgs(datasetRefOrType, dataId, **kwds) 

881 if isinstance(datasetRefOrType, DatasetRef) and datasetRefOrType.id is not None: 

882 raise ValueError("DatasetRef must not be in registry, must have None id") 

883 

884 if run is None: 

885 if self.run is None: 

886 raise TypeError("No run provided.") 

887 run = self.run 

888 # No need to check type for run; first thing we do is 

889 # insertDatasets, and that will check for us. 

890 

891 if tags is None: 

892 tags = self.tags 

893 else: 

894 tags = tuple(tags) 

895 for tag in tags: 

896 # Check that these are tagged collections up front, because we want 

897 # to avoid relying on Datastore transactionality to avoid modifying 

898 # the repo if there's an error later. 

899 collectionType = self.registry.getCollectionType(tag) 

900 if collectionType is not CollectionType.TAGGED: 

901 raise TypeError(f"Cannot associate into collection '{tag}' of non-TAGGED type " 

902 f"{collectionType.name}.") 

903 

904 # Add Registry Dataset entry. 

905 dataId = self.registry.expandDataId(dataId, graph=datasetType.dimensions, **kwds) 

906 ref, = self.registry.insertDatasets(datasetType, run=run, dataIds=[dataId]) 

907 

908 # Add Datastore entry. 

909 self.datastore.put(obj, ref) 

910 

911 for tag in tags: 

912 self.registry.associate(tag, [ref]) 

913 

914 return ref 

915 

916 def getDirect(self, ref: DatasetRef, *, parameters: Optional[Dict[str, Any]] = None) -> Any: 

917 """Retrieve a stored dataset. 

918 

919 Unlike `Butler.get`, this method allows datasets outside the Butler's 

920 collection to be read as long as the `DatasetRef` that identifies them 

921 can be obtained separately. 

922 

923 Parameters 

924 ---------- 

925 ref : `DatasetRef` 

926 Resolved reference to an already stored dataset. 

927 parameters : `dict` 

928 Additional StorageClass-defined options to control reading, 

929 typically used to efficiently read only a subset of the dataset. 

930 

931 Returns 

932 ------- 

933 obj : `object` 

934 The dataset. 

935 """ 

936 return self.datastore.get(ref, parameters=parameters) 

937 

938 def getDirectDeferred(self, ref: DatasetRef, *, 

939 parameters: Union[dict, None] = None) -> DeferredDatasetHandle: 

940 """Create a `DeferredDatasetHandle` which can later retrieve a dataset, 

941 from a resolved `DatasetRef`. 

942 

943 Parameters 

944 ---------- 

945 ref : `DatasetRef` 

946 Resolved reference to an already stored dataset. 

947 parameters : `dict` 

948 Additional StorageClass-defined options to control reading, 

949 typically used to efficiently read only a subset of the dataset. 

950 

951 Returns 

952 ------- 

953 obj : `DeferredDatasetHandle` 

954 A handle which can be used to retrieve a dataset at a later time. 

955 

956 Raises 

957 ------ 

958 AmbiguousDatasetError 

959 Raised if ``ref.id is None``, i.e. the reference is unresolved. 

960 """ 

961 if ref.id is None: 

962 raise AmbiguousDatasetError( 

963 f"Dataset of type {ref.datasetType.name} with data ID {ref.dataId} is not resolved." 

964 ) 

965 return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters) 

966 

967 def getDeferred(self, datasetRefOrType: Union[DatasetRef, DatasetType, str], 

968 dataId: Optional[DataId] = None, *, 

969 parameters: Union[dict, None] = None, 

970 collections: Any = None, 

971 **kwds: Any) -> DeferredDatasetHandle: 

972 """Create a `DeferredDatasetHandle` which can later retrieve a dataset, 

973 after an immediate registry lookup. 

974 

975 Parameters 

976 ---------- 

977 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

978 When `DatasetRef` the `dataId` should be `None`. 

979 Otherwise the `DatasetType` or name thereof. 

980 dataId : `dict` or `DataCoordinate`, optional 

981 A `dict` of `Dimension` link name, value pairs that label the 

982 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

983 should be provided as the first argument. 

984 parameters : `dict` 

985 Additional StorageClass-defined options to control reading, 

986 typically used to efficiently read only a subset of the dataset. 

987 collections : Any, optional 

988 Collections to be searched, overriding ``self.collections``. 

989 Can be any of the types supported by the ``collections`` argument 

990 to butler construction. 

991 kwds 

992 Additional keyword arguments used to augment or construct a 

993 `DataId`. See `DataId` parameters. 

994 

995 Returns 

996 ------- 

997 obj : `DeferredDatasetHandle` 

998 A handle which can be used to retrieve a dataset at a later time. 

999 

1000 Raises 

1001 ------ 

1002 LookupError 

1003 Raised if no matching dataset exists in the `Registry` (and 

1004 ``allowUnresolved is False``). 

1005 ValueError 

1006 Raised if a resolved `DatasetRef` was passed as an input, but it 

1007 differs from the one found in the registry. 

1008 TypeError 

1009 Raised if no collections were provided. 

1010 """ 

1011 ref = self._findDatasetRef(datasetRefOrType, dataId, collections=collections, **kwds) 

1012 return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters) 

1013 

1014 def get(self, datasetRefOrType: Union[DatasetRef, DatasetType, str], 

1015 dataId: Optional[DataId] = None, *, 

1016 parameters: Optional[Dict[str, Any]] = None, 

1017 collections: Any = None, 

1018 **kwds: Any) -> Any: 

1019 """Retrieve a stored dataset. 

1020 

1021 Parameters 

1022 ---------- 

1023 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

1024 When `DatasetRef` the `dataId` should be `None`. 

1025 Otherwise the `DatasetType` or name thereof. 

1026 dataId : `dict` or `DataCoordinate` 

1027 A `dict` of `Dimension` link name, value pairs that label the 

1028 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

1029 should be provided as the first argument. 

1030 parameters : `dict` 

1031 Additional StorageClass-defined options to control reading, 

1032 typically used to efficiently read only a subset of the dataset. 

1033 collections : Any, optional 

1034 Collections to be searched, overriding ``self.collections``. 

1035 Can be any of the types supported by the ``collections`` argument 

1036 to butler construction. 

1037 kwds 

1038 Additional keyword arguments used to augment or construct a 

1039 `DataCoordinate`. See `DataCoordinate.standardize` 

1040 parameters. 

1041 

1042 Returns 

1043 ------- 

1044 obj : `object` 

1045 The dataset. 

1046 

1047 Raises 

1048 ------ 

1049 ValueError 

1050 Raised if a resolved `DatasetRef` was passed as an input, but it 

1051 differs from the one found in the registry. 

1052 LookupError 

1053 Raised if no matching dataset exists in the `Registry`. 

1054 TypeError 

1055 Raised if no collections were provided. 

1056 

1057 Notes 

1058 ----- 

1059 When looking up datasets in a `~CollectionType.CALIBRATION` collection, 

1060 this method requires that the given data ID include temporal dimensions 

1061 beyond the dimensions of the dataset type itself, in order to find the 

1062 dataset with the appropriate validity range. For example, a "bias" 

1063 dataset with native dimensions ``{instrument, detector}`` could be 

1064 fetched with a ``{instrument, detector, exposure}`` data ID, because 

1065 ``exposure`` is a temporal dimension. 

1066 """ 

1067 log.debug("Butler get: %s, dataId=%s, parameters=%s", datasetRefOrType, dataId, parameters) 

1068 ref = self._findDatasetRef(datasetRefOrType, dataId, collections=collections, **kwds) 

1069 return self.getDirect(ref, parameters=parameters) 

1070 

1071 def getURIs(self, datasetRefOrType: Union[DatasetRef, DatasetType, str], 

1072 dataId: Optional[DataId] = None, *, 

1073 predict: bool = False, 

1074 collections: Any = None, 

1075 run: Optional[str] = None, 

1076 **kwds: Any) -> Tuple[Optional[ButlerURI], Dict[str, ButlerURI]]: 

1077 """Returns the URIs associated with the dataset. 

1078 

1079 Parameters 

1080 ---------- 

1081 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

1082 When `DatasetRef` the `dataId` should be `None`. 

1083 Otherwise the `DatasetType` or name thereof. 

1084 dataId : `dict` or `DataCoordinate` 

1085 A `dict` of `Dimension` link name, value pairs that label the 

1086 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

1087 should be provided as the first argument. 

1088 predict : `bool` 

1089 If `True`, allow URIs to be returned of datasets that have not 

1090 been written. 

1091 collections : Any, optional 

1092 Collections to be searched, overriding ``self.collections``. 

1093 Can be any of the types supported by the ``collections`` argument 

1094 to butler construction. 

1095 run : `str`, optional 

1096 Run to use for predictions, overriding ``self.run``. 

1097 kwds 

1098 Additional keyword arguments used to augment or construct a 

1099 `DataCoordinate`. See `DataCoordinate.standardize` 

1100 parameters. 

1101 

1102 Returns 

1103 ------- 

1104 primary : `ButlerURI` 

1105 The URI to the primary artifact associated with this dataset. 

1106 If the dataset was disassembled within the datastore this 

1107 may be `None`. 

1108 components : `dict` 

1109 URIs to any components associated with the dataset artifact. 

1110 Can be empty if there are no components. 

1111 """ 

1112 ref = self._findDatasetRef(datasetRefOrType, dataId, allowUnresolved=predict, 

1113 collections=collections, **kwds) 

1114 if ref.id is None: # only possible if predict is True 

1115 if run is None: 

1116 run = self.run 

1117 if run is None: 

1118 raise TypeError("Cannot predict location with run=None.") 

1119 # Lie about ID, because we can't guess it, and only 

1120 # Datastore.getURIs() will ever see it (and it doesn't use it). 

1121 ref = ref.resolved(id=0, run=run) 

1122 return self.datastore.getURIs(ref, predict) 

1123 

1124 def getURI(self, datasetRefOrType: Union[DatasetRef, DatasetType, str], 

1125 dataId: Optional[DataId] = None, *, 

1126 predict: bool = False, 

1127 collections: Any = None, 

1128 run: Optional[str] = None, 

1129 **kwds: Any) -> ButlerURI: 

1130 """Return the URI to the Dataset. 

1131 

1132 Parameters 

1133 ---------- 

1134 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

1135 When `DatasetRef` the `dataId` should be `None`. 

1136 Otherwise the `DatasetType` or name thereof. 

1137 dataId : `dict` or `DataCoordinate` 

1138 A `dict` of `Dimension` link name, value pairs that label the 

1139 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

1140 should be provided as the first argument. 

1141 predict : `bool` 

1142 If `True`, allow URIs to be returned of datasets that have not 

1143 been written. 

1144 collections : Any, optional 

1145 Collections to be searched, overriding ``self.collections``. 

1146 Can be any of the types supported by the ``collections`` argument 

1147 to butler construction. 

1148 run : `str`, optional 

1149 Run to use for predictions, overriding ``self.run``. 

1150 kwds 

1151 Additional keyword arguments used to augment or construct a 

1152 `DataCoordinate`. See `DataCoordinate.standardize` 

1153 parameters. 

1154 

1155 Returns 

1156 ------- 

1157 uri : `ButlerURI` 

1158 URI pointing to the Dataset within the datastore. If the 

1159 Dataset does not exist in the datastore, and if ``predict`` is 

1160 `True`, the URI will be a prediction and will include a URI 

1161 fragment "#predicted". 

1162 If the datastore does not have entities that relate well 

1163 to the concept of a URI the returned URI string will be 

1164 descriptive. The returned URI is not guaranteed to be obtainable. 

1165 

1166 Raises 

1167 ------ 

1168 LookupError 

1169 A URI has been requested for a dataset that does not exist and 

1170 guessing is not allowed. 

1171 ValueError 

1172 Raised if a resolved `DatasetRef` was passed as an input, but it 

1173 differs from the one found in the registry. 

1174 TypeError 

1175 Raised if no collections were provided. 

1176 RuntimeError 

1177 Raised if a URI is requested for a dataset that consists of 

1178 multiple artifacts. 

1179 """ 

1180 primary, components = self.getURIs(datasetRefOrType, dataId=dataId, predict=predict, 

1181 collections=collections, run=run, **kwds) 

1182 

1183 if primary is None or components: 

1184 raise RuntimeError(f"Dataset ({datasetRefOrType}) includes distinct URIs for components. " 

1185 "Use Butler.getURIs() instead.") 

1186 return primary 

1187 

1188 def datasetExists(self, datasetRefOrType: Union[DatasetRef, DatasetType, str], 

1189 dataId: Optional[DataId] = None, *, 

1190 collections: Any = None, 

1191 **kwds: Any) -> bool: 

1192 """Return True if the Dataset is actually present in the Datastore. 

1193 

1194 Parameters 

1195 ---------- 

1196 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

1197 When `DatasetRef` the `dataId` should be `None`. 

1198 Otherwise the `DatasetType` or name thereof. 

1199 dataId : `dict` or `DataCoordinate` 

1200 A `dict` of `Dimension` link name, value pairs that label the 

1201 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

1202 should be provided as the first argument. 

1203 collections : Any, optional 

1204 Collections to be searched, overriding ``self.collections``. 

1205 Can be any of the types supported by the ``collections`` argument 

1206 to butler construction. 

1207 kwds 

1208 Additional keyword arguments used to augment or construct a 

1209 `DataCoordinate`. See `DataCoordinate.standardize` 

1210 parameters. 

1211 

1212 Raises 

1213 ------ 

1214 LookupError 

1215 Raised if the dataset is not even present in the Registry. 

1216 ValueError 

1217 Raised if a resolved `DatasetRef` was passed as an input, but it 

1218 differs from the one found in the registry. 

1219 TypeError 

1220 Raised if no collections were provided. 

1221 """ 

1222 ref = self._findDatasetRef(datasetRefOrType, dataId, collections=collections, **kwds) 

1223 return self.datastore.exists(ref) 

1224 

1225 def pruneCollection(self, name: str, purge: bool = False, unstore: bool = False) -> None: 

1226 """Remove a collection and possibly prune datasets within it. 

1227 

1228 Parameters 

1229 ---------- 

1230 name : `str` 

1231 Name of the collection to remove. If this is a 

1232 `~CollectionType.TAGGED` or `~CollectionType.CHAINED` collection, 

1233 datasets within the collection are not modified unless ``unstore`` 

1234 is `True`. If this is a `~CollectionType.RUN` collection, 

1235 ``purge`` and ``unstore`` must be `True`, and all datasets in it 

1236 are fully removed from the data repository. 

1237 purge : `bool`, optional 

1238 If `True`, permit `~CollectionType.RUN` collections to be removed, 

1239 fully removing datasets within them. Requires ``unstore=True`` as 

1240 well as an added precaution against accidental deletion. Must be 

1241 `False` (default) if the collection is not a ``RUN``. 

1242 unstore: `bool`, optional 

1243 If `True`, remove all datasets in the collection from all 

1244 datastores in which they appear. 

1245 

1246 Raises 

1247 ------ 

1248 TypeError 

1249 Raised if the butler is read-only or arguments are mutually 

1250 inconsistent. 

1251 """ 

1252 

1253 # See pruneDatasets comments for more information about the logic here; 

1254 # the cases are almost the same, but here we can rely on Registry to 

1255 # take care everything but Datastore deletion when we remove the 

1256 # collection. 

1257 if not self.isWriteable(): 

1258 raise TypeError("Butler is read-only.") 

1259 collectionType = self.registry.getCollectionType(name) 

1260 if purge and not unstore: 

1261 raise PurgeWithoutUnstorePruneCollectionsError() 

1262 if collectionType is CollectionType.RUN and not purge: 

1263 raise RunWithoutPurgePruneCollectionsError(collectionType) 

1264 if collectionType is not CollectionType.RUN and purge: 

1265 raise PurgeUnsupportedPruneCollectionsError(collectionType) 

1266 

1267 with self.registry.transaction(): 

1268 if unstore: 

1269 for ref in self.registry.queryDatasets(..., collections=name, findFirst=True): 

1270 if self.datastore.exists(ref): 

1271 self.datastore.trash(ref) 

1272 self.registry.removeCollection(name) 

1273 if unstore: 

1274 # Point of no return for removing artifacts 

1275 self.datastore.emptyTrash() 

1276 

1277 def pruneDatasets(self, refs: Iterable[DatasetRef], *, 

1278 disassociate: bool = True, 

1279 unstore: bool = False, 

1280 tags: Optional[Iterable[str]] = None, 

1281 purge: bool = False, 

1282 run: Optional[str] = None) -> None: 

1283 """Remove one or more datasets from a collection and/or storage. 

1284 

1285 Parameters 

1286 ---------- 

1287 refs : `~collections.abc.Iterable` of `DatasetRef` 

1288 Datasets to prune. These must be "resolved" references (not just 

1289 a `DatasetType` and data ID). 

1290 disassociate : `bool`, optional 

1291 Disassociate pruned datasets from ``self.tags`` (or the collections 

1292 given via the ``tags`` argument). 

1293 unstore : `bool`, optional 

1294 If `True` (`False` is default) remove these datasets from all 

1295 datastores known to this butler. Note that this will make it 

1296 impossible to retrieve these datasets even via other collections. 

1297 Datasets that are already not stored are ignored by this option. 

1298 tags : `Iterable` [ `str` ], optional 

1299 `~CollectionType.TAGGED` collections to disassociate the datasets 

1300 from, overriding ``self.tags``. Ignored if ``disassociate`` is 

1301 `False` or ``purge`` is `True`. 

1302 purge : `bool`, optional 

1303 If `True` (`False` is default), completely remove the dataset from 

1304 the `Registry`. To prevent accidental deletions, ``purge`` may 

1305 only be `True` if all of the following conditions are met: 

1306 

1307 - All given datasets are in the given run. 

1308 - ``disassociate`` is `True`; 

1309 - ``unstore`` is `True`. 

1310 

1311 This mode may remove provenance information from datasets other 

1312 than those provided, and should be used with extreme care. 

1313 run : `str`, optional 

1314 `~CollectionType.RUN` collection to purge from, overriding 

1315 ``self.run``. Ignored unless ``purge`` is `True`. 

1316 

1317 Raises 

1318 ------ 

1319 TypeError 

1320 Raised if the butler is read-only, if no collection was provided, 

1321 or the conditions for ``purge=True`` were not met. 

1322 """ 

1323 if not self.isWriteable(): 

1324 raise TypeError("Butler is read-only.") 

1325 if purge: 

1326 if not disassociate: 

1327 raise TypeError("Cannot pass purge=True without disassociate=True.") 

1328 if not unstore: 

1329 raise TypeError("Cannot pass purge=True without unstore=True.") 

1330 if run is None: 

1331 run = self.run 

1332 if run is None: 

1333 raise TypeError("No run provided but purge=True.") 

1334 collectionType = self.registry.getCollectionType(run) 

1335 if collectionType is not CollectionType.RUN: 

1336 raise TypeError(f"Cannot purge from collection '{run}' " 

1337 f"of non-RUN type {collectionType.name}.") 

1338 elif disassociate: 

1339 if tags is None: 

1340 tags = self.tags 

1341 else: 

1342 tags = tuple(tags) 

1343 if not tags: 

1344 raise TypeError("No tags provided but disassociate=True.") 

1345 for tag in tags: 

1346 collectionType = self.registry.getCollectionType(tag) 

1347 if collectionType is not CollectionType.TAGGED: 

1348 raise TypeError(f"Cannot disassociate from collection '{tag}' " 

1349 f"of non-TAGGED type {collectionType.name}.") 

1350 # Transform possibly-single-pass iterable into something we can iterate 

1351 # over multiple times. 

1352 refs = list(refs) 

1353 # Pruning a component of a DatasetRef makes no sense since registry 

1354 # doesn't know about components and datastore might not store 

1355 # components in a separate file 

1356 for ref in refs: 

1357 if ref.datasetType.component(): 

1358 raise ValueError(f"Can not prune a component of a dataset (ref={ref})") 

1359 # We don't need an unreliable Datastore transaction for this, because 

1360 # we've been extra careful to ensure that Datastore.trash only involves 

1361 # mutating the Registry (it can _look_ at Datastore-specific things, 

1362 # but shouldn't change them), and hence all operations here are 

1363 # Registry operations. 

1364 with self.registry.transaction(): 

1365 if unstore: 

1366 for ref in refs: 

1367 # There is a difference between a concrete composite 

1368 # and virtual composite. In a virtual composite the 

1369 # datastore is never given the top level DatasetRef. In 

1370 # the concrete composite the datastore knows all the 

1371 # refs and will clean up itself if asked to remove the 

1372 # parent ref. We can not check configuration for this 

1373 # since we can not trust that the configuration is the 

1374 # same. We therefore have to ask if the ref exists or 

1375 # not. This is consistent with the fact that we want 

1376 # to ignore already-removed-from-datastore datasets 

1377 # anyway. 

1378 if self.datastore.exists(ref): 

1379 self.datastore.trash(ref) 

1380 if purge: 

1381 self.registry.removeDatasets(refs) 

1382 elif disassociate: 

1383 assert tags, "Guaranteed by earlier logic in this function." 

1384 for tag in tags: 

1385 self.registry.disassociate(tag, refs) 

1386 # We've exited the Registry transaction, and apparently committed. 

1387 # (if there was an exception, everything rolled back, and it's as if 

1388 # nothing happened - and we never get here). 

1389 # Datastore artifacts are not yet gone, but they're clearly marked 

1390 # as trash, so if we fail to delete now because of (e.g.) filesystem 

1391 # problems we can try again later, and if manual administrative 

1392 # intervention is required, it's pretty clear what that should entail: 

1393 # deleting everything on disk and in private Datastore tables that is 

1394 # in the dataset_location_trash table. 

1395 if unstore: 

1396 # Point of no return for removing artifacts 

1397 self.datastore.emptyTrash() 

1398 

1399 @transactional 

1400 def ingest(self, *datasets: FileDataset, transfer: Optional[str] = "auto", run: Optional[str] = None, 

1401 tags: Optional[Iterable[str]] = None,) -> None: 

1402 """Store and register one or more datasets that already exist on disk. 

1403 

1404 Parameters 

1405 ---------- 

1406 datasets : `FileDataset` 

1407 Each positional argument is a struct containing information about 

1408 a file to be ingested, including its path (either absolute or 

1409 relative to the datastore root, if applicable), a `DatasetRef`, 

1410 and optionally a formatter class or its fully-qualified string 

1411 name. If a formatter is not provided, the formatter that would be 

1412 used for `put` is assumed. On successful return, all 

1413 `FileDataset.ref` attributes will have their `DatasetRef.id` 

1414 attribute populated and all `FileDataset.formatter` attributes will 

1415 be set to the formatter class used. `FileDataset.path` attributes 

1416 may be modified to put paths in whatever the datastore considers a 

1417 standardized form. 

1418 transfer : `str`, optional 

1419 If not `None`, must be one of 'auto', 'move', 'copy', 'direct', 

1420 'hardlink', 'relsymlink' or 'symlink', indicating how to transfer 

1421 the file. 

1422 run : `str`, optional 

1423 The name of the run ingested datasets should be added to, 

1424 overriding ``self.run``. 

1425 tags : `Iterable` [ `str` ], optional 

1426 The names of a `~CollectionType.TAGGED` collections to associate 

1427 the dataset with, overriding ``self.tags``. These collections 

1428 must have already been added to the `Registry`. 

1429 

1430 Raises 

1431 ------ 

1432 TypeError 

1433 Raised if the butler is read-only or if no run was provided. 

1434 NotImplementedError 

1435 Raised if the `Datastore` does not support the given transfer mode. 

1436 DatasetTypeNotSupportedError 

1437 Raised if one or more files to be ingested have a dataset type that 

1438 is not supported by the `Datastore`.. 

1439 FileNotFoundError 

1440 Raised if one of the given files does not exist. 

1441 FileExistsError 

1442 Raised if transfer is not `None` but the (internal) location the 

1443 file would be moved to is already occupied. 

1444 

1445 Notes 

1446 ----- 

1447 This operation is not fully exception safe: if a database operation 

1448 fails, the given `FileDataset` instances may be only partially updated. 

1449 

1450 It is atomic in terms of database operations (they will either all 

1451 succeed or all fail) providing the database engine implements 

1452 transactions correctly. It will attempt to be atomic in terms of 

1453 filesystem operations as well, but this cannot be implemented 

1454 rigorously for most datastores. 

1455 """ 

1456 if not self.isWriteable(): 

1457 raise TypeError("Butler is read-only.") 

1458 if run is None: 

1459 if self.run is None: 

1460 raise TypeError("No run provided.") 

1461 run = self.run 

1462 # No need to check run type, since insertDatasets will do that 

1463 # (safely) for us. 

1464 if tags is None: 

1465 tags = self.tags 

1466 else: 

1467 tags = tuple(tags) 

1468 for tag in tags: 

1469 # Check that these are tagged collections up front, because we want 

1470 # to avoid relying on Datastore transactionality to avoid modifying 

1471 # the repo if there's an error later. 

1472 collectionType = self.registry.getCollectionType(tag) 

1473 if collectionType is not CollectionType.TAGGED: 

1474 raise TypeError(f"Cannot associate into collection '{tag}' of non-TAGGED type " 

1475 f"{collectionType.name}.") 

1476 # Reorganize the inputs so they're grouped by DatasetType and then 

1477 # data ID. We also include a list of DatasetRefs for each FileDataset 

1478 # to hold the resolved DatasetRefs returned by the Registry, before 

1479 # it's safe to swap them into FileDataset.refs. 

1480 # Some type annotation aliases to make that clearer: 

1481 GroupForType = Dict[DataCoordinate, Tuple[FileDataset, List[DatasetRef]]] 

1482 GroupedData = MutableMapping[DatasetType, GroupForType] 

1483 # The actual data structure: 

1484 groupedData: GroupedData = defaultdict(dict) 

1485 # And the nested loop that populates it: 

1486 for dataset in datasets: 

1487 # This list intentionally shared across the inner loop, since it's 

1488 # associated with `dataset`. 

1489 resolvedRefs: List[DatasetRef] = [] 

1490 for ref in dataset.refs: 

1491 groupedData[ref.datasetType][ref.dataId] = (dataset, resolvedRefs) 

1492 

1493 # Now we can bulk-insert into Registry for each DatasetType. 

1494 allResolvedRefs: List[DatasetRef] = [] 

1495 for datasetType, groupForType in groupedData.items(): 

1496 refs = self.registry.insertDatasets(datasetType, 

1497 dataIds=groupForType.keys(), 

1498 run=run) 

1499 # Append those resolved DatasetRefs to the new lists we set up for 

1500 # them. 

1501 for ref, (_, resolvedRefs) in zip(refs, groupForType.values()): 

1502 resolvedRefs.append(ref) 

1503 

1504 # Go back to the original FileDatasets to replace their refs with the 

1505 # new resolved ones, and also build a big list of all refs. 

1506 allResolvedRefs = [] 

1507 for groupForType in groupedData.values(): 

1508 for dataset, resolvedRefs in groupForType.values(): 

1509 dataset.refs = resolvedRefs 

1510 allResolvedRefs.extend(resolvedRefs) 

1511 

1512 # Bulk-associate everything with any tagged collections. 

1513 for tag in tags: 

1514 self.registry.associate(tag, allResolvedRefs) 

1515 

1516 # Bulk-insert everything into Datastore. 

1517 self.datastore.ingest(*datasets, transfer=transfer) 

1518 

1519 @contextlib.contextmanager 

1520 def export(self, *, directory: Optional[str] = None, 

1521 filename: Optional[str] = None, 

1522 format: Optional[str] = None, 

1523 transfer: Optional[str] = None) -> Iterator[RepoExportContext]: 

1524 """Export datasets from the repository represented by this `Butler`. 

1525 

1526 This method is a context manager that returns a helper object 

1527 (`RepoExportContext`) that is used to indicate what information from 

1528 the repository should be exported. 

1529 

1530 Parameters 

1531 ---------- 

1532 directory : `str`, optional 

1533 Directory dataset files should be written to if ``transfer`` is not 

1534 `None`. 

1535 filename : `str`, optional 

1536 Name for the file that will include database information associated 

1537 with the exported datasets. If this is not an absolute path and 

1538 ``directory`` is not `None`, it will be written to ``directory`` 

1539 instead of the current working directory. Defaults to 

1540 "export.{format}". 

1541 format : `str`, optional 

1542 File format for the database information file. If `None`, the 

1543 extension of ``filename`` will be used. 

1544 transfer : `str`, optional 

1545 Transfer mode passed to `Datastore.export`. 

1546 

1547 Raises 

1548 ------ 

1549 TypeError 

1550 Raised if the set of arguments passed is inconsistent. 

1551 

1552 Examples 

1553 -------- 

1554 Typically the `Registry.queryDataIds` and `Registry.queryDatasets` 

1555 methods are used to provide the iterables over data IDs and/or datasets 

1556 to be exported:: 

1557 

1558 with butler.export("exports.yaml") as export: 

1559 # Export all flats, but none of the dimension element rows 

1560 # (i.e. data ID information) associated with them. 

1561 export.saveDatasets(butler.registry.queryDatasets("flat"), 

1562 elements=()) 

1563 # Export all datasets that start with "deepCoadd_" and all of 

1564 # their associated data ID information. 

1565 export.saveDatasets(butler.registry.queryDatasets("deepCoadd_*")) 

1566 """ 

1567 if directory is None and transfer is not None: 

1568 raise TypeError("Cannot transfer without providing a directory.") 

1569 if transfer == "move": 

1570 raise TypeError("Transfer may not be 'move': export is read-only") 

1571 if format is None: 

1572 if filename is None: 

1573 raise TypeError("At least one of 'filename' or 'format' must be provided.") 

1574 else: 

1575 _, format = os.path.splitext(filename) 

1576 elif filename is None: 

1577 filename = f"export.{format}" 

1578 if directory is not None: 

1579 filename = os.path.join(directory, filename) 

1580 BackendClass = getClassOf(self._config["repo_transfer_formats"][format]["export"]) 

1581 with open(filename, 'w') as stream: 

1582 backend = BackendClass(stream) 

1583 try: 

1584 helper = RepoExportContext(self.registry, self.datastore, backend=backend, 

1585 directory=directory, transfer=transfer) 

1586 yield helper 

1587 except BaseException: 

1588 raise 

1589 else: 

1590 helper._finish() 

1591 

1592 def import_(self, *, directory: Optional[str] = None, 

1593 filename: Union[str, TextIO, None] = None, 

1594 format: Optional[str] = None, 

1595 transfer: Optional[str] = None, 

1596 skip_dimensions: Optional[Set] = None) -> None: 

1597 """Import datasets into this repository that were exported from a 

1598 different butler repository via `~lsst.daf.butler.Butler.export`. 

1599 

1600 Parameters 

1601 ---------- 

1602 directory : `str`, optional 

1603 Directory containing dataset files to import from. If `None`, 

1604 ``filename`` and all dataset file paths specified therein must 

1605 be absolute. 

1606 filename : `str` or `TextIO`, optional 

1607 A stream or name of file that contains database information 

1608 associated with the exported datasets, typically generated by 

1609 `~lsst.daf.butler.Butler.export`. If this a string (name) and 

1610 is not an absolute path, does not exist in the current working 

1611 directory, and ``directory`` is not `None`, it is assumed to be in 

1612 ``directory``. Defaults to "export.{format}". 

1613 format : `str`, optional 

1614 File format for ``filename``. If `None`, the extension of 

1615 ``filename`` will be used. 

1616 transfer : `str`, optional 

1617 Transfer mode passed to `~lsst.daf.butler.Datastore.ingest`. 

1618 skip_dimensions : `set`, optional 

1619 Names of dimensions that should be skipped and not imported. 

1620 

1621 Raises 

1622 ------ 

1623 TypeError 

1624 Raised if the set of arguments passed is inconsistent, or if the 

1625 butler is read-only. 

1626 """ 

1627 if not self.isWriteable(): 

1628 raise TypeError("Butler is read-only.") 

1629 if format is None: 

1630 if filename is None: 

1631 raise TypeError("At least one of 'filename' or 'format' must be provided.") 

1632 else: 

1633 _, format = os.path.splitext(filename) # type: ignore 

1634 elif filename is None: 

1635 filename = f"export.{format}" 

1636 if isinstance(filename, str) and directory is not None and not os.path.exists(filename): 

1637 filename = os.path.join(directory, filename) 

1638 BackendClass = getClassOf(self._config["repo_transfer_formats"][format]["import"]) 

1639 

1640 def doImport(importStream: TextIO) -> None: 

1641 backend = BackendClass(importStream, self.registry) 

1642 backend.register() 

1643 with self.transaction(): 

1644 backend.load(self.datastore, directory=directory, transfer=transfer, 

1645 skip_dimensions=skip_dimensions) 

1646 

1647 if isinstance(filename, str): 

1648 with open(filename, "r") as stream: 

1649 doImport(stream) 

1650 else: 

1651 doImport(filename) 

1652 

1653 def validateConfiguration(self, logFailures: bool = False, 

1654 datasetTypeNames: Optional[Iterable[str]] = None, 

1655 ignore: Iterable[str] = None) -> None: 

1656 """Validate butler configuration. 

1657 

1658 Checks that each `DatasetType` can be stored in the `Datastore`. 

1659 

1660 Parameters 

1661 ---------- 

1662 logFailures : `bool`, optional 

1663 If `True`, output a log message for every validation error 

1664 detected. 

1665 datasetTypeNames : iterable of `str`, optional 

1666 The `DatasetType` names that should be checked. This allows 

1667 only a subset to be selected. 

1668 ignore : iterable of `str`, optional 

1669 Names of DatasetTypes to skip over. This can be used to skip 

1670 known problems. If a named `DatasetType` corresponds to a 

1671 composite, all components of that `DatasetType` will also be 

1672 ignored. 

1673 

1674 Raises 

1675 ------ 

1676 ButlerValidationError 

1677 Raised if there is some inconsistency with how this Butler 

1678 is configured. 

1679 """ 

1680 if datasetTypeNames: 

1681 datasetTypes = [self.registry.getDatasetType(name) for name in datasetTypeNames] 

1682 else: 

1683 datasetTypes = list(self.registry.queryDatasetTypes()) 

1684 

1685 # filter out anything from the ignore list 

1686 if ignore: 

1687 ignore = set(ignore) 

1688 datasetTypes = [e for e in datasetTypes 

1689 if e.name not in ignore and e.nameAndComponent()[0] not in ignore] 

1690 else: 

1691 ignore = set() 

1692 

1693 # Find all the registered instruments 

1694 instruments = set( 

1695 record.name for record in self.registry.queryDimensionRecords("instrument") 

1696 ) 

1697 

1698 # For each datasetType that has an instrument dimension, create 

1699 # a DatasetRef for each defined instrument 

1700 datasetRefs = [] 

1701 

1702 for datasetType in datasetTypes: 

1703 if "instrument" in datasetType.dimensions: 

1704 for instrument in instruments: 

1705 datasetRef = DatasetRef(datasetType, {"instrument": instrument}, # type: ignore 

1706 conform=False) 

1707 datasetRefs.append(datasetRef) 

1708 

1709 entities: List[Union[DatasetType, DatasetRef]] = [] 

1710 entities.extend(datasetTypes) 

1711 entities.extend(datasetRefs) 

1712 

1713 datastoreErrorStr = None 

1714 try: 

1715 self.datastore.validateConfiguration(entities, logFailures=logFailures) 

1716 except ValidationError as e: 

1717 datastoreErrorStr = str(e) 

1718 

1719 # Also check that the LookupKeys used by the datastores match 

1720 # registry and storage class definitions 

1721 keys = self.datastore.getLookupKeys() 

1722 

1723 failedNames = set() 

1724 failedDataId = set() 

1725 for key in keys: 

1726 if key.name is not None: 

1727 if key.name in ignore: 

1728 continue 

1729 

1730 # skip if specific datasetType names were requested and this 

1731 # name does not match 

1732 if datasetTypeNames and key.name not in datasetTypeNames: 

1733 continue 

1734 

1735 # See if it is a StorageClass or a DatasetType 

1736 if key.name in self.storageClasses: 

1737 pass 

1738 else: 

1739 try: 

1740 self.registry.getDatasetType(key.name) 

1741 except KeyError: 

1742 if logFailures: 

1743 log.fatal("Key '%s' does not correspond to a DatasetType or StorageClass", key) 

1744 failedNames.add(key) 

1745 else: 

1746 # Dimensions are checked for consistency when the Butler 

1747 # is created and rendezvoused with a universe. 

1748 pass 

1749 

1750 # Check that the instrument is a valid instrument 

1751 # Currently only support instrument so check for that 

1752 if key.dataId: 

1753 dataIdKeys = set(key.dataId) 

1754 if set(["instrument"]) != dataIdKeys: 

1755 if logFailures: 

1756 log.fatal("Key '%s' has unsupported DataId override", key) 

1757 failedDataId.add(key) 

1758 elif key.dataId["instrument"] not in instruments: 

1759 if logFailures: 

1760 log.fatal("Key '%s' has unknown instrument", key) 

1761 failedDataId.add(key) 

1762 

1763 messages = [] 

1764 

1765 if datastoreErrorStr: 

1766 messages.append(datastoreErrorStr) 

1767 

1768 for failed, msg in ((failedNames, "Keys without corresponding DatasetType or StorageClass entry: "), 

1769 (failedDataId, "Keys with bad DataId entries: ")): 

1770 if failed: 

1771 msg += ", ".join(str(k) for k in failed) 

1772 messages.append(msg) 

1773 

1774 if messages: 

1775 raise ValidationError(";\n".join(messages)) 

1776 

1777 registry: Registry 

1778 """The object that manages dataset metadata and relationships (`Registry`). 

1779 

1780 Most operations that don't involve reading or writing butler datasets are 

1781 accessible only via `Registry` methods. 

1782 """ 

1783 

1784 datastore: Datastore 

1785 """The object that manages actual dataset storage (`Datastore`). 

1786 

1787 Direct user access to the datastore should rarely be necessary; the primary 

1788 exception is the case where a `Datastore` implementation provides extra 

1789 functionality beyond what the base class defines. 

1790 """ 

1791 

1792 storageClasses: StorageClassFactory 

1793 """An object that maps known storage class names to objects that fully 

1794 describe them (`StorageClassFactory`). 

1795 """ 

1796 

1797 collections: Optional[CollectionSearch] 

1798 """The collections to search and any restrictions on the dataset types to 

1799 search for within them, in order (`CollectionSearch`). 

1800 """ 

1801 

1802 run: Optional[str] 

1803 """Name of the run this butler writes outputs to (`str` or `None`). 

1804 """ 

1805 

1806 tags: Tuple[str, ...] 

1807 """Names of `~CollectionType.TAGGED` collections this butler associates 

1808 with in `put` and `ingest`, and disassociates from in `pruneDatasets` 

1809 (`tuple` [ `str` ]). 

1810 """