1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22""" 

23Butler top level classes. 

24""" 

25from __future__ import annotations 

26 

27__all__ = ( 

28 "Butler", 

29 "ButlerValidationError", 

30 "PruneCollectionsArgsError", 

31 "PurgeWithoutUnstorePruneCollectionsError", 

32 "RunWithoutPurgePruneCollectionsError", 

33 "PurgeUnsupportedPruneCollectionsError", 

34) 

35 

36 

37from collections import defaultdict 

38import contextlib 

39import logging 

40import numbers 

41import os 

42from typing import ( 

43 Any, 

44 ClassVar, 

45 Counter, 

46 Dict, 

47 Iterable, 

48 Iterator, 

49 List, 

50 MutableMapping, 

51 Optional, 

52 Set, 

53 TextIO, 

54 Tuple, 

55 Type, 

56 Union, 

57) 

58 

59try: 

60 import boto3 

61except ImportError: 

62 boto3 = None 

63 

64from lsst.utils import doImport 

65from .core import ( 

66 AmbiguousDatasetError, 

67 ButlerURI, 

68 Config, 

69 ConfigSubset, 

70 DataCoordinate, 

71 DataId, 

72 DataIdValue, 

73 DatasetRef, 

74 DatasetType, 

75 Datastore, 

76 Dimension, 

77 DimensionConfig, 

78 FileDataset, 

79 Progress, 

80 StorageClassFactory, 

81 Timespan, 

82 ValidationError, 

83 VERBOSE, 

84) 

85from .core.repoRelocation import BUTLER_ROOT_TAG 

86from .core.utils import transactional, getClassOf 

87from ._deferredDatasetHandle import DeferredDatasetHandle 

88from ._butlerConfig import ButlerConfig 

89from .registry import ( 

90 Registry, 

91 RegistryConfig, 

92 RegistryDefaults, 

93 CollectionSearch, 

94 CollectionType, 

95 ConflictingDefinitionError, 

96 DatasetIdGenEnum, 

97) 

98from .transfers import RepoExportContext 

99 

100log = logging.getLogger(__name__) 

101 

102 

103class ButlerValidationError(ValidationError): 

104 """There is a problem with the Butler configuration.""" 

105 pass 

106 

107 

108class PruneCollectionsArgsError(TypeError): 

109 """Base class for errors relating to Butler.pruneCollections input 

110 arguments. 

111 """ 

112 pass 

113 

114 

115class PurgeWithoutUnstorePruneCollectionsError(PruneCollectionsArgsError): 

116 """Raised when purge and unstore are both required to be True, and 

117 purge is True but unstore is False. 

118 """ 

119 

120 def __init__(self) -> None: 

121 super().__init__("Cannot pass purge=True without unstore=True.") 

122 

123 

124class RunWithoutPurgePruneCollectionsError(PruneCollectionsArgsError): 

125 """Raised when pruning a RUN collection but purge is False.""" 

126 

127 def __init__(self, collectionType: CollectionType): 

128 self.collectionType = collectionType 

129 super().__init__(f"Cannot prune RUN collection {self.collectionType.name} without purge=True.") 

130 

131 

132class PurgeUnsupportedPruneCollectionsError(PruneCollectionsArgsError): 

133 """Raised when purge is True but is not supported for the given 

134 collection.""" 

135 

136 def __init__(self, collectionType: CollectionType): 

137 self.collectionType = collectionType 

138 super().__init__( 

139 f"Cannot prune {self.collectionType} collection {self.collectionType.name} with purge=True.") 

140 

141 

142class Butler: 

143 """Main entry point for the data access system. 

144 

145 Parameters 

146 ---------- 

147 config : `ButlerConfig`, `Config` or `str`, optional 

148 Configuration. Anything acceptable to the 

149 `ButlerConfig` constructor. If a directory path 

150 is given the configuration will be read from a ``butler.yaml`` file in 

151 that location. If `None` is given default values will be used. 

152 butler : `Butler`, optional 

153 If provided, construct a new Butler that uses the same registry and 

154 datastore as the given one, but with the given collection and run. 

155 Incompatible with the ``config``, ``searchPaths``, and ``writeable`` 

156 arguments. 

157 collections : `str` or `Iterable` [ `str` ], optional 

158 An expression specifying the collections to be searched (in order) when 

159 reading datasets. 

160 This may be a `str` collection name or an iterable thereof. 

161 See :ref:`daf_butler_collection_expressions` for more information. 

162 These collections are not registered automatically and must be 

163 manually registered before they are used by any method, but they may be 

164 manually registered after the `Butler` is initialized. 

165 run : `str`, optional 

166 Name of the `~CollectionType.RUN` collection new datasets should be 

167 inserted into. If ``collections`` is `None` and ``run`` is not `None`, 

168 ``collections`` will be set to ``[run]``. If not `None`, this 

169 collection will automatically be registered. If this is not set (and 

170 ``writeable`` is not set either), a read-only butler will be created. 

171 searchPaths : `list` of `str`, optional 

172 Directory paths to search when calculating the full Butler 

173 configuration. Not used if the supplied config is already a 

174 `ButlerConfig`. 

175 writeable : `bool`, optional 

176 Explicitly sets whether the butler supports write operations. If not 

177 provided, a read-write butler is created if ``run`` is not `None`, and a 

178 read-only butler otherwise. 

179 inferDefaults : `bool`, optional 

180 If `True` (default) infer default data ID values from the values 

181 present in the datasets in ``collections``: if all collections have the 

182 same value (or no value) for a governor dimension, that value will be 

183 the default for that dimension. Nonexistent collections are ignored. 

184 If a default value is provided explicitly for a governor dimension via 

185 ``**kwargs``, no default will be inferred for that dimension. 

186 **kwargs : `str` 

187 Default data ID key-value pairs. These may only identify "governor" 

188 dimensions like ``instrument`` and ``skymap``. 

189 

190 Examples 

191 -------- 

192 While there are many ways to control exactly how a `Butler` interacts with 

193 the collections in its `Registry`, the most common cases are still simple. 

194 

195 For a read-only `Butler` that searches one collection, do:: 

196 

197 butler = Butler("/path/to/repo", collections=["u/alice/DM-50000"]) 

198 

199 For a read-write `Butler` that writes to and reads from a 

200 `~CollectionType.RUN` collection:: 

201 

202 butler = Butler("/path/to/repo", run="u/alice/DM-50000/a") 

203 

204 The `Butler` passed to a ``PipelineTask`` is often much more complex, 

205 because we want to write to one `~CollectionType.RUN` collection but read 

206 from several others (as well):: 

207 

208 butler = Butler("/path/to/repo", run="u/alice/DM-50000/a", 

209 collections=["u/alice/DM-50000/a", 

210 "u/bob/DM-49998", 

211 "HSC/defaults"]) 

212 

213 This butler will `put` new datasets to the run ``u/alice/DM-50000/a``. 

214 Datasets will be read first from that run (since it appears first in the 

215 chain), and then from ``u/bob/DM-49998`` and finally ``HSC/defaults``. 

216 

217 Finally, one can always create a `Butler` with no collections:: 

218 

219 butler = Butler("/path/to/repo", writeable=True) 

220 

221 This can be extremely useful when you just want to use ``butler.registry``, 

222 e.g. for inserting dimension data or managing collections, or when the 

223 collections you want to use with the butler are not consistent. 

224 Passing ``writeable`` explicitly here is only necessary if you want to be 

225 able to make changes to the repo - usually the value for ``writeable`` can 

226 be guessed from the collection arguments provided, but it defaults to 

227 `False` when no collection arguments are provided. 

228 """ 

229 def __init__(self, config: Union[Config, str, None] = None, *, 

230 butler: Optional[Butler] = None, 

231 collections: Any = None, 

232 run: Optional[str] = None, 

233 searchPaths: Optional[List[str]] = None, 

234 writeable: Optional[bool] = None, 

235 inferDefaults: bool = True, 

236 **kwargs: str, 

237 ): 

238 defaults = RegistryDefaults(collections=collections, run=run, infer=inferDefaults, **kwargs) 

239 # Load registry, datastore, etc. from config or existing butler. 

240 if butler is not None: 

241 if config is not None or searchPaths is not None or writeable is not None: 

242 raise TypeError("Cannot pass 'config', 'searchPaths', or 'writeable' " 

243 "arguments with 'butler' argument.") 

244 self.registry = butler.registry.copy(defaults) 

245 self.datastore = butler.datastore 

246 self.storageClasses = butler.storageClasses 

247 self._config: ButlerConfig = butler._config 

248 else: 

249 self._config = ButlerConfig(config, searchPaths=searchPaths) 

250 if "root" in self._config: 

251 butlerRoot = self._config["root"] 

252 else: 

253 butlerRoot = self._config.configDir 

254 if writeable is None: 

255 writeable = run is not None 

256 self.registry = Registry.fromConfig(self._config, butlerRoot=butlerRoot, writeable=writeable, 

257 defaults=defaults) 

258 self.datastore = Datastore.fromConfig(self._config, self.registry.getDatastoreBridgeManager(), 

259 butlerRoot=butlerRoot) 

260 self.storageClasses = StorageClassFactory() 

261 self.storageClasses.addFromConfig(self._config) 

262 if "run" in self._config or "collection" in self._config: 

263 raise ValueError("Passing a run or collection via configuration is no longer supported.") 

264 

265 GENERATION: ClassVar[int] = 3 

266 """This is a Generation 3 Butler. 

267 

268 This attribute may be removed in the future, once the Generation 2 Butler 

269 interface has been fully retired; it should only be used in transitional 

270 code. 

271 """ 

272 

273 @staticmethod 

274 def makeRepo(root: str, config: Union[Config, str, None] = None, 

275 dimensionConfig: Union[Config, str, None] = None, standalone: bool = False, 

276 searchPaths: Optional[List[str]] = None, forceConfigRoot: bool = True, 

277 outfile: Optional[str] = None, overwrite: bool = False) -> Config: 

278 """Create an empty data repository by adding a butler.yaml config 

279 to a repository root directory. 

280 

281 Parameters 

282 ---------- 

283 root : `str` or `ButlerURI` 

284 Path or URI to the root location of the new repository. Will be 

285 created if it does not exist. 

286 config : `Config` or `str`, optional 

287 Configuration to write to the repository, after setting any 

288 root-dependent Registry or Datastore config options. Cannot 

289 be a `ButlerConfig` or a `ConfigSubset`. If `None`, default 

290 configuration will be used. Root-dependent config options 

291 specified in this config are overwritten if ``forceConfigRoot`` 

292 is `True`. 

293 dimensionConfig : `Config` or `str`, optional 

294 Configuration for dimensions; used to initialize the registry 

295 database. 

296 standalone : `bool` 

297 If `True`, write all expanded defaults, not just customized or 

298 repository-specific settings. 

299 This (mostly) decouples the repository from the default 

300 configuration, insulating it from changes to the defaults (which 

301 may be good or bad, depending on the nature of the changes). 

302 Future *additions* to the defaults will still be picked up when 

303 initializing `Butlers` to repos created with ``standalone=True``. 

304 searchPaths : `list` of `str`, optional 

305 Directory paths to search when calculating the full butler 

306 configuration. 

307 forceConfigRoot : `bool`, optional 

308 If `False`, any values present in the supplied ``config`` that 

309 would normally be reset are not overridden and will appear 

310 directly in the output config. This allows non-standard overrides 

311 of the root directory for a datastore or registry to be given. 

312 If this parameter is `True` the values for ``root`` will be 

313 forced into the resulting config if appropriate. 

314 outfile : `str`, optional 

315 If not `None`, the output configuration will be written to this 

316 location rather than into the repository itself. Can be a URI 

317 string. Can refer to a directory that will be used to write 

318 ``butler.yaml``. 

319 overwrite : `bool`, optional 

320 Create a new configuration file even if one already exists 

321 in the specified output location. Default is to raise 

322 an exception. 

323 

324 Returns 

325 ------- 

326 config : `Config` 

327 The updated `Config` instance written to the repo. 

328 

329 Raises 

330 ------ 

331 ValueError 

332 Raised if a ButlerConfig or ConfigSubset is passed instead of a 

333 regular Config (as these subclasses would make it impossible to 

334 support ``standalone=False``). 

335 FileExistsError 

336 Raised if the output config file already exists. 

337 os.error 

338 Raised if the directory does not exist, exists but is not a 

339 directory, or cannot be created. 

340 

341 Notes 

342 ----- 

343 Note that when ``standalone=False`` (the default), the configuration 

344 search path (see `ConfigSubset.defaultSearchPaths`) that was used to 

345 construct the repository should also be used to construct any Butlers 

346 to avoid configuration inconsistencies. 
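
As a minimal, illustrative sketch (the repository path below is
hypothetical), a new repository could be created and then opened with::

    Butler.makeRepo("/path/to/new/repo")
    butler = Butler("/path/to/new/repo", writeable=True)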

347 """ 

348 if isinstance(config, (ButlerConfig, ConfigSubset)): 

349 raise ValueError("makeRepo must be passed a regular Config without defaults applied.") 

350 

351 # Ensure that the root of the repository exists or can be made 

352 uri = ButlerURI(root, forceDirectory=True) 

353 uri.mkdir() 

354 

355 config = Config(config) 

356 

357 # If we are creating a new repo from scratch with relative roots, 

358 # do not propagate an explicit root from the config file 

359 if "root" in config: 

360 del config["root"] 

361 

362 full = ButlerConfig(config, searchPaths=searchPaths) # this applies defaults 

363 datastoreClass: Type[Datastore] = doImport(full["datastore", "cls"]) 

364 datastoreClass.setConfigRoot(BUTLER_ROOT_TAG, config, full, overwrite=forceConfigRoot) 

365 

366 # if key exists in given config, parse it, otherwise parse the defaults 

367 # in the expanded config 

368 if config.get(("registry", "db")): 

369 registryConfig = RegistryConfig(config) 

370 else: 

371 registryConfig = RegistryConfig(full) 

372 defaultDatabaseUri = registryConfig.makeDefaultDatabaseUri(BUTLER_ROOT_TAG) 

373 if defaultDatabaseUri is not None: 

374 Config.updateParameters(RegistryConfig, config, full, 

375 toUpdate={"db": defaultDatabaseUri}, 

376 overwrite=forceConfigRoot) 

377 else: 

378 Config.updateParameters(RegistryConfig, config, full, toCopy=("db",), 

379 overwrite=forceConfigRoot) 

380 

381 if standalone: 

382 config.merge(full) 

383 else: 

384 # Always expand the registry.managers section into the per-repo 

385 # config, because after the database schema is created, it's not 

386 # allowed to change anymore. Note that in the standalone=True 

387 # branch, _everything_ in the config is expanded, so there's no 

388 # need to special case this. 

389 Config.updateParameters(RegistryConfig, config, full, toCopy=("managers",), overwrite=False) 

390 configURI: Union[str, ButlerURI] 

391 if outfile is not None: 

392 # When writing to a separate location we must include 

393 # the root of the butler repo in the config else it won't know 

394 # where to look. 

395 config["root"] = uri.geturl() 

396 configURI = outfile 

397 else: 

398 configURI = uri 

399 config.dumpToUri(configURI, overwrite=overwrite) 

400 

401 # Create Registry and populate tables 

402 registryConfig = RegistryConfig(config.get("registry")) 

403 dimensionConfig = DimensionConfig(dimensionConfig) 

404 Registry.createFromConfig(registryConfig, dimensionConfig=dimensionConfig, butlerRoot=root) 

405 

406 log.log(VERBOSE, "Wrote new Butler configuration file to %s", configURI) 

407 

408 return config 

409 

410 @classmethod 

411 def _unpickle(cls, config: ButlerConfig, collections: Optional[CollectionSearch], run: Optional[str], 

412 defaultDataId: Dict[str, str], writeable: bool) -> Butler: 

413 """Callable used to unpickle a Butler. 

414 

415 We prefer not to use ``Butler.__init__`` directly so we can force some 

416 of its many arguments to be keyword-only (note that ``__reduce__`` 

417 can only invoke callables with positional arguments). 

418 

419 Parameters 

420 ---------- 

421 config : `ButlerConfig` 

422 Butler configuration, already coerced into a true `ButlerConfig` 

423 instance (and hence after any search paths for overrides have been 

424 utilized). 

425 collections : `CollectionSearch` 

426 Names of the default collections to read from. 

427 run : `str`, optional 

428 Name of the default `~CollectionType.RUN` collection to write to. 

429 defaultDataId : `dict` [ `str`, `str` ] 

430 Default data ID values. 

431 writeable : `bool` 

432 Whether the Butler should support write operations. 

433 

434 Returns 

435 ------- 

436 butler : `Butler` 

437 A new `Butler` instance. 

438 """ 

439 # MyPy doesn't recognize that the kwargs below are totally valid; it 

440 # seems to think '**defaultDataId' is a _positional_ argument! 

441 return cls(config=config, collections=collections, run=run, writeable=writeable, 

442 **defaultDataId) # type: ignore 

443 

444 def __reduce__(self) -> tuple: 

445 """Support pickling. 

446 """ 

447 return (Butler._unpickle, (self._config, self.collections, self.run, 

448 self.registry.defaults.dataId.byName(), 

449 self.registry.isWriteable())) 

450 

451 def __str__(self) -> str: 

452 return "Butler(collections={}, run={}, datastore='{}', registry='{}')".format( 

453 self.collections, self.run, self.datastore, self.registry) 

454 

455 def isWriteable(self) -> bool: 

456 """Return `True` if this `Butler` supports write operations. 

457 """ 

458 return self.registry.isWriteable() 

459 

460 @contextlib.contextmanager 

461 def transaction(self) -> Iterator[None]: 

462 """Context manager supporting `Butler` transactions. 

463 

464 Transactions can be nested. 
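
A rough usage sketch (dataset type names, ``dataId``, and the objects are
illustrative, and ``butler`` is assumed to have a default run); if any
operation inside the block raises, the enclosed registry and datastore
changes are rolled back together::

    with butler.transaction():
        butler.put(image, "calexp", dataId)
        butler.put(catalog, "src", dataId)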

465 """ 

466 with self.registry.transaction(): 

467 with self.datastore.transaction(): 

468 yield 

469 

470 def _standardizeArgs(self, datasetRefOrType: Union[DatasetRef, DatasetType, str], 

471 dataId: Optional[DataId] = None, **kwds: Any 

472 ) -> Tuple[DatasetType, Optional[DataId]]: 

473 """Standardize the arguments passed to several Butler APIs. 

474 

475 Parameters 

476 ---------- 

477 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

478 When `DatasetRef` the `dataId` should be `None`. 

479 Otherwise the `DatasetType` or name thereof. 

480 dataId : `dict` or `DataCoordinate` 

481 A `dict` of `Dimension` link name, value pairs that label the 

482 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

483 should be provided as the first argument. 

484 kwds 

485 Additional keyword arguments used to augment or construct a 

486 `DataCoordinate`. See `DataCoordinate.standardize` 

487 parameters. 

488 

489 Returns 

490 ------- 

491 datasetType : `DatasetType` 

492 A `DatasetType` instance extracted from ``datasetRefOrType``. 

493 dataId : `dict` or `DataId`, optional 

494 Argument that can be used (along with ``kwds``) to construct a 

495 `DataId`. 

496 

497 Notes 

498 ----- 

499 Butler APIs that conceptually need a DatasetRef also allow passing a 

500 `DatasetType` (or the name of one) and a `DataId` (or a dict and 

501 keyword arguments that can be used to construct one) separately. This 

502 method accepts those arguments and always returns a true `DatasetType` 

503 and a `DataId` or `dict`. 

504 

505 Standardization of `dict` vs `DataId` is best handled by passing the 

506 returned ``dataId`` (and ``kwds``) to `Registry` APIs, which are 

507 generally similarly flexible. 

508 """ 

509 externalDatasetType: Optional[DatasetType] = None 

510 internalDatasetType: Optional[DatasetType] = None 

511 if isinstance(datasetRefOrType, DatasetRef): 

512 if dataId is not None or kwds: 

513 raise ValueError("DatasetRef given, cannot use dataId as well") 

514 externalDatasetType = datasetRefOrType.datasetType 

515 dataId = datasetRefOrType.dataId 

516 else: 

517 # Don't check whether DataId is provided, because Registry APIs 

518 # can usually construct a better error message when it wasn't. 

519 if isinstance(datasetRefOrType, DatasetType): 

520 externalDatasetType = datasetRefOrType 

521 else: 

522 internalDatasetType = self.registry.getDatasetType(datasetRefOrType) 

523 

524 # Check that they are self-consistent 

525 if externalDatasetType is not None: 

526 internalDatasetType = self.registry.getDatasetType(externalDatasetType.name) 

527 if externalDatasetType != internalDatasetType: 

528 raise ValueError(f"Supplied dataset type ({externalDatasetType}) inconsistent with " 

529 f"registry definition ({internalDatasetType})") 

530 

531 assert internalDatasetType is not None 

532 return internalDatasetType, dataId 

533 

534 def _findDatasetRef(self, datasetRefOrType: Union[DatasetRef, DatasetType, str], 

535 dataId: Optional[DataId] = None, *, 

536 collections: Any = None, 

537 allowUnresolved: bool = False, 

538 **kwds: Any) -> DatasetRef: 

539 """Shared logic for methods that start with a search for a dataset in 

540 the registry. 

541 

542 Parameters 

543 ---------- 

544 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

545 When `DatasetRef` the `dataId` should be `None`. 

546 Otherwise the `DatasetType` or name thereof. 

547 dataId : `dict` or `DataCoordinate`, optional 

548 A `dict` of `Dimension` link name, value pairs that label the 

549 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

550 should be provided as the first argument. 

551 collections : Any, optional 

552 Collections to be searched, overriding ``self.collections``. 

553 Can be any of the types supported by the ``collections`` argument 

554 to butler construction. 

555 allowUnresolved : `bool`, optional 

556 If `True`, return an unresolved `DatasetRef` if finding a resolved 

557 one in the `Registry` fails. Defaults to `False`. 

558 kwds 

559 Additional keyword arguments used to augment or construct a 

560 `DataId`. See `DataId` parameters. 

561 

562 Returns 

563 ------- 

564 ref : `DatasetRef` 

565 A reference to the dataset identified by the given arguments. 

566 

567 Raises 

568 ------ 

569 LookupError 

570 Raised if no matching dataset exists in the `Registry` (and 

571 ``allowUnresolved is False``). 

572 ValueError 

573 Raised if a resolved `DatasetRef` was passed as an input, but it 

574 differs from the one found in the registry. 

575 TypeError 

576 Raised if no collections were provided. 

577 """ 

578 datasetType, dataId = self._standardizeArgs(datasetRefOrType, dataId, **kwds) 

579 if isinstance(datasetRefOrType, DatasetRef): 

580 idNumber = datasetRefOrType.id 

581 else: 

582 idNumber = None 

583 timespan: Optional[Timespan] = None 

584 

585 # Process dimension records that are using record information 

586 # rather than ids 

587 newDataId: Dict[str, DataIdValue] = {} 

588 byRecord: Dict[str, Dict[str, Any]] = defaultdict(dict) 

589 

590 # If the entire dataId comes from keyword parameters we do not need 

591 # to do anything here because they can't be of the form 

592 # exposure.obs_id because a "." is not allowed in a keyword parameter. 

593 if dataId: 

594 for k, v in dataId.items(): 

595 # If we have a Dimension we do not need to do anything 

596 # because it cannot be a compound key. 

597 if isinstance(k, str) and "." in k: 

598 # Someone is using a more human-readable dataId 

599 dimensionName, record = k.split(".", 1) 

600 byRecord[dimensionName][record] = v 

601 elif isinstance(k, Dimension): 

602 newDataId[k.name] = v 

603 else: 

604 newDataId[k] = v 

605 

606 # Go through the updated dataId and check the type in case someone is 

607 # using an alternate key. We have already filtered out the compound 

608 # dimension.record keys. 

609 not_dimensions = {} 

610 

611 # Will need to look in the dataId and the keyword arguments 

612 # and will remove them if they need to be fixed or are unrecognized. 

613 for dataIdDict in (newDataId, kwds): 

614 # Use a list so we can adjust the dict safely in the loop 

615 for dimensionName in list(dataIdDict): 

616 value = dataIdDict[dimensionName] 

617 try: 

618 dimension = self.registry.dimensions.getStaticDimensions()[dimensionName] 

619 except KeyError: 

620 # This is not a real dimension 

621 not_dimensions[dimensionName] = value 

622 del dataIdDict[dimensionName] 

623 continue 

624 

625 # Convert an integral type to an explicit int to simplify 

626 # comparisons here 

627 if isinstance(value, numbers.Integral): 

628 value = int(value) 

629 

630 if not isinstance(value, dimension.primaryKey.getPythonType()): 

631 for alternate in dimension.alternateKeys: 

632 if isinstance(value, alternate.getPythonType()): 

633 byRecord[dimensionName][alternate.name] = value 

634 del dataIdDict[dimensionName] 

635 log.debug("Converting dimension %s to %s.%s=%s", 

636 dimensionName, dimensionName, alternate.name, value) 

637 break 

638 else: 

639 log.warning("Type mismatch found for value '%r' provided for dimension %s. " 

640 "Could not find matching alternative (primary key has type %s) " 

641 "so attempting to use as-is.", 

642 value, dimensionName, dimension.primaryKey.getPythonType()) 

643 

644 # If we have some unrecognized dimensions we have to try to connect 

645 # them to records in other dimensions. This is made more complicated 

646 # by some dimensions having records with clashing names. A mitigation 

647 # is that we can tell by this point which dimensions are missing 

648 # for the DatasetType but this does not work for calibrations 

649 # where additional dimensions can be used to constrain the temporal 

650 # axis. 

651 if not_dimensions: 

652 # Calculate missing dimensions 

653 provided = set(newDataId) | set(kwds) | set(byRecord) 

654 missingDimensions = datasetType.dimensions.names - provided 

655 

656 # For calibrations we may well be needing temporal dimensions 

657 # so rather than always including all dimensions in the scan 

658 # restrict things a little. It is still possible for there 

659 # to be confusion over day_obs in visit vs exposure for example. 

660 # If we are not searching calibration collections things may 

661 # fail but they are going to fail anyway because of the 

662 # ambiguity of the dataId... 

663 candidateDimensions: Set[str] = set() 

664 candidateDimensions.update(missingDimensions) 

665 if datasetType.isCalibration(): 

666 for dim in self.registry.dimensions.getStaticDimensions(): 

667 if dim.temporal: 

668 candidateDimensions.add(str(dim)) 

669 

670 # Lookup table for the first association with a dimension 

671 guessedAssociation: Dict[str, Dict[str, Any]] = defaultdict(dict) 

672 

673 # Keep track of whether an item is associated with multiple 

674 # dimensions. 

675 counter: Counter[str] = Counter() 

676 assigned: Dict[str, Set[str]] = defaultdict(set) 

677 

678 # Go through the missing dimensions and associate the 

679 # given names with records within those dimensions 

680 for dimensionName in candidateDimensions: 

681 dimension = self.registry.dimensions.getStaticDimensions()[dimensionName] 

682 fields = dimension.metadata.names | dimension.uniqueKeys.names 

683 for field in not_dimensions: 

684 if field in fields: 

685 guessedAssociation[dimensionName][field] = not_dimensions[field] 

686 counter[dimensionName] += 1 

687 assigned[field].add(dimensionName) 

688 

689 # There is a chance we have allocated a single dataId item 

690 # to multiple dimensions. Need to decide which should be retained. 

691 # For now assume that the most popular alternative wins. 

692 # This means that day_obs with seq_num will result in 

693 # exposure.day_obs and not visit.day_obs 

694 # Also prefer an explicitly missing dimension over an inferred 

695 # temporal dimension. 

696 for fieldName, assignedDimensions in assigned.items(): 

697 if len(assignedDimensions) > 1: 

698 # Pick the most popular (preferring mandatory dimensions) 

699 requiredButMissing = assignedDimensions.intersection(missingDimensions) 

700 if requiredButMissing: 

701 candidateDimensions = requiredButMissing 

702 else: 

703 candidateDimensions = assignedDimensions 

704 

705 # Select the relevant items and get a new restricted 

706 # counter. 

707 theseCounts = {k: v for k, v in counter.items() if k in candidateDimensions} 

708 duplicatesCounter: Counter[str] = Counter() 

709 duplicatesCounter.update(theseCounts) 

710 

711 # Choose the most common. If they are equally common 

712 # we will pick the one that was found first. 

713 # most_common() returns a list of (key, count) tuples. 

714 selected = duplicatesCounter.most_common(1)[0][0] 

715 

716 log.debug("Ambiguous dataId entry '%s' associated with multiple dimensions: %s." 

717 " Removed ambiguity by choosing dimension %s.", 

718 fieldName, ", ".join(assignedDimensions), selected) 

719 

720 for candidateDimension in assignedDimensions: 

721 if candidateDimension != selected: 

722 del guessedAssociation[candidateDimension][fieldName] 

723 

724 # Update the record look up dict with the new associations 

725 for dimensionName, values in guessedAssociation.items(): 

726 if values: # A dict might now be empty 

727 log.debug("Assigned non-dimension dataId keys to dimension %s: %s", 

728 dimensionName, values) 

729 byRecord[dimensionName].update(values) 

730 

731 if byRecord: 

732 # Some record specifiers were found so we need to convert 

733 # them to the Id form 

734 for dimensionName, values in byRecord.items(): 

735 if dimensionName in newDataId: 

736 log.warning("DataId specified explicit %s dimension value of %s in addition to" 

737 " general record specifiers for it of %s. Ignoring record information.", 

738 dimensionName, newDataId[dimensionName], str(values)) 

739 continue 

740 

741 # Build up a WHERE expression -- use single quotes 

742 def quote(s: Any) -> str: 

743 if isinstance(s, str): 

744 return f"'{s}'" 

745 else: 

746 return s 

747 

748 where = " AND ".join(f"{dimensionName}.{k} = {quote(v)}" 

749 for k, v in values.items()) 

750 

751 # Hopefully we get a single record that matches 

752 records = set(self.registry.queryDimensionRecords(dimensionName, dataId=newDataId, 

753 where=where, **kwds)) 

754 

755 if len(records) != 1: 

756 if len(records) > 1: 

757 log.debug("Received %d records from constraints of %s", len(records), str(values)) 

758 for r in records: 

759 log.debug("- %s", str(r)) 

760 raise RuntimeError(f"DataId specification for dimension {dimensionName} is not" 

761 f" uniquely constrained to a single dataset by {values}." 

762 f" Got {len(records)} results.") 

763 raise RuntimeError(f"DataId specification for dimension {dimensionName} matched no" 

764 f" records when constrained by {values}") 

765 

766 # Get the primary key from the real dimension object 

767 dimension = self.registry.dimensions.getStaticDimensions()[dimensionName] 

768 if not isinstance(dimension, Dimension): 

769 raise RuntimeError( 

770 f"{dimension.name} is not a true dimension, and cannot be used in data IDs." 

771 ) 

772 newDataId[dimensionName] = getattr(records.pop(), dimension.primaryKey.name) 

773 

774 # We have modified the dataId so need to switch to it 

775 dataId = newDataId 

776 

777 if datasetType.isCalibration(): 

778 # Because this is a calibration dataset, first try to standardize 

779 # the data ID without restricting the dimensions to 

780 # those of the dataset type requested, because there may be extra 

781 # dimensions that provide temporal information for a validity-range 

782 # lookup. 

783 dataId = DataCoordinate.standardize(dataId, universe=self.registry.dimensions, 

784 defaults=self.registry.defaults.dataId, **kwds) 

785 if dataId.graph.temporal: 

786 dataId = self.registry.expandDataId(dataId) 

787 timespan = dataId.timespan 

788 else: 

789 # Standardize the data ID to just the dimensions of the dataset 

790 # type instead of letting registry.findDataset do it, so we get the 

791 # result even if no dataset is found. 

792 dataId = DataCoordinate.standardize(dataId, graph=datasetType.dimensions, 

793 defaults=self.registry.defaults.dataId, **kwds) 

794 # Always lookup the DatasetRef, even if one is given, to ensure it is 

795 # present in the current collection. 

796 ref = self.registry.findDataset(datasetType, dataId, collections=collections, timespan=timespan) 

797 if ref is None: 

798 if allowUnresolved: 

799 return DatasetRef(datasetType, dataId) 

800 else: 

801 if collections is None: 

802 collections = self.registry.defaults.collections 

803 raise LookupError(f"Dataset {datasetType.name} with data ID {dataId} " 

804 f"could not be found in collections {collections}.") 

805 if idNumber is not None and idNumber != ref.id: 

806 if collections is None: 

807 collections = self.registry.defaults.collections 

808 raise ValueError(f"DatasetRef.id provided ({idNumber}) does not match " 

809 f"id ({ref.id}) in registry in collections {collections}.") 

810 return ref 

811 

812 @transactional 

813 def put(self, obj: Any, datasetRefOrType: Union[DatasetRef, DatasetType, str], 

814 dataId: Optional[DataId] = None, *, 

815 run: Optional[str] = None, 

816 **kwds: Any) -> DatasetRef: 

817 """Store and register a dataset. 

818 

819 Parameters 

820 ---------- 

821 obj : `object` 

822 The dataset. 

823 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

824 When `DatasetRef` is provided, ``dataId`` should be `None`. 

825 Otherwise the `DatasetType` or name thereof. 

826 dataId : `dict` or `DataCoordinate` 

827 A `dict` of `Dimension` link name, value pairs that label the 

828 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

829 should be provided as the second argument. 

830 run : `str`, optional 

831 The name of the run the dataset should be added to, overriding 

832 ``self.run``. 

833 kwds 

834 Additional keyword arguments used to augment or construct a 

835 `DataCoordinate`. See `DataCoordinate.standardize` 

836 parameters. 

837 

838 Returns 

839 ------- 

840 ref : `DatasetRef` 

841 A reference to the stored dataset, updated with the correct id if 

842 given. 

843 

844 Raises 

845 ------ 

846 TypeError 

847 Raised if the butler is read-only or if no run has been provided. 

848 """ 

849 log.debug("Butler put: %s, dataId=%s, run=%s", datasetRefOrType, dataId, run) 

850 if not self.isWriteable(): 

851 raise TypeError("Butler is read-only.") 

852 datasetType, dataId = self._standardizeArgs(datasetRefOrType, dataId, **kwds) 

853 if isinstance(datasetRefOrType, DatasetRef) and datasetRefOrType.id is not None: 

854 raise ValueError("DatasetRef must not be in registry, must have None id") 

855 

856 # Add Registry Dataset entry. 

857 dataId = self.registry.expandDataId(dataId, graph=datasetType.dimensions, **kwds) 

858 ref, = self.registry.insertDatasets(datasetType, run=run, dataIds=[dataId]) 

859 

860 # Add Datastore entry. 

861 self.datastore.put(obj, ref) 

862 

863 return ref 

864 

865 def getDirect(self, ref: DatasetRef, *, parameters: Optional[Dict[str, Any]] = None) -> Any: 

866 """Retrieve a stored dataset. 

867 

868 Unlike `Butler.get`, this method allows datasets outside the Butler's 

869 collection to be read as long as the `DatasetRef` that identifies them 

870 can be obtained separately. 

871 

872 Parameters 

873 ---------- 

874 ref : `DatasetRef` 

875 Resolved reference to an already stored dataset. 

876 parameters : `dict` 

877 Additional StorageClass-defined options to control reading, 

878 typically used to efficiently read only a subset of the dataset. 

879 

880 Returns 

881 ------- 

882 obj : `object` 

883 The dataset. 

884 """ 

885 return self.datastore.get(ref, parameters=parameters) 

886 

887 def getDirectDeferred(self, ref: DatasetRef, *, 

888 parameters: Union[dict, None] = None) -> DeferredDatasetHandle: 

889 """Create a `DeferredDatasetHandle` which can later retrieve a dataset, 

890 from a resolved `DatasetRef`. 

891 

892 Parameters 

893 ---------- 

894 ref : `DatasetRef` 

895 Resolved reference to an already stored dataset. 

896 parameters : `dict` 

897 Additional StorageClass-defined options to control reading, 

898 typically used to efficiently read only a subset of the dataset. 

899 

900 Returns 

901 ------- 

902 obj : `DeferredDatasetHandle` 

903 A handle which can be used to retrieve a dataset at a later time. 

904 

905 Raises 

906 ------ 

907 AmbiguousDatasetError 

908 Raised if ``ref.id is None``, i.e. the reference is unresolved. 

909 """ 

910 if ref.id is None: 

911 raise AmbiguousDatasetError( 

912 f"Dataset of type {ref.datasetType.name} with data ID {ref.dataId} is not resolved." 

913 ) 

914 return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters) 

915 

916 def getDeferred(self, datasetRefOrType: Union[DatasetRef, DatasetType, str], 

917 dataId: Optional[DataId] = None, *, 

918 parameters: Union[dict, None] = None, 

919 collections: Any = None, 

920 **kwds: Any) -> DeferredDatasetHandle: 

921 """Create a `DeferredDatasetHandle` which can later retrieve a dataset, 

922 after an immediate registry lookup. 

923 

924 Parameters 

925 ---------- 

926 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

927 When `DatasetRef` the `dataId` should be `None`. 

928 Otherwise the `DatasetType` or name thereof. 

929 dataId : `dict` or `DataCoordinate`, optional 

930 A `dict` of `Dimension` link name, value pairs that label the 

931 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

932 should be provided as the first argument. 

933 parameters : `dict` 

934 Additional StorageClass-defined options to control reading, 

935 typically used to efficiently read only a subset of the dataset. 

936 collections : Any, optional 

937 Collections to be searched, overriding ``self.collections``. 

938 Can be any of the types supported by the ``collections`` argument 

939 to butler construction. 

940 kwds 

941 Additional keyword arguments used to augment or construct a 

942 `DataId`. See `DataId` parameters. 

943 

944 Returns 

945 ------- 

946 obj : `DeferredDatasetHandle` 

947 A handle which can be used to retrieve a dataset at a later time. 

948 

949 Raises 

950 ------ 

951 LookupError 

952 Raised if no matching dataset exists in the `Registry`. 

954 ValueError 

955 Raised if a resolved `DatasetRef` was passed as an input, but it 

956 differs from the one found in the registry. 

957 TypeError 

958 Raised if no collections were provided. 
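
A minimal sketch (dataset type and data ID values are illustrative); the
registry lookup happens immediately, but the actual read is deferred
until ``get`` is called on the returned handle::

    handle = butler.getDeferred("calexp", instrument="HSC", visit=903334,
                                detector=42)
    image = handle.get()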

959 """ 

960 ref = self._findDatasetRef(datasetRefOrType, dataId, collections=collections, **kwds) 

961 return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters) 

962 

963 def get(self, datasetRefOrType: Union[DatasetRef, DatasetType, str], 

964 dataId: Optional[DataId] = None, *, 

965 parameters: Optional[Dict[str, Any]] = None, 

966 collections: Any = None, 

967 **kwds: Any) -> Any: 

968 """Retrieve a stored dataset. 

969 

970 Parameters 

971 ---------- 

972 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

973 When `DatasetRef` the `dataId` should be `None`. 

974 Otherwise the `DatasetType` or name thereof. 

975 dataId : `dict` or `DataCoordinate` 

976 A `dict` of `Dimension` link name, value pairs that label the 

977 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

978 should be provided as the first argument. 

979 parameters : `dict` 

980 Additional StorageClass-defined options to control reading, 

981 typically used to efficiently read only a subset of the dataset. 

982 collections : Any, optional 

983 Collections to be searched, overriding ``self.collections``. 

984 Can be any of the types supported by the ``collections`` argument 

985 to butler construction. 

986 kwds 

987 Additional keyword arguments used to augment or construct a 

988 `DataCoordinate`. See `DataCoordinate.standardize` 

989 parameters. 

990 

991 Returns 

992 ------- 

993 obj : `object` 

994 The dataset. 

995 

996 Raises 

997 ------ 

998 ValueError 

999 Raised if a resolved `DatasetRef` was passed as an input, but it 

1000 differs from the one found in the registry. 

1001 LookupError 

1002 Raised if no matching dataset exists in the `Registry`. 

1003 TypeError 

1004 Raised if no collections were provided. 

1005 

1006 Notes 

1007 ----- 

1008 When looking up datasets in a `~CollectionType.CALIBRATION` collection, 

1009 this method requires that the given data ID include temporal dimensions 

1010 beyond the dimensions of the dataset type itself, in order to find the 

1011 dataset with the appropriate validity range. For example, a "bias" 

1012 dataset with native dimensions ``{instrument, detector}`` could be 

1013 fetched with a ``{instrument, detector, exposure}`` data ID, because 

1014 ``exposure`` is a temporal dimension. 
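
Continuing that example (instrument, detector, exposure, and collection
names are illustrative)::

    bias = butler.get("bias", instrument="HSC", detector=42,
                      exposure=903334, collections="HSC/calib")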

1015 """ 

1016 log.debug("Butler get: %s, dataId=%s, parameters=%s", datasetRefOrType, dataId, parameters) 

1017 ref = self._findDatasetRef(datasetRefOrType, dataId, collections=collections, **kwds) 

1018 return self.getDirect(ref, parameters=parameters) 

1019 

1020 def getURIs(self, datasetRefOrType: Union[DatasetRef, DatasetType, str], 

1021 dataId: Optional[DataId] = None, *, 

1022 predict: bool = False, 

1023 collections: Any = None, 

1024 run: Optional[str] = None, 

1025 **kwds: Any) -> Tuple[Optional[ButlerURI], Dict[str, ButlerURI]]: 

1026 """Returns the URIs associated with the dataset. 

1027 

1028 Parameters 

1029 ---------- 

1030 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

1031 When `DatasetRef` the `dataId` should be `None`. 

1032 Otherwise the `DatasetType` or name thereof. 

1033 dataId : `dict` or `DataCoordinate` 

1034 A `dict` of `Dimension` link name, value pairs that label the 

1035 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

1036 should be provided as the first argument. 

1037 predict : `bool` 

1038 If `True`, allow URIs to be returned for datasets that have not 

1039 been written. 

1040 collections : Any, optional 

1041 Collections to be searched, overriding ``self.collections``. 

1042 Can be any of the types supported by the ``collections`` argument 

1043 to butler construction. 

1044 run : `str`, optional 

1045 Run to use for predictions, overriding ``self.run``. 

1046 kwds 

1047 Additional keyword arguments used to augment or construct a 

1048 `DataCoordinate`. See `DataCoordinate.standardize` 

1049 parameters. 

1050 

1051 Returns 

1052 ------- 

1053 primary : `ButlerURI` 

1054 The URI to the primary artifact associated with this dataset. 

1055 If the dataset was disassembled within the datastore this 

1056 may be `None`. 

1057 components : `dict` 

1058 URIs to any components associated with the dataset artifact. 

1059 Can be empty if there are no components. 

1060 """ 

1061 ref = self._findDatasetRef(datasetRefOrType, dataId, allowUnresolved=predict, 

1062 collections=collections, **kwds) 

1063 if ref.id is None: # only possible if predict is True 

1064 if run is None: 

1065 run = self.run 

1066 if run is None: 

1067 raise TypeError("Cannot predict location with run=None.") 

1068 # Lie about ID, because we can't guess it, and only 

1069 # Datastore.getURIs() will ever see it (and it doesn't use it). 

1070 ref = ref.resolved(id=0, run=run) 

1071 return self.datastore.getURIs(ref, predict) 

1072 

1073 def getURI(self, datasetRefOrType: Union[DatasetRef, DatasetType, str], 

1074 dataId: Optional[DataId] = None, *, 

1075 predict: bool = False, 

1076 collections: Any = None, 

1077 run: Optional[str] = None, 

1078 **kwds: Any) -> ButlerURI: 

1079 """Return the URI to the Dataset. 

1080 

1081 Parameters 

1082 ---------- 

1083 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

1084 When `DatasetRef` the `dataId` should be `None`. 

1085 Otherwise the `DatasetType` or name thereof. 

1086 dataId : `dict` or `DataCoordinate` 

1087 A `dict` of `Dimension` link name, value pairs that label the 

1088 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

1089 should be provided as the first argument. 

1090 predict : `bool` 

1091 If `True`, allow URIs to be returned for datasets that have not 

1092 been written. 

1093 collections : Any, optional 

1094 Collections to be searched, overriding ``self.collections``. 

1095 Can be any of the types supported by the ``collections`` argument 

1096 to butler construction. 

1097 run : `str`, optional 

1098 Run to use for predictions, overriding ``self.run``. 

1099 kwds 

1100 Additional keyword arguments used to augment or construct a 

1101 `DataCoordinate`. See `DataCoordinate.standardize` 

1102 parameters. 

1103 

1104 Returns 

1105 ------- 

1106 uri : `ButlerURI` 

1107 URI pointing to the Dataset within the datastore. If the 

1108 Dataset does not exist in the datastore, and if ``predict`` is 

1109 `True`, the URI will be a prediction and will include a URI 

1110 fragment "#predicted". 

1111 If the datastore does not have entities that relate well 

1112 to the concept of a URI the returned URI string will be 

1113 descriptive. The returned URI is not guaranteed to be obtainable. 

1114 

1115 Raises 

1116 ------ 

1117 LookupError 

1118 Raised if a URI has been requested for a dataset that does not exist and 

1119 guessing is not allowed. 

1120 ValueError 

1121 Raised if a resolved `DatasetRef` was passed as an input, but it 

1122 differs from the one found in the registry. 

1123 TypeError 

1124 Raised if no collections were provided. 

1125 RuntimeError 

1126 Raised if a URI is requested for a dataset that consists of 

1127 multiple artifacts. 
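
For example (dataset type, data ID, and collection name are illustrative;
passing ``predict=True`` and a ``run`` would instead return a predicted
URI for a dataset that has not been written yet)::

    uri = butler.getURI("calexp", instrument="HSC", visit=903334,
                        detector=42, collections="u/alice/DM-50000/a")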

1128 """ 

1129 primary, components = self.getURIs(datasetRefOrType, dataId=dataId, predict=predict, 

1130 collections=collections, run=run, **kwds) 

1131 

1132 if primary is None or components: 

1133 raise RuntimeError(f"Dataset ({datasetRefOrType}) includes distinct URIs for components. " 

1134 "Use Butler.getURIs() instead.") 

1135 return primary 

1136 

1137 def retrieveArtifacts(self, refs: Iterable[DatasetRef], 

1138 destination: Union[str, ButlerURI], transfer: str = "auto", 

1139 preserve_path: bool = True, 

1140 overwrite: bool = False) -> List[ButlerURI]: 

1141 """Retrieve the artifacts associated with the supplied refs. 

1142 

1143 Parameters 

1144 ---------- 

1145 refs : iterable of `DatasetRef` 

1146 The datasets for which artifacts are to be retrieved. 

1147 A single ref can result in multiple artifacts. The refs must 

1148 be resolved. 

1149 destination : `ButlerURI` or `str` 

1150 Location to write the artifacts. 

1151 transfer : `str`, optional 

1152 Method to use to transfer the artifacts. Must be one of the options 

1153 supported by `ButlerURI.transfer_from()`. "move" is not allowed. 

1154 preserve_path : `bool`, optional 

1155 If `True` the full path of the artifact within the datastore 

1156 is preserved. If `False` the final file component of the path 

1157 is used. 

1158 overwrite : `bool`, optional 

1159 If `True` allow transfers to overwrite existing files at the 

1160 destination. 

1161 

1162 Returns 

1163 ------- 

1164 targets : `list` of `ButlerURI` 

1165 URIs of file artifacts in destination location. Order is not 

1166 preserved. 

1167 

1168 Notes 

1169 ----- 

1170 For non-file datastores the artifacts written to the destination 

1171 may not match the representation inside the datastore. For example 

1172 a hierarchical data structure in a NoSQL database may well be stored 

1173 as a JSON file. 
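
A rough sketch (dataset type, collection, and destination are
illustrative); this copies the file artifacts backing the matching
datasets into a local directory::

    refs = butler.registry.queryDatasets("calexp",
                                         collections="u/alice/DM-50000/a")
    paths = butler.retrieveArtifacts(refs, "/tmp/export", transfer="copy")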

1174 """ 

1175 return self.datastore.retrieveArtifacts(refs, ButlerURI(destination), transfer=transfer, 

1176 preserve_path=preserve_path, overwrite=overwrite) 

1177 

1178 def datasetExists(self, datasetRefOrType: Union[DatasetRef, DatasetType, str], 

1179 dataId: Optional[DataId] = None, *, 

1180 collections: Any = None, 

1181 **kwds: Any) -> bool: 

1182 """Return True if the Dataset is actually present in the Datastore. 

1183 

1184 Parameters 

1185 ---------- 

1186 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

1187 When `DatasetRef` the `dataId` should be `None`. 

1188 Otherwise the `DatasetType` or name thereof. 

1189 dataId : `dict` or `DataCoordinate` 

1190 A `dict` of `Dimension` link name, value pairs that label the 

1191 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

1192 should be provided as the first argument. 

1193 collections : Any, optional 

1194 Collections to be searched, overriding ``self.collections``. 

1195 Can be any of the types supported by the ``collections`` argument 

1196 to butler construction. 

1197 kwds 

1198 Additional keyword arguments used to augment or construct a 

1199 `DataCoordinate`. See `DataCoordinate.standardize` 

1200 parameters. 

1201 

1202 Raises 

1203 ------ 

1204 LookupError 

1205 Raised if the dataset is not even present in the Registry. 

1206 ValueError 

1207 Raised if a resolved `DatasetRef` was passed as an input, but it 

1208 differs from the one found in the registry. 

1209 TypeError 

1210 Raised if no collections were provided. 
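
For example (dataset type and data ID values are illustrative)::

    if butler.datasetExists("calexp", instrument="HSC", visit=903334,
                            detector=42):
        ...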

1211 """ 

1212 ref = self._findDatasetRef(datasetRefOrType, dataId, collections=collections, **kwds) 

1213 return self.datastore.exists(ref) 

1214 

1215 def removeRuns(self, names: Iterable[str], unstore: bool = True) -> None: 

1216 """Remove one or more `~CollectionType.RUN` collections and the 

1217 datasets within them. 

1218 

1219 Parameters 

1220 ---------- 

1221 names : `Iterable` [ `str` ] 

1222 The names of the collections to remove. 

1223 unstore : `bool`, optional 

1224 If `True` (default), delete datasets from all datastores in which 

1225 they are present, and attempt to roll back the registry deletions if 

1226 datastore deletions fail (which may not always be possible). If 

1227 `False`, datastore records for these datasets are still removed, 

1228 but any artifacts (e.g. files) will not be. 

1229 

1230 Raises 

1231 ------ 

1232 TypeError 

1233 Raised if one or more collections are not of type 

1234 `~CollectionType.RUN`. 
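
A minimal sketch (the run name is hypothetical)::

    butler.removeRuns(["u/alice/scratch"], unstore=True)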

1235 """ 

1236 if not self.isWriteable(): 

1237 raise TypeError("Butler is read-only.") 

1238 names = list(names) 

1239 refs: List[DatasetRef] = [] 

1240 for name in names: 

1241 collectionType = self.registry.getCollectionType(name) 

1242 if collectionType is not CollectionType.RUN: 

1243 raise TypeError(f"The collection type of '{name}' is {collectionType.name}, not RUN.") 

1244 refs.extend(self.registry.queryDatasets(..., collections=name, findFirst=True)) 

1245 with self.registry.transaction(): 

1246 if unstore: 

1247 self.datastore.trash(refs) 

1248 else: 

1249 self.datastore.forget(refs) 

1250 for name in names: 

1251 self.registry.removeCollection(name) 

1252 if unstore: 

1253 # Point of no return for removing artifacts 

1254 self.datastore.emptyTrash() 

1255 

1256 def pruneCollection(self, name: str, purge: bool = False, unstore: bool = False, 

1257 unlink: Optional[List[str]] = None) -> None: 

1258 """Remove a collection and possibly prune datasets within it. 

1259 

1260 Parameters 

1261 ---------- 

1262 name : `str` 

1263 Name of the collection to remove. If this is a 

1264 `~CollectionType.TAGGED` or `~CollectionType.CHAINED` collection, 

1265 datasets within the collection are not modified unless ``unstore`` 

1266 is `True`. If this is a `~CollectionType.RUN` collection, 

1267 ``purge`` and ``unstore`` must be `True`, and all datasets in it 

1268 are fully removed from the data repository. 

1269 purge : `bool`, optional 

1270 If `True`, permit `~CollectionType.RUN` collections to be removed, 

1271 fully removing datasets within them. Requires ``unstore=True`` as 

1272 well as an added precaution against accidental deletion. Must be 

1273 `False` (default) if the collection is not a ``RUN``. 

1274 unstore : `bool`, optional 

1275 If `True`, remove all datasets in the collection from all 

1276 datastores in which they appear. 

1277 unlink : `list` [`str`], optional 

1278 Before removing the given collection, unlink it from these 

1279 parent collections. 

1280 

1281 Raises 

1282 ------ 

1283 TypeError 

1284 Raised if the butler is read-only or arguments are mutually 

1285 inconsistent. 

1286 """ 

1287 # See pruneDatasets comments for more information about the logic here; 

1288 # the cases are almost the same, but here we can rely on Registry to 

1289 # take care of everything but Datastore deletion when we remove the 

1290 # collection. 

1291 if not self.isWriteable(): 

1292 raise TypeError("Butler is read-only.") 

1293 collectionType = self.registry.getCollectionType(name) 

1294 if purge and not unstore: 

1295 raise PurgeWithoutUnstorePruneCollectionsError() 

1296 if collectionType is CollectionType.RUN and not purge: 

1297 raise RunWithoutPurgePruneCollectionsError(collectionType) 

1298 if collectionType is not CollectionType.RUN and purge: 

1299 raise PurgeUnsupportedPruneCollectionsError(collectionType) 

1300 

1301 def remove(child: str, parent: str) -> None: 

1302 """Remove a child collection from a parent collection.""" 

1303 # Remove child from parent. 

1304 chain = list(self.registry.getCollectionChain(parent)) 

1305 try: 

1306 chain.remove(child) 

1307 except ValueError as e: 

1308 raise RuntimeError(f"{name} is not a child of {parent}") from e 

1309 self.registry.setCollectionChain(parent, chain) 

1310 

1311 with self.registry.transaction(): 

1312 if unlink: 

1313 for parent in unlink: 

1314 remove(name, parent) 

1315 if unstore: 

1316 refs = self.registry.queryDatasets(..., collections=name, findFirst=True) 

1317 self.datastore.trash(refs) 

1318 self.registry.removeCollection(name) 

1319 

1320 if unstore: 

1321 # Point of no return for removing artifacts 

1322 self.datastore.emptyTrash() 

1323 

1324 def pruneDatasets(self, refs: Iterable[DatasetRef], *, 

1325 disassociate: bool = True, 

1326 unstore: bool = False, 

1327 tags: Iterable[str] = (), 

1328 purge: bool = False, 

1329 run: Optional[str] = None) -> None: 

1330 """Remove one or more datasets from a collection and/or storage. 

1331 

1332 Parameters 

1333 ---------- 

1334 refs : `~collections.abc.Iterable` of `DatasetRef` 

1335 Datasets to prune. These must be "resolved" references (not just 

1336 a `DatasetType` and data ID). 

1337 disassociate : `bool`, optional 

1338 Disassociate pruned datasets from ``tags``, or from all collections 

1339 if ``purge=True``. 

1340 unstore : `bool`, optional 

1341 If `True` (`False` is default) remove these datasets from all 

1342 datastores known to this butler. Note that this will make it 

1343 impossible to retrieve these datasets even via other collections. 

1344 Datasets that are already not stored are ignored by this option. 

1345 tags : `Iterable` [ `str` ], optional 

1346 `~CollectionType.TAGGED` collections to disassociate the datasets 

1347 from. Ignored if ``disassociate`` is `False` or ``purge`` is 

1348 `True`. 

1349 purge : `bool`, optional 

1350 If `True` (`False` is default), completely remove the dataset from 

1351 the `Registry`. To prevent accidental deletions, ``purge`` may 

1352 only be `True` if all of the following conditions are met: 

1353 

1354 - All given datasets are in the given run. 

1355 - ``disassociate`` is `True`. 

1356 - ``unstore`` is `True`. 

1357 

1358 This mode may remove provenance information from datasets other 

1359 than those provided, and should be used with extreme care. 

1360 

1361 Raises 

1362 ------ 

1363 TypeError 

1364 Raised if the butler is read-only, if no collection was provided, 

1365 or if the conditions for ``purge=True`` were not met. 
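
Examples
--------
A minimal sketch; the dataset type and collection names are illustrative
assumptions. Remove the stored artifacts for the results of a query while
keeping their Registry entries::

    refs = butler.registry.queryDatasets("flat", collections="u/someone/run",
                                         findFirst=True)
    butler.pruneDatasets(refs, disassociate=False, unstore=True)

Deleting the datasets entirely with ``purge=True`` additionally requires
``disassociate=True`` (the default) and ``unstore=True``.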

1366 """ 

1367 if not self.isWriteable(): 

1368 raise TypeError("Butler is read-only.") 

1369 if purge: 

1370 if not disassociate: 

1371 raise TypeError("Cannot pass purge=True without disassociate=True.") 

1372 if not unstore: 

1373 raise TypeError("Cannot pass purge=True without unstore=True.") 

1374 elif disassociate: 

1375 tags = tuple(tags) 

1376 if not tags: 

1377 raise TypeError("No tags provided but disassociate=True.") 

1378 for tag in tags: 

1379 collectionType = self.registry.getCollectionType(tag) 

1380 if collectionType is not CollectionType.TAGGED: 

1381 raise TypeError(f"Cannot disassociate from collection '{tag}' " 

1382 f"of non-TAGGED type {collectionType.name}.") 

1383 # Transform possibly-single-pass iterable into something we can iterate 

1384 # over multiple times. 

1385 refs = list(refs) 

1386 # Pruning a component of a DatasetRef makes no sense since registry 

1387 # doesn't know about components and datastore might not store 

1388 # components in a separate file 

1389 for ref in refs: 

1390 if ref.datasetType.component(): 

1391 raise ValueError(f"Cannot prune a component of a dataset (ref={ref})") 

1392 # We don't need an unreliable Datastore transaction for this, because 

1393 # we've been extra careful to ensure that Datastore.trash only involves 

1394 # mutating the Registry (it can _look_ at Datastore-specific things, 

1395 # but shouldn't change them), and hence all operations here are 

1396 # Registry operations. 

1397 with self.registry.transaction(): 

1398 if unstore: 

1399 self.datastore.trash(refs) 

1400 if purge: 

1401 self.registry.removeDatasets(refs) 

1402 elif disassociate: 

1403 assert tags, "Guaranteed by earlier logic in this function." 

1404 for tag in tags: 

1405 self.registry.disassociate(tag, refs) 

1406 # We've exited the Registry transaction, and apparently committed. 

1407 # (if there was an exception, everything rolled back, and it's as if 

1408 # nothing happened - and we never get here). 

1409 # Datastore artifacts are not yet gone, but they're clearly marked 

1410 # as trash, so if we fail to delete now because of (e.g.) filesystem 

1411 # problems we can try again later, and if manual administrative 

1412 # intervention is required, it's pretty clear what that should entail: 

1413 # deleting everything on disk and in private Datastore tables that is 

1414 # in the dataset_location_trash table. 

1415 if unstore: 

1416 # Point of no return for removing artifacts 

1417 self.datastore.emptyTrash() 

1418 

1419 @transactional 

1420 def ingest(self, *datasets: FileDataset, transfer: Optional[str] = "auto", run: Optional[str] = None, 

1421 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE, 

1422 ) -> None: 

1423 """Store and register one or more datasets that already exist on disk. 

1424 

1425 Parameters 

1426 ---------- 

1427 datasets : `FileDataset` 

1428 Each positional argument is a struct containing information about 

1429 a file to be ingested, including its path (either absolute or 

1430 relative to the datastore root, if applicable), a `DatasetRef`, 

1431 and optionally a formatter class or its fully-qualified string 

1432 name. If a formatter is not provided, the formatter that would be 

1433 used for `put` is assumed. On successful return, all 

1434 `FileDataset.ref` attributes will have their `DatasetRef.id` 

1435 attribute populated and all `FileDataset.formatter` attributes will 

1436 be set to the formatter class used. `FileDataset.path` attributes 

1437 may be modified to put paths in whatever the datastore considers a 

1438 standardized form. 

1439 transfer : `str`, optional 

1440 If not `None`, must be one of 'auto', 'move', 'copy', 'direct', 

1441 'hardlink', 'relsymlink' or 'symlink', indicating how to transfer 

1442 the file. 

1443 run : `str`, optional 

1444 The name of the run ingested datasets should be added to, 

1445 overriding ``self.run``. 

1446 idGenerationMode : `DatasetIdGenEnum`, optional 

1447 Specifies the option used for generating dataset IDs. By default, 

1448 unique IDs are generated for each inserted dataset. 

1449 

1450 Raises 

1451 ------ 

1452 TypeError 

1453 Raised if the butler is read-only or if no run was provided. 

1454 NotImplementedError 

1455 Raised if the `Datastore` does not support the given transfer mode. 

1456 DatasetTypeNotSupportedError 

1457 Raised if one or more files to be ingested have a dataset type that 

1458 is not supported by the `Datastore`. 

1459 FileNotFoundError 

1460 Raised if one of the given files does not exist. 

1461 FileExistsError 

1462 Raised if transfer is not `None` but the (internal) location the 

1463 file would be moved to is already occupied. 

1464 

1465 Notes 

1466 ----- 

1467 This operation is not fully exception safe: if a database operation 

1468 fails, the given `FileDataset` instances may be only partially updated. 

1469 

1470 It is atomic in terms of database operations (they will either all 

1471 succeed or all fail), provided the database engine implements 

1472 transactions correctly. It will attempt to be atomic in terms of 

1473 filesystem operations as well, but this cannot be implemented 

1474 rigorously for most datastores. 
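
Examples
--------
A minimal sketch; the dataset type name, data ID values, file path, and run
name are illustrative assumptions, and the dataset type is assumed to be
registered already::

    datasetType = butler.registry.getDatasetType("raw")
    ref = DatasetRef(datasetType, {"instrument": "ExampleCam",
                                   "exposure": 1, "detector": 0})
    butler.ingest(FileDataset(path="/data/exposure-1.fits", refs=[ref]),
                  transfer="copy", run="ExampleCam/raw/all")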

1475 """ 

1476 if not self.isWriteable(): 

1477 raise TypeError("Butler is read-only.") 

1478 progress = Progress("lsst.daf.butler.Butler.ingest", level=logging.DEBUG) 

1479 # Reorganize the inputs so they're grouped by DatasetType and then 

1480 # data ID. We also include a list of DatasetRefs for each FileDataset 

1481 # to hold the resolved DatasetRefs returned by the Registry, before 

1482 # it's safe to swap them into FileDataset.refs. 

1483 # Some type annotation aliases to make that clearer: 

1484 GroupForType = Dict[DataCoordinate, Tuple[FileDataset, List[DatasetRef]]] 

1485 GroupedData = MutableMapping[DatasetType, GroupForType] 

1486 # The actual data structure: 

1487 groupedData: GroupedData = defaultdict(dict) 

1488 # And the nested loop that populates it: 

1489 for dataset in progress.wrap(datasets, desc="Grouping by dataset type"): 

1490 # This list is intentionally shared across the inner loop, since it's 

1491 # associated with `dataset`. 

1492 resolvedRefs: List[DatasetRef] = [] 

1493 for ref in dataset.refs: 

1494 if ref.dataId in groupedData[ref.datasetType]: 

1495 raise ConflictingDefinitionError(f"Ingest conflict. Dataset {dataset.path} has the same" 

1496 " DataId as another ingest dataset" 

1497 f" {groupedData[ref.datasetType][ref.dataId][0].path}" 

1498 f" ({ref.dataId})") 

1499 groupedData[ref.datasetType][ref.dataId] = (dataset, resolvedRefs) 

1500 

1501 # Now we can bulk-insert into Registry for each DatasetType. 

1502 allResolvedRefs: List[DatasetRef] = [] 

1503 for datasetType, groupForType in progress.iter_item_chunks(groupedData.items(), 

1504 desc="Bulk-inserting datasets by type"): 

1505 refs = self.registry.insertDatasets( 

1506 datasetType, 

1507 dataIds=groupForType.keys(), 

1508 run=run, 

1509 expand=self.datastore.needs_expanded_data_ids(transfer, datasetType), 

1510 idGenerationMode=idGenerationMode, 

1511 ) 

1512 # Append those resolved DatasetRefs to the new lists we set up for 

1513 # them. 

1514 for ref, (_, resolvedRefs) in zip(refs, groupForType.values()): 

1515 resolvedRefs.append(ref) 

1516 

1517 # Go back to the original FileDatasets to replace their refs with the 

1518 # new resolved ones, and also build a big list of all refs. 

1519 allResolvedRefs = [] 

1520 for groupForType in progress.iter_chunks(groupedData.values(), 

1521 desc="Reassociating resolved dataset refs with files"): 

1522 for dataset, resolvedRefs in groupForType.values(): 

1523 dataset.refs = resolvedRefs 

1524 allResolvedRefs.extend(resolvedRefs) 

1525 

1526 # Bulk-insert everything into Datastore. 

1527 self.datastore.ingest(*datasets, transfer=transfer) 

1528 

1529 @contextlib.contextmanager 

1530 def export(self, *, directory: Optional[str] = None, 

1531 filename: Optional[str] = None, 

1532 format: Optional[str] = None, 

1533 transfer: Optional[str] = None) -> Iterator[RepoExportContext]: 

1534 """Export datasets from the repository represented by this `Butler`. 

1535 

1536 This method is a context manager that returns a helper object 

1537 (`RepoExportContext`) that is used to indicate what information from 

1538 the repository should be exported. 

1539 

1540 Parameters 

1541 ---------- 

1542 directory : `str`, optional 

1543 Directory dataset files should be written to if ``transfer`` is not 

1544 `None`. 

1545 filename : `str`, optional 

1546 Name for the file that will include database information associated 

1547 with the exported datasets. If this is not an absolute path and 

1548 ``directory`` is not `None`, it will be written to ``directory`` 

1549 instead of the current working directory. Defaults to 

1550 "export.{format}". 

1551 format : `str`, optional 

1552 File format for the database information file. If `None`, the 

1553 extension of ``filename`` will be used. 

1554 transfer : `str`, optional 

1555 Transfer mode passed to `Datastore.export`. 

1556 

1557 Raises 

1558 ------ 

1559 TypeError 

1560 Raised if the set of arguments passed is inconsistent. 

1561 

1562 Examples 

1563 -------- 

1564 Typically the `Registry.queryDataIds` and `Registry.queryDatasets` 

1565 methods are used to provide the iterables over data IDs and/or datasets 

1566 to be exported:: 

1567 

1568 with butler.export(filename="exports.yaml") as export: 

1569 # Export all flats, but none of the dimension element rows 

1570 # (i.e. data ID information) associated with them. 

1571 export.saveDatasets(butler.registry.queryDatasets("flat"), 

1572 elements=()) 

1573 # Export all datasets that start with "deepCoadd_" and all of 

1574 # their associated data ID information. 

1575 export.saveDatasets(butler.registry.queryDatasets("deepCoadd_*")) 

1576 """ 

1577 if directory is None and transfer is not None: 

1578 raise TypeError("Cannot transfer without providing a directory.") 

1579 if transfer == "move": 

1580 raise TypeError("Transfer may not be 'move': export is read-only") 

1581 if format is None: 

1582 if filename is None: 

1583 raise TypeError("At least one of 'filename' or 'format' must be provided.") 

1584 else: 

1585 _, format = os.path.splitext(filename) 

1586 elif filename is None: 

1587 filename = f"export.{format}" 

1588 if directory is not None: 

1589 filename = os.path.join(directory, filename) 

1590 BackendClass = getClassOf(self._config["repo_transfer_formats"][format]["export"]) 

1591 with open(filename, "w") as stream: 

1592 backend = BackendClass(stream) 

1593 try: 

1594 helper = RepoExportContext(self.registry, self.datastore, backend=backend, 

1595 directory=directory, transfer=transfer) 

1596 yield helper 

1597 except BaseException: 

1598 raise 

1599 else: 

1600 helper._finish() 

1601 

1602 def import_(self, *, directory: Optional[str] = None, 

1603 filename: Union[str, TextIO, None] = None, 

1604 format: Optional[str] = None, 

1605 transfer: Optional[str] = None, 

1606 skip_dimensions: Optional[Set] = None, 

1607 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE, 

1608 reuseIds: bool = False) -> None: 

1609 """Import datasets into this repository that were exported from a 

1610 different butler repository via `~lsst.daf.butler.Butler.export`. 

1611 

1612 Parameters 

1613 ---------- 

1614 directory : `str`, optional 

1615 Directory containing dataset files to import from. If `None`, 

1616 ``filename`` and all dataset file paths specified therein must 

1617 be absolute. 

1618 filename : `str` or `TextIO`, optional 

1619 A stream or name of file that contains database information 

1620 associated with the exported datasets, typically generated by 

1621 `~lsst.daf.butler.Butler.export`. If this is a string (name) and 

1622 is not an absolute path, does not exist in the current working 

1623 directory, and ``directory`` is not `None`, it is assumed to be in 

1624 ``directory``. Defaults to "export.{format}". 

1625 format : `str`, optional 

1626 File format for ``filename``. If `None`, the extension of 

1627 ``filename`` will be used. 

1628 transfer : `str`, optional 

1629 Transfer mode passed to `~lsst.daf.butler.Datastore.ingest`. 

1630 skip_dimensions : `set`, optional 

1631 Names of dimensions that should be skipped and not imported. 

1632 idGenerationMode : `DatasetIdGenEnum`, optional 

1633 Specifies the option used for generating dataset IDs when IDs are not 

1634 provided or their type does not match the backend type. By default, 

1635 unique IDs are generated for each inserted dataset. 

1636 reuseIds : `bool`, optional 

1637 If `True`, force re-use of imported dataset IDs when they are integer 

1638 IDs, which are normally generated as auto-incremented; an exception 

1639 will be raised if imported IDs clash with existing ones. This 

1640 option has no effect on globally-unique IDs, which are 

1641 always re-used (or generated if integer IDs are being imported). 

1642 

1643 Raises 

1644 ------ 

1645 TypeError 

1646 Raised if the set of arguments passed is inconsistent, or if the 

1647 butler is read-only. 
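
Examples
--------
A minimal sketch; the directory and file names are illustrative assumptions.
Import a previously exported repository subset, symlinking the dataset files
into this repository's datastore::

    butler.import_(directory="/path/to/exported", filename="export.yaml",
                   transfer="symlink")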

1648 """ 

1649 if not self.isWriteable(): 

1650 raise TypeError("Butler is read-only.") 

1651 if format is None: 

1652 if filename is None: 

1653 raise TypeError("At least one of 'filename' or 'format' must be provided.") 

1654 else: 

1655 _, format = os.path.splitext(filename) # type: ignore 

1656 elif filename is None: 

1657 filename = f"export.{format}" 

1658 if isinstance(filename, str) and directory is not None and not os.path.exists(filename): 

1659 filename = os.path.join(directory, filename) 

1660 BackendClass = getClassOf(self._config["repo_transfer_formats"][format]["import"]) 

1661 

1662 def doImport(importStream: TextIO) -> None: 

1663 backend = BackendClass(importStream, self.registry) 

1664 backend.register() 

1665 with self.transaction(): 

1666 backend.load(self.datastore, directory=directory, transfer=transfer, 

1667 skip_dimensions=skip_dimensions, idGenerationMode=idGenerationMode, 

1668 reuseIds=reuseIds) 

1669 

1670 if isinstance(filename, str): 

1671 with open(filename, "r") as stream: 

1672 doImport(stream) 

1673 else: 

1674 doImport(filename) 

1675 

1676 def validateConfiguration(self, logFailures: bool = False, 

1677 datasetTypeNames: Optional[Iterable[str]] = None, 

1678 ignore: Optional[Iterable[str]] = None) -> None: 

1679 """Validate butler configuration. 

1680 

1681 Checks that each `DatasetType` can be stored in the `Datastore`. 

1682 

1683 Parameters 

1684 ---------- 

1685 logFailures : `bool`, optional 

1686 If `True`, output a log message for every validation error 

1687 detected. 

1688 datasetTypeNames : iterable of `str`, optional 

1689 The `DatasetType` names that should be checked. This allows 

1690 only a subset to be selected. 

1691 ignore : iterable of `str`, optional 

1692 Names of DatasetTypes to skip over. This can be used to skip 

1693 known problems. If a named `DatasetType` corresponds to a 

1694 composite, all components of that `DatasetType` will also be 

1695 ignored. 

1696 

1697 Raises 

1698 ------ 

1699 ButlerValidationError 

1700 Raised if there is some inconsistency with how this Butler 

1701 is configured. 
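
Examples
--------
A minimal sketch; the ignored name is an illustrative assumption. Check every
registered dataset type, logging each problem found and skipping one
known-problematic dataset type::

    butler.validateConfiguration(logFailures=True,
                                 ignore=["problematicDatasetType"])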

1702 """ 

1703 if datasetTypeNames: 

1704 datasetTypes = [self.registry.getDatasetType(name) for name in datasetTypeNames] 

1705 else: 

1706 datasetTypes = list(self.registry.queryDatasetTypes()) 

1707 

1708 # filter out anything from the ignore list 

1709 if ignore: 

1710 ignore = set(ignore) 

1711 datasetTypes = [e for e in datasetTypes 

1712 if e.name not in ignore and e.nameAndComponent()[0] not in ignore] 

1713 else: 

1714 ignore = set() 

1715 

1716 # Find all the registered instruments 

1717 instruments = { 

1718 record.name for record in self.registry.queryDimensionRecords("instrument") 

1719 } 

1720 

1721 # For each datasetType that has an instrument dimension, create 

1722 # a DatasetRef for each defined instrument 

1723 datasetRefs = [] 

1724 

1725 for datasetType in datasetTypes: 

1726 if "instrument" in datasetType.dimensions: 

1727 for instrument in instruments: 

1728 datasetRef = DatasetRef(datasetType, {"instrument": instrument}, # type: ignore 

1729 conform=False) 

1730 datasetRefs.append(datasetRef) 

1731 

1732 entities: List[Union[DatasetType, DatasetRef]] = [] 

1733 entities.extend(datasetTypes) 

1734 entities.extend(datasetRefs) 

1735 

1736 datastoreErrorStr = None 

1737 try: 

1738 self.datastore.validateConfiguration(entities, logFailures=logFailures) 

1739 except ValidationError as e: 

1740 datastoreErrorStr = str(e) 

1741 

1742 # Also check that the LookupKeys used by the datastores match 

1743 # registry and storage class definitions 

1744 keys = self.datastore.getLookupKeys() 

1745 

1746 failedNames = set() 

1747 failedDataId = set() 

1748 for key in keys: 

1749 if key.name is not None: 

1750 if key.name in ignore: 

1751 continue 

1752 

1753 # skip if specific datasetType names were requested and this 

1754 # name does not match 

1755 if datasetTypeNames and key.name not in datasetTypeNames: 

1756 continue 

1757 

1758 # See if it is a StorageClass or a DatasetType 

1759 if key.name in self.storageClasses: 

1760 pass 

1761 else: 

1762 try: 

1763 self.registry.getDatasetType(key.name) 

1764 except KeyError: 

1765 if logFailures: 

1766 log.critical("Key '%s' does not correspond to a DatasetType or StorageClass", key) 

1767 failedNames.add(key) 

1768 else: 

1769 # Dimensions are checked for consistency when the Butler 

1770 # is created and rendezvoused with a universe. 

1771 pass 

1772 

1773 # Check that the instrument is a valid instrument 

1774 # Currently only the instrument key is supported, so check for that 

1775 if key.dataId: 

1776 dataIdKeys = set(key.dataId) 

1777 if dataIdKeys != {"instrument"}: 

1778 if logFailures: 

1779 log.critical("Key '%s' has unsupported DataId override", key) 

1780 failedDataId.add(key) 

1781 elif key.dataId["instrument"] not in instruments: 

1782 if logFailures: 

1783 log.critical("Key '%s' has unknown instrument", key) 

1784 failedDataId.add(key) 

1785 

1786 messages = [] 

1787 

1788 if datastoreErrorStr: 

1789 messages.append(datastoreErrorStr) 

1790 

1791 for failed, msg in ((failedNames, "Keys without corresponding DatasetType or StorageClass entry: "), 

1792 (failedDataId, "Keys with bad DataId entries: ")): 

1793 if failed: 

1794 msg += ", ".join(str(k) for k in failed) 

1795 messages.append(msg) 

1796 

1797 if messages: 

1798 raise ValidationError(";\n".join(messages)) 

1799 

1800 @property 

1801 def collections(self) -> CollectionSearch: 

1802 """The collections to search by default, in order (`CollectionSearch`). 

1803 

1804 This is an alias for ``self.registry.defaults.collections``. It cannot 

1805 be set directly in isolation, but all defaults may be changed together 

1806 by assigning a new `RegistryDefaults` instance to 

1807 ``self.registry.defaults``. 
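
For example (the collection and run names are illustrative assumptions), all
defaults can be replaced at once with::

    butler.registry.defaults = RegistryDefaults(
        collections=["refcats", "u/someone/run"], run="u/someone/run")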

1808 """ 

1809 return self.registry.defaults.collections 

1810 

1811 @property 

1812 def run(self) -> Optional[str]: 

1813 """Name of the run this butler writes outputs to by default (`str` or 

1814 `None`). 

1815 

1816 This is an alias for ``self.registry.defaults.run``. It cannot be set 

1817 directly in isolation, but all defaults may be changed together by 

1818 assigning a new `RegistryDefaults` instance to 

1819 ``self.registry.defaults``. 

1820 """ 

1821 return self.registry.defaults.run 

1822 

1823 registry: Registry 

1824 """The object that manages dataset metadata and relationships (`Registry`). 

1825 

1826 Most operations that don't involve reading or writing butler datasets are 

1827 accessible only via `Registry` methods. 

1828 """ 

1829 

1830 datastore: Datastore 

1831 """The object that manages actual dataset storage (`Datastore`). 

1832 

1833 Direct user access to the datastore should rarely be necessary; the primary 

1834 exception is the case where a `Datastore` implementation provides extra 

1835 functionality beyond what the base class defines. 

1836 """ 

1837 

1838 storageClasses: StorageClassFactory 

1839 """An object that maps known storage class names to objects that fully 

1840 describe them (`StorageClassFactory`). 

1841 """