1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22""" 

23Butler top level classes. 

24""" 

25from __future__ import annotations 

26 

27__all__ = ("Butler", "ButlerValidationError") 

28 

29import os 

30from collections import defaultdict 

31import contextlib 

32import logging 

33from typing import ( 

34 Any, 

35 ClassVar, 

36 ContextManager, 

37 Dict, 

38 Iterable, 

39 List, 

40 MutableMapping, 

41 Optional, 

42 Tuple, 

43 Union, 

44) 

45 

46try: 

47 import boto3 

48except ImportError: 

49 boto3 = None 
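# boto3 is optional; it is only needed for repository roots that use
# s3:// URIs (see `Butler.makeRepo`). Without it the S3 code paths
# below are unavailable.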

50 

51from lsst.utils import doImport 

52from .core import ( 

53 ButlerURI, 

54 CompositesMap, 

55 Config, 

56 ConfigSubset, 

57 DataCoordinate, 

58 DataId, 

59 DatasetRef, 

60 DatasetType, 

61 Datastore, 

62 FileDataset, 

63 Quantum, 

64 RepoExport, 

65 StorageClassFactory, 

66 ValidationError, 

67) 

68from .core.repoRelocation import BUTLER_ROOT_TAG 

69from .core.safeFileIo import safeMakeDir 

70from .core.utils import transactional, getClassOf 

71from ._deferredDatasetHandle import DeferredDatasetHandle 

72from ._butlerConfig import ButlerConfig 

73from .registry import Registry, RegistryConfig 

74 

75log = logging.getLogger(__name__) 

76 

77 

78class ButlerValidationError(ValidationError): 

79 """There is a problem with the Butler configuration.""" 

80 pass 

81 

82 

83class Butler: 

84 """Main entry point for the data access system. 

85 

86 Attributes 

87 ---------- 

88 config : `str`, `ButlerConfig` or `Config`, optional 

89 Configuration, or the path to a configuration file. If this is not a 

90 `ButlerConfig`, defaults will be read. If a `str`, it may be the path 

91 to a directory containing a "butler.yaml" file. 

92 datastore : `Datastore` 

93 Datastore to use for storage. 

94 registry : `Registry` 

95 Registry to use for lookups. 

96 

97 Parameters 

98 ---------- 

99 config : `ButlerConfig`, `Config` or `str`, optional 

100 Configuration. Anything acceptable to the 

101 `ButlerConfig` constructor. If a directory path 

102 is given the configuration will be read from a ``butler.yaml`` file in 

103 that location. If `None` is given default values will be used. 

104 butler : `Butler`, optional 

105 If provided, construct a new Butler that uses the same registry and 

106 datastore as the given one, but with the given collection and run. 

107 Incompatible with the ``config``, ``searchPaths``, and ``writeable`` 

108 arguments. 

109 collection : `str`, optional 

110 Collection to use for all input lookups. May be `None` to either use 

111 the value passed to ``run``, or to defer passing a collection until 

112 the methods that require one are called. 

113 run : `str`, optional 

114 Name of the run datasets should be output to; also used as a tagged 

115 collection name with which these datasets will be associated. If the run 

116 does not exist, it will be created. If ``collection`` is `None`, this 

117 collection will be used for input lookups as well; if not, it must have 

118 the same value as ``run``. 

119 searchPaths : `list` of `str`, optional 

120 Directory paths to search when calculating the full Butler 

121 configuration. Not used if the supplied config is already a 

122 `ButlerConfig`. 

123 writeable : `bool`, optional 

124 Explicitly sets whether the butler supports write operations. If not 

125 provided, a read-only butler is created unless ``run`` is passed. 

126 

127 Raises 

128 ------ 

129 ValueError 

130 Raised if ``run`` or ``collection`` is passed via the configuration (this 

131 is no longer supported), if both are provided but are inconsistent, or 
if ``run`` is given while the butler is read-only. 
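
Examples
--------
A minimal sketch of typical constructions; the repository path and the
collection/run names here are purely illustrative::

    # Read-only butler for an existing repository.
    butler = Butler("/path/to/repo", collection="shared/defaults")

    # Writeable butler whose outputs are written to (and, by default,
    # read back from) the run "my_run".
    butler = Butler("/path/to/repo", run="my_run")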

132 """ 

133 def __init__(self, config: Union[Config, str, None] = None, *, 

134 butler: Optional[Butler] = None, 

135 collection: Optional[str] = None, 

136 run: Optional[str] = None, 

137 searchPaths: Optional[List[str]] = None, 

138 writeable: Optional[bool] = None): 

139 if butler is not None: 

140 if config is not None or searchPaths is not None or writeable is not None: 

141 raise TypeError("Cannot pass 'config', 'searchPaths', or 'writeable' " 

142 "arguments with 'butler' argument.") 

143 self.registry = butler.registry 

144 self.datastore = butler.datastore 

145 self.storageClasses = butler.storageClasses 

146 self._composites = butler._composites 

147 self._config = butler._config 

148 else: 

149 self._config = ButlerConfig(config, searchPaths=searchPaths) 

150 if "root" in self._config: 

151 butlerRoot = self._config["root"] 

152 else: 

153 butlerRoot = self._config.configDir 

154 if writeable is None: 

155 writeable = run is not None 

156 self.registry = Registry.fromConfig(self._config, butlerRoot=butlerRoot, writeable=writeable) 

157 self.datastore = Datastore.fromConfig(self._config, self.registry, butlerRoot=butlerRoot) 

158 self.storageClasses = StorageClassFactory() 

159 self.storageClasses.addFromConfig(self._config) 

160 self._composites = CompositesMap(self._config, universe=self.registry.dimensions) 

161 if "run" in self._config or "collection" in self._config: 

162 raise ValueError("Passing a run or collection via configuration is no longer supported.") 

163 if run is not None and writeable is False: 

164 raise ValueError(f"Butler initialized with run='{run}', " 

165 f"but is read-only; use collection='{run}' instead.") 

166 self.run = run 

167 if collection is None and run is not None: 

168 collection = run 

169 if self.run is not None and collection != self.run: 

170 raise ValueError( 

171 "Run ({}) and collection ({}) are inconsistent.".format(self.run, collection) 

172 ) 

173 self.collection = collection 

174 if self.run is not None: 

175 self.registry.registerRun(self.run) 

176 

177 GENERATION: ClassVar[int] = 3 

178 """This is a Generation 3 Butler. 

179 

180 This attribute may be removed in the future, once the Generation 2 Butler 

181 interface has been fully retired; it should only be used in transitional 

182 code. 

183 """ 

184 

185 @staticmethod 

186 def makeRepo(root: str, config: Union[Config, str, None] = None, standalone: bool = False, 

187 createRegistry: bool = True, searchPaths: Optional[List[str]] = None, 

188 forceConfigRoot: bool = True, outfile: Optional[str] = None) -> Config: 

189 """Create an empty data repository by adding a butler.yaml config 

190 to a repository root directory. 

191 

192 Parameters 

193 ---------- 

194 root : `str` 

195 Filesystem path to the root of the new repository. Will be created 

196 if it does not exist. 

197 config : `Config` or `str`, optional 

198 Configuration to write to the repository, after setting any 

199 root-dependent Registry or Datastore config options. Can not 

200 be a `ButlerConfig` or a `ConfigSubset`. If `None`, default 

201 configuration will be used. Root-dependent config options 

202 specified in this config are overwritten if ``forceConfigRoot`` 

203 is `True`. 

204 standalone : `bool` 

205 If True, write all expanded defaults, not just customized or 

206 repository-specific settings. 

207 This (mostly) decouples the repository from the default 

208 configuration, insulating it from changes to the defaults (which 

209 may be good or bad, depending on the nature of the changes). 

210 Future *additions* to the defaults will still be picked up when 

211 initializing `Butlers` to repos created with ``standalone=True``. 

212 createRegistry : `bool`, optional 

213 If `True` create a new Registry. 

214 searchPaths : `list` of `str`, optional 

215 Directory paths to search when calculating the full butler 

216 configuration. 

217 forceConfigRoot : `bool`, optional 

218 If `False`, any values present in the supplied ``config`` that 

219 would normally be reset are not overridden and will appear 

220 directly in the output config. This allows non-standard overrides 

221 of the root directory for a datastore or registry to be given. 

222 If this parameter is `True` the values for ``root`` will be 

223 forced into the resulting config if appropriate. 

224 outfile : `str`, optional 

225 If not-`None`, the output configuration will be written to this 

226 location rather than into the repository itself. Can be a URI 

227 string. Can refer to a directory that will be used to write 

228 ``butler.yaml``. 

229 

230 Returns 

231 ------- 

232 config : `Config` 

233 The updated `Config` instance written to the repo. 

234 

235 Raises 

236 ------ 

237 ValueError 

238 Raised if a ButlerConfig or ConfigSubset is passed instead of a 

239 regular Config (as these subclasses would make it impossible to 

240 support ``standalone=False``). 

241 os.error 

242 Raised if the directory does not exist, exists but is not a 

243 directory, or cannot be created. 

244 

245 Notes 

246 ----- 

247 Note that when ``standalone=False`` (the default), the configuration 

248 search path (see `ConfigSubset.defaultSearchPaths`) that was used to 

249 construct the repository should also be used to construct any Butlers 

250 to avoid configuration inconsistencies. 
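
Examples
--------
A minimal sketch; the paths and run name are illustrative and the
defaults found on the configuration search path are assumed to be
usable as-is::

    # Create an empty repository with default configuration, then
    # construct a writeable Butler pointing at it.
    Butler.makeRepo("/path/to/new/repo")
    butler = Butler("/path/to/new/repo", run="ingest/run")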

251 """ 

252 if isinstance(config, (ButlerConfig, ConfigSubset)): 

253 raise ValueError("makeRepo must be passed a regular Config without defaults applied.") 

254 

255 # for "file" schemes we are assuming POSIX semantics for paths, for 

256 # schemeless URIs we are assuming os.path semantics. 

257 uri = ButlerURI(root) 

258 if uri.scheme == "file" or not uri.scheme: 

259 if not os.path.isdir(uri.ospath): 

260 safeMakeDir(uri.ospath) 

261 elif uri.scheme == "s3": 

262 s3 = boto3.resource("s3") 

263 # Assumes the bucket already exists; verifying or creating it would need another level of checks 

264 bucket = s3.Bucket(uri.netloc) 

265 bucket.put_object(Bucket=uri.netloc, Key=uri.relativeToPathRoot) 

266 else: 

267 raise ValueError(f"Unrecognized scheme: {uri.scheme}") 

268 config = Config(config) 

269 

270 # If we are creating a new repo from scratch with relative roots, 

271 # do not propagate an explicit root from the config file 

272 if "root" in config: 

273 del config["root"] 

274 

275 full = ButlerConfig(config, searchPaths=searchPaths) # this applies defaults 

276 datastoreClass = doImport(full["datastore", "cls"]) 

277 datastoreClass.setConfigRoot(BUTLER_ROOT_TAG, config, full, overwrite=forceConfigRoot) 

278 

279 # if key exists in given config, parse it, otherwise parse the defaults 

280 # in the expanded config 

281 if config.get(("registry", "db")): 

282 registryConfig = RegistryConfig(config) 

283 else: 

284 registryConfig = RegistryConfig(full) 

285 defaultDatabaseUri = registryConfig.makeDefaultDatabaseUri(BUTLER_ROOT_TAG) 

286 if defaultDatabaseUri is not None: 

287 Config.updateParameters(RegistryConfig, config, full, 

288 toUpdate={"db": defaultDatabaseUri}, 

289 overwrite=forceConfigRoot) 

290 else: 

291 Config.updateParameters(RegistryConfig, config, full, toCopy=("db",), 

292 overwrite=forceConfigRoot) 

293 

294 if standalone: 

295 config.merge(full) 

296 if outfile is not None: 

297 # When writing to a separate location we must include 

298 # the root of the butler repo in the config else it won't know 

299 # where to look. 

300 config["root"] = uri.geturl() 

301 configURI = outfile 

302 else: 

303 configURI = uri 

304 config.dumpToUri(configURI) 

305 

306 # Create Registry and populate tables 

307 Registry.fromConfig(config, create=createRegistry, butlerRoot=root) 

308 return config 

309 

310 @classmethod 

311 def _unpickle(cls, config: ButlerConfig, collection: str, run: Optional[str], writeable: bool) -> Butler: 

312 """Callable used to unpickle a Butler. 

313 

314 We prefer not to use ``Butler.__init__`` directly so we can force some 

315 of its many arguments to be keyword-only (note that ``__reduce__`` 

316 can only invoke callables with positional arguments). 

317 

318 Parameters 

319 ---------- 

320 config : `ButlerConfig` 

321 Butler configuration, already coerced into a true `ButlerConfig` 

322 instance (and hence after any search paths for overrides have been 

323 utilized). 

324 collection : `str` 

325 String name of a collection to use for read operations. 

326 run : `str`, optional 

327 String name of a run to use for write operations, or `None` for a 

328 read-only butler. 

writeable : `bool`

Whether the new `Butler` should support write operations.

329 

330 Returns 

331 ------- 

332 butler : `Butler` 

333 A new `Butler` instance. 

334 """ 

335 return cls(config=config, collection=collection, run=run, writeable=writeable) 

336 

337 def __reduce__(self): 

338 """Support pickling. 

339 """ 

340 return (Butler._unpickle, (self._config, self.collection, self.run, self.registry.isWriteable())) 

341 

342 def __str__(self): 

343 return "Butler(collection='{}', datastore='{}', registry='{}')".format( 

344 self.collection, self.datastore, self.registry) 

345 

346 def isWriteable(self) -> bool: 

347 """Return `True` if this `Butler` supports write operations. 

348 """ 

349 return self.registry.isWriteable() 

350 

351 @contextlib.contextmanager 

352 def transaction(self): 

353 """Context manager supporting `Butler` transactions. 

354 

355 Transactions can be nested. 
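
Examples
--------
A sketch of grouping several writes so that they succeed or fail
together; the dataset type names and data ID are illustrative::

    with butler.transaction():
        butler.put(catalog, "src", dataId)
        butler.put(summary, "srcSummary", dataId)
    # If the second put raises, the registry and datastore
    # transactions are both rolled back.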

356 """ 

357 with self.registry.transaction(): 

358 with self.datastore.transaction(): 

359 yield 

360 

361 def _standardizeArgs(self, datasetRefOrType: Union[DatasetRef, DatasetType, str], 

362 dataId: Optional[DataId] = None, **kwds: Any) -> Tuple[DatasetType, DataId]: 

363 """Standardize the arguments passed to several Butler APIs. 

364 

365 Parameters 

366 ---------- 

367 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

368 When `DatasetRef` the `dataId` should be `None`. 

369 Otherwise the `DatasetType` or name thereof. 

370 dataId : `dict` or `DataCoordinate` 

371 A `dict` of `Dimension` link name, value pairs that label the 

372 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

373 should be provided as the first argument. 

374 kwds 

375 Additional keyword arguments used to augment or construct a 

376 `DataCoordinate`. See `DataCoordinate.standardize` 

377 parameters. 

378 

379 Returns 

380 ------- 

381 datasetType : `DatasetType` 

382 A `DatasetType` instance extracted from ``datasetRefOrType``. 

383 dataId : `dict` or `DataId`, optional 

384 Argument that can be used (along with ``kwds``) to construct a 

385 `DataId`. 

386 

387 Notes 

388 ----- 

389 Butler APIs that conceptually need a DatasetRef also allow passing a 

390 `DatasetType` (or the name of one) and a `DataId` (or a dict and 

391 keyword arguments that can be used to construct one) separately. This 

392 method accepts those arguments and always returns a true `DatasetType` 

393 and a `DataId` or `dict`. 

394 

395 Standardization of `dict` vs `DataId` is best handled by passing the 

396 returned ``dataId`` (and ``kwds``) to `Registry` APIs, which are 

397 generally similarly flexible. 

398 """ 

399 if isinstance(datasetRefOrType, DatasetRef): 

400 if dataId is not None or kwds: 

401 raise ValueError("DatasetRef given, cannot use dataId as well") 

402 datasetType = datasetRefOrType.datasetType 

403 dataId = datasetRefOrType.dataId 

404 else: 

405 # Don't check whether DataId is provided, because Registry APIs 

406 # can usually construct a better error message when it wasn't. 

407 if isinstance(datasetRefOrType, DatasetType): 

408 datasetType = datasetRefOrType 

409 else: 

410 datasetType = self.registry.getDatasetType(datasetRefOrType) 

411 return datasetType, dataId 

412 

413 def _findDatasetRef(self, datasetRefOrType: Union[DatasetRef, DatasetType, str], 

414 dataId: Optional[DataId] = None, *, 

415 collection: Optional[str] = None, 

416 allowUnresolved: bool = False, 

417 **kwds: Any) -> DatasetRef: 

418 """Shared logic for methods that start with a search for a dataset in 

419 the registry. 

420 

421 Parameters 

422 ---------- 

423 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

424 When `DatasetRef` the `dataId` should be `None`. 

425 Otherwise the `DatasetType` or name thereof. 

426 dataId : `dict` or `DataCoordinate`, optional 

427 A `dict` of `Dimension` link name, value pairs that label the 

428 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

429 should be provided as the first argument. 

430 collection : `str`, optional 

431 Name of the collection to search, overriding ``self.collection``. 

432 allowUnresolved : `bool`, optional 

433 If `True`, return an unresolved `DatasetRef` if finding a resolved 

434 one in the `Registry` fails. Defaults to `False`. 

435 kwds 

436 Additional keyword arguments used to augment or construct a 

437 `DataId`. See `DataId` parameters. 

438 

439 Returns 

440 ------- 

441 ref : `DatasetRef` 

442 A reference to the dataset identified by the given arguments. 

443 

444 Raises 

445 ------ 

446 LookupError 

447 Raised if no matching dataset exists in the `Registry` (and 

448 ``allowUnresolved is False``). 

449 ValueError 

450 Raised if a resolved `DatasetRef` was passed as an input, but it 

451 differs from the one found in the registry in this collection. 

452 TypeError 

453 Raised if ``collection`` and ``self.collection`` are both `None`. 

454 """ 

455 datasetType, dataId = self._standardizeArgs(datasetRefOrType, dataId, **kwds) 

456 if isinstance(datasetRefOrType, DatasetRef): 

457 idNumber = datasetRefOrType.id 

458 else: 

459 idNumber = None 

460 # Expand the data ID first instead of letting registry.find do it, so 

461 # we get the result even if it returns None. 

462 dataId = self.registry.expandDataId(dataId, graph=datasetType.dimensions, **kwds) 

463 if collection is None: 

464 collection = self.collection 

465 if collection is None: 

466 raise TypeError("No collection provided.") 

467 # Always lookup the DatasetRef, even if one is given, to ensure it is 

468 # present in the current collection. 

469 ref = self.registry.find(collection, datasetType, dataId) 

470 if ref is None: 

471 if allowUnresolved: 

472 return DatasetRef(datasetType, dataId) 

473 else: 

474 raise LookupError(f"Dataset {datasetType.name} with data ID {dataId} " 

475 f"could not be found in collection '{collection}'.") 

476 if idNumber is not None and idNumber != ref.id: 

477 raise ValueError(f"DatasetRef.id provided ({idNumber}) does not match " 

478 f"id ({ref.id}) in registry in collection '{collection}'.") 

479 return ref 

480 

481 @transactional 

482 def put(self, obj: Any, datasetRefOrType: Union[DatasetRef, DatasetType, str], 

483 dataId: Optional[DataId] = None, *, 

484 producer: Optional[Quantum] = None, 

485 run: Optional[str] = None, 

486 **kwds: Any) -> DatasetRef: 

487 """Store and register a dataset. 

488 

489 Parameters 

490 ---------- 

491 obj : `object` 

492 The dataset. 

493 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

494 When `DatasetRef` is provided, ``dataId`` should be `None`. 

495 Otherwise the `DatasetType` or name thereof. 

496 dataId : `dict` or `DataCoordinate` 

497 A `dict` of `Dimension` link name, value pairs that label the 

498 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

499 should be provided as the second argument. 

500 producer : `Quantum`, optional 

501 The producer. 

502 run : `str`, optional 

503 The name of the run the dataset should be added to, overriding 

504 ``self.run``. 

505 kwds 

506 Additional keyword arguments used to augment or construct a 

507 `DataCoordinate`. See `DataCoordinate.standardize` 

508 parameters. 

509 

510 Returns 

511 ------- 

512 ref : `DatasetRef` 

513 A reference to the stored dataset, updated with the correct id if 

514 given. 

515 

516 Raises 

517 ------ 

518 TypeError 

519 Raised if the butler is read-only or if no run has been provided. 
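
Examples
--------
A sketch of a typical call; the dataset type name and the dimension
names/values are illustrative and must already be defined in the
registry::

    butler = Butler("/path/to/repo", run="my_run")
    ref = butler.put(catalog, "src",
                     instrument="HSC", visit=903334, detector=20)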

520 """ 

521 log.debug("Butler put: %s, dataId=%s, producer=%s, run=%s", datasetRefOrType, dataId, producer, run) 

522 if not self.isWriteable(): 

523 raise TypeError("Butler is read-only.") 

524 datasetType, dataId = self._standardizeArgs(datasetRefOrType, dataId, **kwds) 

525 if isinstance(datasetRefOrType, DatasetRef) and datasetRefOrType.id is not None: 

526 raise ValueError("DatasetRef must not be in registry, must have None id") 

527 

528 if run is None: 

529 if self.run is None: 

530 raise TypeError("No run provided.") 

531 run = self.run 

532 

533 isVirtualComposite = self._composites.shouldBeDisassembled(datasetType) 

534 

535 # Add Registry Dataset entry. If not a virtual composite, add 

536 # and attach components at the same time. 

537 dataId = self.registry.expandDataId(dataId, graph=datasetType.dimensions, **kwds) 

538 ref, = self.registry.insertDatasets(datasetType, run=run, dataIds=[dataId], 

539 producer=producer, recursive=not isVirtualComposite) 

540 

541 # Check to see if this datasetType requires disassembly 

542 if isVirtualComposite: 

543 components = datasetType.storageClass.assembler().disassemble(obj) 

544 for component, info in components.items(): 

545 compTypeName = datasetType.componentTypeName(component) 

546 compRef = self.put(info.component, compTypeName, dataId, producer=producer, run=run) 

547 self.registry.attachComponent(component, ref, compRef) 

548 else: 

549 # This is an entity without a disassembler. 

550 self.datastore.put(obj, ref) 

551 

552 return ref 

553 

554 def getDirect(self, ref: DatasetRef, *, parameters: Optional[Dict[str, Any]] = None): 

555 """Retrieve a stored dataset. 

556 

557 Unlike `Butler.get`, this method allows datasets outside the Butler's 

558 collection to be read as long as the `DatasetRef` that identifies them 

559 can be obtained separately. 

560 

561 Parameters 

562 ---------- 

563 ref : `DatasetRef` 

564 Reference to an already stored dataset. 

565 parameters : `dict` 

566 Additional StorageClass-defined options to control reading, 

567 typically used to efficiently read only a subset of the dataset. 

568 

569 Returns 

570 ------- 

571 obj : `object` 

572 The dataset. 

573 """ 

574 # if the ref exists in the store we return it directly 

575 if self.datastore.exists(ref): 

576 return self.datastore.get(ref, parameters=parameters) 

577 elif ref.isComposite(): 

578 # Check that we haven't got any unknown parameters 

579 ref.datasetType.storageClass.validateParameters(parameters) 

580 # Reconstruct the composite 

581 usedParams = set() 

582 components = {} 

583 for compName, compRef in ref.components.items(): 

584 # make a dictionary of parameters containing only the subset 

585 # supported by the StorageClass of the components 

586 compParams = compRef.datasetType.storageClass.filterParameters(parameters) 

587 usedParams.update(set(compParams)) 

588 components[compName] = self.datastore.get(compRef, parameters=compParams) 

589 

590 # Any unused parameters will have to be passed to the assembler 

591 if parameters: 

592 unusedParams = {k: v for k, v in parameters.items() if k not in usedParams} 

593 else: 

594 unusedParams = {} 

595 

596 # Assemble the components 

597 inMemoryDataset = ref.datasetType.storageClass.assembler().assemble(components) 

598 return ref.datasetType.storageClass.assembler().handleParameters(inMemoryDataset, 

599 parameters=unusedParams) 

600 else: 

601 # single entity in datastore 

602 raise FileNotFoundError(f"Unable to locate dataset '{ref}' in datastore {self.datastore.name}") 

603 

604 def getDeferred(self, datasetRefOrType: Union[DatasetRef, DatasetType, str], 

605 dataId: Optional[DataId] = None, *, 

606 parameters: Union[dict, None] = None, 

607 collection: Optional[str] = None, 

608 **kwds: Any) -> DeferredDatasetHandle: 

609 """Create a `DeferredDatasetHandle` which can later retrieve a dataset. 

610 

611 Parameters 

612 ---------- 

613 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

614 When `DatasetRef` the `dataId` should be `None`. 

615 Otherwise the `DatasetType` or name thereof. 

616 dataId : `dict` or `DataCoordinate`, optional 

617 A `dict` of `Dimension` link name, value pairs that label the 

618 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

619 should be provided as the first argument. 

622 parameters : `dict` 

623 Additional StorageClass-defined options to control reading, 

624 typically used to efficiently read only a subset of the dataset. 

625 collection : `str`, optional 

626 Collection to search, overriding ``self.collection``. 

627 kwds 

628 Additional keyword arguments used to augment or construct a 

629 `DataId`. See `DataId` parameters. 

630 

631 Returns 

632 ------- 

633 obj : `DeferredDatasetHandle` 

634 A handle which can be used to retrieve a dataset at a later time. 

635 

636 Raises 

637 ------ 

638 LookupError 

639 Raised if no matching dataset exists in the `Registry`. 

641 ValueError 

642 Raised if a resolved `DatasetRef` was passed as an input, but it 

643 differs from the one found in the registry in this collection. 

644 TypeError 

645 Raised if ``collection`` and ``self.collection`` are both `None`. 
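
Examples
--------
A sketch; the dataset type name and data ID are illustrative, and the
handle is assumed to expose a ``get`` method for the eventual read::

    handle = butler.getDeferred("src",
                                instrument="HSC", visit=903334, detector=20)
    # Nothing is read from the datastore until the handle is used.
    catalog = handle.get()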

646 """ 

647 ref = self._findDatasetRef(datasetRefOrType, dataId, collection=collection, **kwds) 

648 return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters) 

649 

650 def get(self, datasetRefOrType: Union[DatasetRef, DatasetType, str], 

651 dataId: Optional[DataId] = None, *, 

652 parameters: Optional[Dict[str, Any]] = None, 

653 collection: Optional[str] = None, 

654 **kwds: Any) -> Any: 

655 """Retrieve a stored dataset. 

656 

657 Parameters 

658 ---------- 

659 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

660 When `DatasetRef` the `dataId` should be `None`. 

661 Otherwise the `DatasetType` or name thereof. 

662 dataId : `dict` or `DataCoordinate` 

663 A `dict` of `Dimension` link name, value pairs that label the 

664 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

665 should be provided as the first argument. 

666 parameters : `dict` 

667 Additional StorageClass-defined options to control reading, 

668 typically used to efficiently read only a subset of the dataset. 

669 collection : `str`, optional 

670 Collection to search, overriding ``self.collection``. 

671 kwds 

672 Additional keyword arguments used to augment or construct a 

673 `DataCoordinate`. See `DataCoordinate.standardize` 

674 parameters. 

675 

676 Returns 

677 ------- 

678 obj : `object` 

679 The dataset. 

680 

681 Raises 

682 ------ 

683 ValueError 

684 Raised if a resolved `DatasetRef` was passed as an input, but it 

685 differs from the one found in the registry in this collection. 

686 LookupError 

687 Raised if no matching dataset exists in the `Registry`. 

688 TypeError 

689 Raised if ``collection`` and ``self.collection`` are both `None`. 
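
Examples
--------
A sketch of typical calls, mirroring the `put` example; the dataset
type name, data ID, and the ``columns`` parameter are illustrative::

    butler = Butler("/path/to/repo", collection="my_run")
    catalog = butler.get("src",
                         instrument="HSC", visit=903334, detector=20)
    # StorageClass-defined parameters can restrict what is read.
    subset = butler.get("src",
                        {"instrument": "HSC", "visit": 903334, "detector": 20},
                        parameters={"columns": ["id"]})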

690 """ 

691 log.debug("Butler get: %s, dataId=%s, parameters=%s", datasetRefOrType, dataId, parameters) 

692 ref = self._findDatasetRef(datasetRefOrType, dataId, collection=collection, **kwds) 

693 return self.getDirect(ref, parameters=parameters) 

694 

695 def getUri(self, datasetRefOrType: Union[DatasetRef, DatasetType, str], 

696 dataId: Optional[DataId] = None, *, 

697 predict: bool = False, 

698 collection: Optional[str] = None, 

699 run: Optional[str] = None, 

700 **kwds: Any) -> str: 

701 """Return the URI to the Dataset. 

702 

703 Parameters 

704 ---------- 

705 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

706 When `DatasetRef` the `dataId` should be `None`. 

707 Otherwise the `DatasetType` or name thereof. 

708 dataId : `dict` or `DataCoordinate` 

709 A `dict` of `Dimension` link name, value pairs that label the 

710 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

711 should be provided as the first argument. 

712 predict : `bool` 

713 If `True`, allow URIs to be returned of datasets that have not 

714 been written. 

715 collection : `str`, optional 

716 Collection to search, overriding ``self.collection``. 

717 run : `str`, optional 

718 Run to use for predictions, overriding ``self.run``. 

719 kwds 

720 Additional keyword arguments used to augment or construct a 

721 `DataCoordinate`. See `DataCoordinate.standardize` 

722 parameters. 

723 

724 Returns 

725 ------- 

726 uri : `str` 

727 URI string pointing to the Dataset within the datastore. If the 

728 Dataset does not exist in the datastore, and if ``predict`` is 

729 `True`, the URI will be a prediction and will include a URI 

730 fragment "#predicted". 

731 If the datastore does not have entities that relate well 

732 to the concept of a URI the returned URI string will be 

733 descriptive. The returned URI is not guaranteed to be obtainable. 

734 

735 Raises 

736 ------ 

737 LookupError 

738 A URI has been requested for a dataset that does not exist and 

739 guessing is not allowed. 

740 ValueError 

741 Raised if a resolved `DatasetRef` was passed as an input, but it 

742 differs from the one found in the registry in this collection. 

743 TypeError 

744 Raised if ``collection`` and ``self.collection`` are both `None`. 
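
Examples
--------
A sketch; the dataset type name and data ID are illustrative::

    uri = butler.getUri("src", instrument="HSC", visit=903334, detector=20)
    # With predict=True a plausible URI (with a "#predicted" fragment)
    # is returned even if the dataset has not been written yet.
    uri = butler.getUri("src", instrument="HSC", visit=903334,
                        detector=20, predict=True, run="my_run")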

745 """ 

746 ref = self._findDatasetRef(datasetRefOrType, dataId, allowUnresolved=predict, collection=collection, 

747 **kwds) 

748 if ref.id is None: # only possible if predict is True 

749 if run is None: 

750 run = self.run 

751 if run is None: 

752 raise TypeError("Cannot predict location with run=None.") 

753 # Lie about ID, because we can't guess it, and only 

754 # Datastore.getUri() will ever see it (and it doesn't use it). 

755 ref = ref.resolved(id=0, run=run) 

756 return self.datastore.getUri(ref, predict) 

757 

758 def datasetExists(self, datasetRefOrType: Union[DatasetRef, DatasetType, str], 

759 dataId: Optional[DataId] = None, *, 

760 collection: Optional[str] = None, 

761 **kwds: Any) -> bool: 

762 """Return True if the Dataset is actually present in the Datastore. 

763 

764 Parameters 

765 ---------- 

766 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

767 When `DatasetRef` the `dataId` should be `None`. 

768 Otherwise the `DatasetType` or name thereof. 

769 dataId : `dict` or `DataCoordinate` 

770 A `dict` of `Dimension` link name, value pairs that label the 

771 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

772 should be provided as the first argument. 

773 collection : `str`, optional 

774 Collection to search, overriding ``self.collection``. 

775 kwds 

776 Additional keyword arguments used to augment or construct a 

777 `DataCoordinate`. See `DataCoordinate.standardize` 

778 parameters. 

779 

780 Raises 

781 ------ 

782 LookupError 

783 Raised if the dataset is not even present in the Registry. 

784 ValueError 

785 Raised if a resolved `DatasetRef` was passed as an input, but it 

786 differs from the one found in the registry in this collection. 

787 TypeError 

788 Raised if ``collection`` and ``self.collection`` are both `None`. 

789 """ 

790 ref = self._findDatasetRef(datasetRefOrType, dataId, collection=collection, **kwds) 

791 return self.datastore.exists(ref) 

792 

793 def remove(self, datasetRefOrType: Union[DatasetRef, DatasetType, str], 

794 dataId: Optional[DataId] = None, *, 

795 delete: bool = True, remember: bool = True, collection: Optional[str] = None, **kwds: Any): 

796 """Remove a dataset from the collection and possibly the repository. 

797 

798 The identified dataset is always at least removed from the Butler's 

799 collection. By default it is also deleted from the Datastore (e.g. 

800 files are actually deleted), but the dataset is "remembered" by 

801 retaining its row in the dataset and provenance tables in the registry. 

802 

803 If the dataset is a composite, all components will also be removed. 

804 

805 Parameters 

806 ---------- 

807 datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` 

808 When `DatasetRef` the `dataId` should be `None`. 

809 Otherwise the `DatasetType` or name thereof. 

810 dataId : `dict` or `DataId` 

811 A `dict` of `Dimension` link name, value pairs that label the 

812 `DatasetRef` within a Collection. When `None`, a `DatasetRef` 

813 should be provided as the first argument. 

814 delete : `bool` 

815 If `True` (default) actually delete the dataset from the 

816 Datastore (i.e. actually remove files). 

817 remember : `bool` 

818 If `True` (default), retain dataset and provenance records in 

819 the `Registry` for this dataset. 

820 collection : `str`, optional 

821 Collection to search, overriding ``self.collection``. 

822 kwds 

823 Additional keyword arguments used to augment or construct a 

824 `DataId`. See `DataId` parameters. 

825 

826 Raises 

827 ------ 

828 TypeError 

829 Raised if the butler is read-only or if no collection was provided. 

833 OrphanedRecordError 

834 Raised if ``remember`` is `False` but the dataset is still present 

835 in a `Datastore` not recognized by this `Butler` client. 

836 ValueError 

837 Raised if ``delete`` and ``remember`` are both `False` (a dataset 

838 cannot remain in a `Datastore` once its `Registry` entry is removed), 
or if a resolved `DatasetRef` was passed as an input but differs from 
the one found in the registry in this collection. 

839 """ 

840 if not self.isWriteable(): 

841 raise TypeError("Butler is read-only.") 

842 ref = self._findDatasetRef(datasetRefOrType, dataId, collection=collection, **kwds) 

843 if delete: 

844 # There is a difference between a concrete composite and virtual 

845 # composite. In a virtual composite the datastore is never 

846 # given the top level DatasetRef. In the concrete composite 

847 # the datastore knows all the refs and will clean up itself 

848 # if asked to remove the parent ref. 

849 # We can not check configuration for this since we can not trust 

850 # that the configuration is the same. We therefore have to ask 

851 # if the ref exists or not 

852 if self.datastore.exists(ref): 

853 self.datastore.remove(ref) 

854 elif ref.isComposite(): 

855 datastoreNames = set(self.datastore.names) 

856 for r in ref.components.values(): 

857 # If a dataset was removed previously but remembered 

858 # in registry, skip the removal in the datastore. 

859 datastoreLocations = self.registry.getDatasetLocations(r) 

860 if datastoreLocations & datastoreNames: 

861 self.datastore.remove(r) 

862 else: 

863 raise FileNotFoundError(f"Dataset {ref} not known to datastore") 

864 elif not remember: 

865 raise ValueError("Cannot retain dataset in Datastore without keeping Registry dataset record.") 

866 if remember: 

867 self.registry.disassociate(self.collection, [ref]) 

868 else: 

869 # This also implicitly disassociates. 

870 self.registry.removeDataset(ref) 

871 

872 @transactional 

873 def ingest(self, *datasets: FileDataset, transfer: Optional[str] = None, run: Optional[str] = None): 

874 """Store and register one or more datasets that already exist on disk. 

875 

876 Parameters 

877 ---------- 

878 datasets : `FileDataset` 

879 Each positional argument is a struct containing information about 

880 a file to be ingested, including its path (either absolute or 

881 relative to the datastore root, if applicable), a `DatasetRef`, 

882 and optionally a formatter class or its fully-qualified string 

883 name. If a formatter is not provided, the formatter that would be 

884 used for `put` is assumed. On successful return, all 

885 `FileDataset.ref` attributes will have their `DatasetRef.id` 

886 attribute populated and all `FileDataset.formatter` attributes will 

887 be set to the formatter class used. `FileDataset.path` attributes 

888 may be modified to put paths in whatever the datastore considers a 

889 standardized form. 

890 transfer : `str`, optional 

891 If not `None`, must be one of 'move', 'copy', 'hardlink', or 

892 'symlink', indicating how to transfer the file. 

893 run : `str`, optional 

894 The name of the run ingested datasets should be added to, 

895 overriding ``self.run``. 

896 

897 Raises 

898 ------ 

899 TypeError 

900 Raised if the butler is read-only or if no run was provided. 

901 NotImplementedError 

902 Raised if the `Datastore` does not support the given transfer mode. 

903 DatasetTypeNotSupportedError 

904 Raised if one or more files to be ingested have a dataset type that 

905 is not supported by the `Datastore`. 

906 FileNotFoundError 

907 Raised if one of the given files does not exist. 

908 FileExistsError 

909 Raised if transfer is not `None` but the (internal) location the 

910 file would be moved to is already occupied. 

911 

912 Notes 

913 ----- 

914 This operation is not fully exception safe: if a database operation 

915 fails, the given `FileDataset` instances may be only partially updated. 

916 

917 It is atomic in terms of database operations (they will either all 

918 succeed or all fail) providing the database engine implements 

919 transactions correctly. It will attempt to be atomic in terms of 

920 filesystem operations as well, but this cannot be implemented 

921 rigorously for most datastores. 
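
Examples
--------
A sketch of ingesting one existing file by symlinking it into the
datastore; the dataset type, data ID, and path are illustrative, and
the `FileDataset` constructor arguments are assumed to mirror the
attributes described above::

    rawType = butler.registry.getDatasetType("raw")
    ref = DatasetRef(rawType, {"instrument": "HSC", "exposure": 903334,
                               "detector": 20})
    butler.ingest(FileDataset(path="/data/HSC-903334-20.fits", refs=[ref]),
                  transfer="symlink")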

922 """ 

923 if not self.isWriteable(): 

924 raise TypeError("Butler is read-only.") 

925 if run is None: 

926 if self.run is None: 

927 raise TypeError("No run provided.") 

928 run = self.run 

929 

930 # Reorganize the inputs so they're grouped by DatasetType and then 

931 # data ID. We also include a list of DatasetRefs for each FileDataset 

932 # to hold the resolved DatasetRefs returned by the Registry, before 

933 # it's safe to swap them into FileDataset.refs. 

934 # Some type annotation aliases to make that clearer: 

935 GroupForType = Dict[DataCoordinate, Tuple[FileDataset, List[DatasetRef]]] 

936 GroupedData = MutableMapping[DatasetType, GroupForType] 

937 # The actual data structure: 

938 groupedData: GroupedData = defaultdict(dict) 

939 # And the nested loop that populates it: 

940 for dataset in datasets: 

941 # This list intentionally shared across the inner loop, since it's 

942 # associated with `dataset`. 

943 resolvedRefs = [] 

944 for ref in dataset.refs: 

945 groupedData[ref.datasetType][ref.dataId] = (dataset, resolvedRefs) 

946 

947 # Now we can bulk-insert into Registry for each DatasetType. 

948 for datasetType, groupForType in groupedData.items(): 

949 refs = self.registry.insertDatasets(datasetType, 

950 dataIds=groupForType.keys(), 

951 run=run, 

952 recursive=True) 

953 # Append those resolved DatasetRefs to the new lists we set up for 

954 # them. 

955 for ref, (_, resolvedRefs) in zip(refs, groupForType.values()): 

956 resolvedRefs.append(ref) 

957 

958 # Go back to the original FileDatasets to replace their refs with the 

959 # new resolved ones. 

960 for groupForType in groupedData.values(): 

961 for dataset, resolvedRefs in groupForType.values(): 

962 dataset.refs = resolvedRefs 

963 

964 # Bulk-insert everything into Datastore. 

965 self.datastore.ingest(*datasets, transfer=transfer) 

966 

967 @contextlib.contextmanager 

968 def export(self, *, directory: Optional[str] = None, 

969 filename: Optional[str] = None, 

970 format: Optional[str] = None, 

971 transfer: Optional[str] = None) -> ContextManager[RepoExport]: 

972 """Export datasets from the repository represented by this `Butler`. 

973 

974 This method is a context manager that returns a helper object 

975 (`RepoExport`) that is used to indicate what information from the 

976 repository should be exported. 

977 

978 Parameters 

979 ---------- 

980 directory : `str`, optional 

981 Directory dataset files should be written to if ``transfer`` is not 

982 `None`. 

983 filename : `str`, optional 

984 Name for the file that will include database information associated 

985 with the exported datasets. If this is not an absolute path and 

986 ``directory`` is not `None`, it will be written to ``directory`` 

987 instead of the current working directory. Defaults to 

988 "export.{format}". 

989 format : `str`, optional 

990 File format for the database information file. If `None`, the 

991 extension of ``filename`` will be used. 

992 transfer : `str`, optional 

993 Transfer mode passed to `Datastore.export`. 

994 

995 Raises 

996 ------ 

997 TypeError 

998 Raised if the set of arguments passed is inconsistent. 

999 

1000 Examples 

1001 -------- 

1002 Typically the `Registry.queryDimensions` and `Registry.queryDatasets` 

1003 methods are used to provide the iterables over data IDs and/or datasets 

1004 to be exported:: 

1005 

1006 with butler.export(filename="exports.yaml") as export: 

1007 # Export all flats, and the calibration_label dimensions 

1008 # associated with them. 

1009 export.saveDatasets(butler.registry.queryDatasets("flat"), 

1010 elements=[butler.registry.dimensions["calibration_label"]]) 

1011 # Export all datasets that start with "deepCoadd_" and all of 

1012 # their associated data ID information. 

1013 export.saveDatasets(butler.registry.queryDatasets("deepCoadd_*")) 

1014 """ 

1015 if directory is None and transfer is not None: 

1016 raise TypeError("Cannot transfer without providing a directory.") 

1017 if transfer == "move": 

1018 raise TypeError("Transfer may not be 'move': export is read-only") 

1019 if format is None: 

1020 if filename is None: 

1021 raise TypeError("At least one of 'filename' or 'format' must be provided.") 

1022 else: 

1023 _, format = os.path.splitext(filename) 

1024 elif filename is None: 

1025 filename = f"export.{format}" 

1026 if directory is not None: 

1027 filename = os.path.join(directory, filename) 

1028 BackendClass = getClassOf(self._config["repo_transfer_formats"][format]["export"]) 

1029 with open(filename, 'w') as stream: 

1030 backend = BackendClass(stream) 

1031 try: 

1032 helper = RepoExport(self.registry, self.datastore, backend=backend, 

1033 directory=directory, transfer=transfer) 

1034 yield helper 

1035 except BaseException: 

1036 raise 

1037 else: 

1038 helper._finish() 

1039 

1040 def import_(self, *, directory: Optional[str] = None, 

1041 filename: Optional[str] = None, 

1042 format: Optional[str] = None, 

1043 transfer: Optional[str] = None): 

1044 """Import datasets exported from a different butler repository. 

1045 

1046 Parameters 

1047 ---------- 

1048 directory : `str`, optional 

1049 Directory containing dataset files. If `None`, all file paths 

1050 must be absolute. 

1051 filename : `str`, optional 

1052 Name of the file containing database information associated 

1053 with the exported datasets. If this is not an absolute path, does 

1054 not exist in the current working directory, and ``directory`` is 

1055 not `None`, it is assumed to be in ``directory``. Defaults to 

1056 "export.{format}". 

1057 format : `str`, optional 

1058 File format for the database information file. If `None`, the 

1059 extension of ``filename`` will be used. 

1060 transfer : `str`, optional 

1061 Transfer mode used when loading the exported dataset files into this repository's datastore. 

1062 

1063 Raises 

1064 ------ 

1065 TypeError 

1066 Raised if the set of arguments passed is inconsistent, or if the 

1067 butler is read-only. 
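
Examples
--------
A sketch of loading an export produced by `Butler.export`; the paths
are illustrative::

    butler = Butler("/path/to/destination/repo", writeable=True)
    butler.import_(directory="/path/to/exported/files",
                   filename="exports.yaml", transfer="copy")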

1068 """ 

1069 if not self.isWriteable(): 

1070 raise TypeError("Butler is read-only.") 

1071 if format is None: 

1072 if filename is None: 

1073 raise TypeError("At least one of 'filename' or 'format' must be provided.") 

1074 else: 

1075 _, format = os.path.splitext(filename) 

1076 elif filename is None: 

1077 filename = f"export.{format}" 

1078 if directory is not None and not os.path.exists(filename): 

1079 filename = os.path.join(directory, filename) 

1080 BackendClass = getClassOf(self._config["repo_transfer_formats"][format]["import"]) 

1081 with open(filename, 'r') as stream: 

1082 backend = BackendClass(stream, self.registry) 

1083 backend.register() 

1084 with self.transaction(): 

1085 backend.load(self.datastore, directory=directory, transfer=transfer) 

1086 

1087 def validateConfiguration(self, logFailures: bool = False, 

1088 datasetTypeNames: Optional[Iterable[str]] = None, 

1089 ignore: Optional[Iterable[str]] = None): 

1090 """Validate butler configuration. 

1091 

1092 Checks that each `DatasetType` can be stored in the `Datastore`. 

1093 

1094 Parameters 

1095 ---------- 

1096 logFailures : `bool`, optional 

1097 If `True`, output a log message for every validation error 

1098 detected. 

1099 datasetTypeNames : iterable of `str`, optional 

1100 The `DatasetType` names that should be checked. This allows 

1101 only a subset to be selected. 

1102 ignore : iterable of `str`, optional 

1103 Names of DatasetTypes to skip over. This can be used to skip 

1104 known problems. If a named `DatasetType` corresponds to a 

1105 composite, all components of that `DatasetType` will also be 

1106 ignored. 

1107 

1108 Raises 

1109 ------ 

1110 ButlerValidationError 

1111 Raised if there is some inconsistency with how this Butler 

1112 is configured. 

1113 """ 

1114 if datasetTypeNames: 

1115 entities = [self.registry.getDatasetType(name) for name in datasetTypeNames] 

1116 else: 

1117 entities = list(self.registry.getAllDatasetTypes()) 

1118 

1119 # filter out anything from the ignore list 

1120 if ignore: 

1121 ignore = set(ignore) 

1122 entities = [e for e in entities if e.name not in ignore and e.nameAndComponent()[0] not in ignore] 

1123 else: 

1124 ignore = set() 

1125 

1126 # Find all the registered instruments 

1127 instruments = set( 

1128 dataId["instrument"] for dataId in self.registry.queryDimensions(["instrument"]) 

1129 ) 

1130 

1131 # For each datasetType that has an instrument dimension, create 

1132 # a DatasetRef for each defined instrument 

1133 datasetRefs = [] 

1134 

1135 for datasetType in entities: 

1136 if "instrument" in datasetType.dimensions: 

1137 for instrument in instruments: 

1138 datasetRef = DatasetRef(datasetType, {"instrument": instrument}, conform=False) 

1139 datasetRefs.append(datasetRef) 

1140 

1141 entities.extend(datasetRefs) 

1142 

1143 datastoreErrorStr = None 

1144 try: 

1145 self.datastore.validateConfiguration(entities, logFailures=logFailures) 

1146 except ValidationError as e: 

1147 datastoreErrorStr = str(e) 

1148 

1149 # Also check that the LookupKeys used by the datastores match 

1150 # registry and storage class definitions 

1151 keys = self.datastore.getLookupKeys() 

1152 

1153 failedNames = set() 

1154 failedDataId = set() 

1155 for key in keys: 

1156 datasetType = None 

1157 if key.name is not None: 

1158 if key.name in ignore: 

1159 continue 

1160 

1161 # skip if specific datasetType names were requested and this 

1162 # name does not match 

1163 if datasetTypeNames and key.name not in datasetTypeNames: 

1164 continue 

1165 

1166 # See if it is a StorageClass or a DatasetType 

1167 if key.name in self.storageClasses: 

1168 pass 

1169 else: 

1170 try: 

1171 self.registry.getDatasetType(key.name) 

1172 except KeyError: 

1173 if logFailures: 

1174 log.fatal("Key '%s' does not correspond to a DatasetType or StorageClass", key) 

1175 failedNames.add(key) 

1176 else: 

1177 # Dimensions are checked for consistency when the Butler 

1178 # is created and rendezvoused with a universe. 

1179 pass 

1180 

1181 # Check that the instrument is a valid instrument 

1182 # Currently only support instrument so check for that 

1183 if key.dataId: 

1184 dataIdKeys = set(key.dataId) 

1185 if set(["instrument"]) != dataIdKeys: 

1186 if logFailures: 

1187 log.fatal("Key '%s' has unsupported DataId override", key) 

1188 failedDataId.add(key) 

1189 elif key.dataId["instrument"] not in instruments: 

1190 if logFailures: 

1191 log.fatal("Key '%s' has unknown instrument", key) 

1192 failedDataId.add(key) 

1193 

1194 messages = [] 

1195 

1196 if datastoreErrorStr: 

1197 messages.append(datastoreErrorStr) 

1198 

1199 for failed, msg in ((failedNames, "Keys without corresponding DatasetType or StorageClass entry: "), 

1200 (failedDataId, "Keys with bad DataId entries: ")): 

1201 if failed: 

1202 msg += ", ".join(str(k) for k in failed) 

1203 messages.append(msg) 

1204 

1205 if messages: 

1206 raise ValidationError(";\n".join(messages)) 

1207 

1208 registry: Registry 

1209 """The object that manages dataset metadata and relationships (`Registry`). 

1210 

1211 Most operations that don't involve reading or writing butler datasets are 

1212 accessible only via `Registry` methods. 

1213 """ 

1214 

1215 datastore: Datastore 

1216 """The object that manages actual dataset storage (`Datastore`). 

1217 

1218 Direct user access to the datastore should rarely be necessary; the primary 

1219 exception is the case where a `Datastore` implementation provides extra 

1220 functionality beyond what the base class defines. 

1221 """ 

1222 

1223 storageClasses: StorageClassFactory 

1224 """An object that maps known storage class names to objects that fully 

1225 describe them (`StorageClassFactory`). 

1226 """ 

1227 

1228 run: Optional[str] 

1229 """Name of the run this butler writes outputs to (`str` or `None`). 

1230 """ 

1231 

1232 collection: Optional[str] 

1233 """Name of the collection this butler searches for datasets (`str` or 

1234 `None`). 

1235 """