# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""
Butler top level classes.
"""
from __future__ import annotations

__all__ = ("Butler", "ButlerValidationError")

import os
from collections import defaultdict
import contextlib
import logging
from typing import (
    Any,
    ClassVar,
    ContextManager,
    Dict,
    Iterable,
    List,
    MutableMapping,
    Optional,
    Tuple,
    Union,
)

try:
    import boto3
except ImportError:
    boto3 = None

from lsst.utils import doImport
from .core import (
    ButlerURI,
    CompositesMap,
    Config,
    ConfigSubset,
    DataCoordinate,
    DataId,
    DatasetRef,
    DatasetType,
    Datastore,
    FileDataset,
    Quantum,
    RepoExport,
    StorageClassFactory,
    ValidationError,
)
from .core.repoRelocation import BUTLER_ROOT_TAG
from .core.safeFileIo import safeMakeDir
from .core.utils import transactional, getClassOf
from ._deferredDatasetHandle import DeferredDatasetHandle
from ._butlerConfig import ButlerConfig
from .registry import Registry, RegistryConfig

log = logging.getLogger(__name__)


class ButlerValidationError(ValidationError):
    """There is a problem with the Butler configuration."""
    pass


class Butler:
    """Main entry point for the data access system.

    Attributes
    ----------
    config : `str`, `ButlerConfig` or `Config`, optional
        Configuration, or the path to one. If this is not a `ButlerConfig`,
        defaults will be read. If a `str`, may be the path to a directory
        containing a "butler.yaml" file.
    datastore : `Datastore`
        Datastore to use for storage.
    registry : `Registry`
        Registry to use for lookups.

    Parameters
    ----------
    config : `ButlerConfig`, `Config` or `str`, optional
        Configuration. Anything acceptable to the `ButlerConfig` constructor.
        If a directory path is given the configuration will be read from a
        ``butler.yaml`` file in that location. If `None` is given default
        values will be used.
    butler : `Butler`, optional
        If provided, construct a new Butler that uses the same registry and
        datastore as the given one, but with the given collection and run.
        Incompatible with the ``config``, ``searchPaths``, and ``writeable``
        arguments.
    collection : `str`, optional
        Collection to use for all input lookups. May be `None` to either use
        the value passed to ``run``, or to defer passing a collection until
        the methods that require one are called.
    run : `str`, optional
        Name of the run datasets should be output to; also used as a tagged
        collection name these datasets will be associated with. If the run
        does not exist, it will be created. If ``collection`` is `None`, this
        collection will be used for input lookups as well; if not, it must
        have the same value as ``run``.
    searchPaths : `list` of `str`, optional
        Directory paths to search when calculating the full Butler
        configuration. Not used if the supplied config is already a
        `ButlerConfig`.
    writeable : `bool`, optional
        Explicitly sets whether the butler supports write operations. If not
        provided, a read-only butler is created unless ``run`` is passed.

    Raises
    ------
    ValueError
        Raised if neither "collection" nor "run" are provided by argument or
        config, or if both are provided and are inconsistent.
    """
    def __init__(self, config: Union[Config, str, None] = None, *,
                 butler: Optional[Butler] = None,
                 collection: Optional[str] = None,
                 run: Optional[str] = None,
                 searchPaths: Optional[List[str]] = None,
                 writeable: Optional[bool] = None):
        if butler is not None:
            if config is not None or searchPaths is not None or writeable is not None:
                raise TypeError("Cannot pass 'config', 'searchPaths', or 'writeable' "
                                "arguments with 'butler' argument.")
            self.registry = butler.registry
            self.datastore = butler.datastore
            self.storageClasses = butler.storageClasses
            self._composites = butler._composites
            self._config = butler._config
        else:
            self._config = ButlerConfig(config, searchPaths=searchPaths)
            if "root" in self._config:
                butlerRoot = self._config["root"]
            else:
                butlerRoot = self._config.configDir
            if writeable is None:
                writeable = run is not None
            self.registry = Registry.fromConfig(self._config, butlerRoot=butlerRoot, writeable=writeable)
            self.datastore = Datastore.fromConfig(self._config, self.registry, butlerRoot=butlerRoot)
            self.storageClasses = StorageClassFactory()
            self.storageClasses.addFromConfig(self._config)
            self._composites = CompositesMap(self._config, universe=self.registry.dimensions)
        if "run" in self._config or "collection" in self._config:
            raise ValueError("Passing a run or collection via configuration is no longer supported.")
        if run is not None and writeable is False:
            raise ValueError(f"Butler initialized with run='{run}', "
                             f"but is read-only; use collection='{run}' instead.")
        self.run = run
        if collection is None and run is not None:
            collection = run
        if self.run is not None and collection != self.run:
            raise ValueError(
                "Run ({}) and collection ({}) are inconsistent.".format(self.run, collection)
            )
        self.collection = collection
        if self.run is not None:
            self.registry.registerRun(self.run)
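
    # Example (illustrative sketch; the repository path, collection, and run
    # names are placeholders, not values defined by this package):
    #
    #     from lsst.daf.butler import Butler
    #
    #     # Read-only butler that searches a single collection.
    #     butler = Butler("/path/to/repo", collection="shared/example")
    #
    #     # Read-write butler; the run is created if it does not exist and,
    #     # because no collection was given, it is also used for input lookups.
    #     butler = Butler("/path/to/repo", run="u/someone/test-run")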

    GENERATION: ClassVar[int] = 3
    """This is a Generation 3 Butler.

    This attribute may be removed in the future, once the Generation 2 Butler
    interface has been fully retired; it should only be used in transitional
    code.
    """

    @staticmethod
    def makeRepo(root: str, config: Union[Config, str, None] = None, standalone: bool = False,
                 createRegistry: bool = True, searchPaths: Optional[List[str]] = None,
                 forceConfigRoot: bool = True, outfile: Optional[str] = None) -> Config:
        """Create an empty data repository by adding a butler.yaml config
        to a repository root directory.

        Parameters
        ----------
        root : `str`
            Filesystem path to the root of the new repository. Will be created
            if it does not exist.
        config : `Config` or `str`, optional
            Configuration to write to the repository, after setting any
            root-dependent Registry or Datastore config options. Cannot
            be a `ButlerConfig` or a `ConfigSubset`. If `None`, default
            configuration will be used. Root-dependent config options
            specified in this config are overwritten if ``forceConfigRoot``
            is `True`.
        standalone : `bool`
            If `True`, write all expanded defaults, not just customized or
            repository-specific settings.
            This (mostly) decouples the repository from the default
            configuration, insulating it from changes to the defaults (which
            may be good or bad, depending on the nature of the changes).
            Future *additions* to the defaults will still be picked up when
            initializing `Butlers` to repos created with ``standalone=True``.
        createRegistry : `bool`, optional
            If `True`, create a new Registry.
        searchPaths : `list` of `str`, optional
            Directory paths to search when calculating the full butler
            configuration.
        forceConfigRoot : `bool`, optional
            If `False`, any values present in the supplied ``config`` that
            would normally be reset are not overridden and will appear
            directly in the output config. This allows non-standard overrides
            of the root directory for a datastore or registry to be given.
            If this parameter is `True` the values for ``root`` will be
            forced into the resulting config if appropriate.
        outfile : `str`, optional
            If not `None`, the output configuration will be written to this
            location rather than into the repository itself. Can be a URI
            string. Can refer to a directory that will be used to write
            ``butler.yaml``.

        Returns
        -------
        config : `Config`
            The updated `Config` instance written to the repo.

        Raises
        ------
        ValueError
            Raised if a ButlerConfig or ConfigSubset is passed instead of a
            regular Config (as these subclasses would make it impossible to
            support ``standalone=False``).
        os.error
            Raised if the directory does not exist, exists but is not a
            directory, or cannot be created.

        Notes
        -----
        Note that when ``standalone=False`` (the default), the configuration
        search path (see `ConfigSubset.defaultSearchPaths`) that was used to
        construct the repository should also be used to construct any Butlers
        to avoid configuration inconsistencies.
        """
        if isinstance(config, (ButlerConfig, ConfigSubset)):
            raise ValueError("makeRepo must be passed a regular Config without defaults applied.")

        # for "file" schemes we are assuming POSIX semantics for paths, for
        # schemeless URIs we are assuming os.path semantics.
        uri = ButlerURI(root)
        if uri.scheme == "file" or not uri.scheme:
            if not os.path.isdir(uri.ospath):
                safeMakeDir(uri.ospath)
        elif uri.scheme == "s3":
            s3 = boto3.resource("s3")
            # implies bucket exists, if not another level of checks
            bucket = s3.Bucket(uri.netloc)
            bucket.put_object(Bucket=uri.netloc, Key=uri.relativeToPathRoot)
        else:
            raise ValueError(f"Unrecognized scheme: {uri.scheme}")
        config = Config(config)

        # If we are creating a new repo from scratch with relative roots,
        # do not propagate an explicit root from the config file
        if "root" in config:
            del config["root"]

        full = ButlerConfig(config, searchPaths=searchPaths)  # this applies defaults
        datastoreClass = doImport(full["datastore", "cls"])
        datastoreClass.setConfigRoot(BUTLER_ROOT_TAG, config, full, overwrite=forceConfigRoot)

        # if key exists in given config, parse it, otherwise parse the defaults
        # in the expanded config
        if config.get(("registry", "db")):
            registryConfig = RegistryConfig(config)
        else:
            registryConfig = RegistryConfig(full)
        defaultDatabaseUri = registryConfig.makeDefaultDatabaseUri(BUTLER_ROOT_TAG)
        if defaultDatabaseUri is not None:
            Config.updateParameters(RegistryConfig, config, full,
                                    toUpdate={"db": defaultDatabaseUri},
                                    overwrite=forceConfigRoot)
        else:
            Config.updateParameters(RegistryConfig, config, full, toCopy=("db",),
                                    overwrite=forceConfigRoot)

        if standalone:
            config.merge(full)
        if outfile is not None:
            # When writing to a separate location we must include
            # the root of the butler repo in the config else it won't know
            # where to look.
            config["root"] = uri.geturl()
            configURI = outfile
        else:
            configURI = uri
        config.dumpToUri(configURI)

        # Create Registry and populate tables
        Registry.fromConfig(config, create=createRegistry, butlerRoot=root)
        return config
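
    # Example (illustrative sketch; the paths and run name are placeholders):
    # creating a new repository with default configuration and then
    # constructing a Butler against it.
    #
    #     config = Butler.makeRepo("/path/to/new/repo")
    #     butler = Butler("/path/to/new/repo", run="ingest/run")
    #
    # Passing ``standalone=True`` writes the fully expanded defaults into the
    # repository's butler.yaml instead of only the repository-specific
    # overrides.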

    @classmethod
    def _unpickle(cls, config: ButlerConfig, collection: str, run: Optional[str], writeable: bool) -> Butler:
        """Callable used to unpickle a Butler.

        We prefer not to use ``Butler.__init__`` directly so we can force some
        of its many arguments to be keyword-only (note that ``__reduce__``
        can only invoke callables with positional arguments).

        Parameters
        ----------
        config : `ButlerConfig`
            Butler configuration, already coerced into a true `ButlerConfig`
            instance (and hence after any search paths for overrides have been
            utilized).
        collection : `str`
            String name of a collection to use for read operations.
        run : `str`, optional
            String name of a run to use for write operations, or `None` for a
            read-only butler.
        writeable : `bool`
            Whether the new Butler should support write operations.

        Returns
        -------
        butler : `Butler`
            A new `Butler` instance.
        """
        return cls(config=config, collection=collection, run=run, writeable=writeable)

    def __reduce__(self):
        """Support pickling.
        """
        return (Butler._unpickle, (self._config, self.collection, self.run, self.registry.isWriteable()))

    def __str__(self):
        return "Butler(collection='{}', datastore='{}', registry='{}')".format(
            self.collection, self.datastore, self.registry)

    def isWriteable(self) -> bool:
        """Return `True` if this `Butler` supports write operations.
        """
        return self.registry.isWriteable()

    @contextlib.contextmanager
    def transaction(self):
        """Context manager supporting `Butler` transactions.

        Transactions can be nested.
        """
        with self.registry.transaction():
            with self.datastore.transaction():
                yield
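
    # Example (illustrative sketch; the dataset type names and data ID are
    # placeholders): grouping several writes so that registry and datastore
    # changes are rolled back together if any one of them fails.
    #
    #     with butler.transaction():
    #         butler.put(catalog, "src", dataId)
    #         butler.put(image, "calexp", dataId)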

    def _standardizeArgs(self, datasetRefOrType: Union[DatasetRef, DatasetType, str],
                         dataId: Optional[DataId] = None, **kwds: Any) -> Tuple[DatasetType, DataId]:
        """Standardize the arguments passed to several Butler APIs.

        Parameters
        ----------
        datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
            When `DatasetRef` is provided, ``dataId`` should be `None`.
            Otherwise the `DatasetType` or name thereof.
        dataId : `dict` or `DataCoordinate`
            A `dict` of `Dimension` link name, value pairs that label the
            `DatasetRef` within a Collection. When `None`, a `DatasetRef`
            should be provided as the first argument.
        kwds
            Additional keyword arguments used to augment or construct a
            `DataCoordinate`. See `DataCoordinate.standardize`
            parameters.

        Returns
        -------
        datasetType : `DatasetType`
            A `DatasetType` instance extracted from ``datasetRefOrType``.
        dataId : `dict` or `DataId`, optional
            Argument that can be used (along with ``kwds``) to construct a
            `DataId`.

        Notes
        -----
        Butler APIs that conceptually need a DatasetRef also allow passing a
        `DatasetType` (or the name of one) and a `DataId` (or a dict and
        keyword arguments that can be used to construct one) separately. This
        method accepts those arguments and always returns a true `DatasetType`
        and a `DataId` or `dict`.

        Standardization of `dict` vs `DataId` is best handled by passing the
        returned ``dataId`` (and ``kwds``) to `Registry` APIs, which are
        generally similarly flexible.
        """
        externalDatasetType = None
        internalDatasetType = None
        if isinstance(datasetRefOrType, DatasetRef):
            if dataId is not None or kwds:
                raise ValueError("DatasetRef given, cannot use dataId as well")
            externalDatasetType = datasetRefOrType.datasetType
            dataId = datasetRefOrType.dataId
        else:
            # Don't check whether DataId is provided, because Registry APIs
            # can usually construct a better error message when it wasn't.
            if isinstance(datasetRefOrType, DatasetType):
                externalDatasetType = datasetRefOrType
            else:
                internalDatasetType = self.registry.getDatasetType(datasetRefOrType)

        # Check that they are self-consistent
        if externalDatasetType is not None:
            internalDatasetType = self.registry.getDatasetType(externalDatasetType.name)
            if externalDatasetType != internalDatasetType:
                raise ValueError(f"Supplied dataset type ({externalDatasetType}) inconsistent with "
                                 f"registry definition ({internalDatasetType})")

        return internalDatasetType, dataId

    def _findDatasetRef(self, datasetRefOrType: Union[DatasetRef, DatasetType, str],
                        dataId: Optional[DataId] = None, *,
                        collection: Optional[str] = None,
                        allowUnresolved: bool = False,
                        **kwds: Any) -> DatasetRef:
        """Shared logic for methods that start with a search for a dataset in
        the registry.

        Parameters
        ----------
        datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
            When `DatasetRef` is provided, ``dataId`` should be `None`.
            Otherwise the `DatasetType` or name thereof.
        dataId : `dict` or `DataCoordinate`, optional
            A `dict` of `Dimension` link name, value pairs that label the
            `DatasetRef` within a Collection. When `None`, a `DatasetRef`
            should be provided as the first argument.
        collection : `str`, optional
            Name of the collection to search, overriding ``self.collection``.
        allowUnresolved : `bool`, optional
            If `True`, return an unresolved `DatasetRef` if finding a resolved
            one in the `Registry` fails. Defaults to `False`.
        kwds
            Additional keyword arguments used to augment or construct a
            `DataId`. See `DataId` parameters.

        Returns
        -------
        ref : `DatasetRef`
            A reference to the dataset identified by the given arguments.

        Raises
        ------
        LookupError
            Raised if no matching dataset exists in the `Registry` (and
            ``allowUnresolved is False``).
        ValueError
            Raised if a resolved `DatasetRef` was passed as an input, but it
            differs from the one found in the registry in this collection.
        TypeError
            Raised if ``collection`` and ``self.collection`` are both `None`.
        """
        datasetType, dataId = self._standardizeArgs(datasetRefOrType, dataId, **kwds)
        if isinstance(datasetRefOrType, DatasetRef):
            idNumber = datasetRefOrType.id
        else:
            idNumber = None
        # Expand the data ID first instead of letting registry.find do it, so
        # we get the result even if it returns None.
        dataId = self.registry.expandDataId(dataId, graph=datasetType.dimensions, **kwds)
        if collection is None:
            collection = self.collection
            if collection is None:
                raise TypeError("No collection provided.")
        # Always lookup the DatasetRef, even if one is given, to ensure it is
        # present in the current collection.
        ref = self.registry.find(collection, datasetType, dataId)
        if ref is None:
            if allowUnresolved:
                return DatasetRef(datasetType, dataId)
            else:
                raise LookupError(f"Dataset {datasetType.name} with data ID {dataId} "
                                  f"could not be found in collection '{collection}'.")
        if idNumber is not None and idNumber != ref.id:
            raise ValueError(f"DatasetRef.id provided ({idNumber}) does not match "
                             f"id ({ref.id}) in registry in collection '{collection}'.")
        return ref

    @transactional
    def put(self, obj: Any, datasetRefOrType: Union[DatasetRef, DatasetType, str],
            dataId: Optional[DataId] = None, *,
            producer: Optional[Quantum] = None,
            run: Optional[str] = None,
            **kwds: Any) -> DatasetRef:
        """Store and register a dataset.

        Parameters
        ----------
        obj : `object`
            The dataset.
        datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
            When `DatasetRef` is provided, ``dataId`` should be `None`.
            Otherwise the `DatasetType` or name thereof.
        dataId : `dict` or `DataCoordinate`
            A `dict` of `Dimension` link name, value pairs that label the
            `DatasetRef` within a Collection. When `None`, a `DatasetRef`
            should be provided as the second argument.
        producer : `Quantum`, optional
            The producer.
        run : `str`, optional
            The name of the run the dataset should be added to, overriding
            ``self.run``.
        kwds
            Additional keyword arguments used to augment or construct a
            `DataCoordinate`. See `DataCoordinate.standardize`
            parameters.

        Returns
        -------
        ref : `DatasetRef`
            A reference to the stored dataset, updated with the correct id if
            given.

        Raises
        ------
        TypeError
            Raised if the butler is read-only or if no run has been provided.
        """
        log.debug("Butler put: %s, dataId=%s, producer=%s, run=%s", datasetRefOrType, dataId, producer, run)
        if not self.isWriteable():
            raise TypeError("Butler is read-only.")
        datasetType, dataId = self._standardizeArgs(datasetRefOrType, dataId, **kwds)
        if isinstance(datasetRefOrType, DatasetRef) and datasetRefOrType.id is not None:
            raise ValueError("DatasetRef must not be in registry, must have None id")

        if run is None:
            if self.run is None:
                raise TypeError("No run provided.")
            run = self.run

        isVirtualComposite = self._composites.shouldBeDisassembled(datasetType)

        # Add Registry Dataset entry. If not a virtual composite, add
        # and attach components at the same time.
        dataId = self.registry.expandDataId(dataId, graph=datasetType.dimensions, **kwds)
        ref, = self.registry.insertDatasets(datasetType, run=run, dataIds=[dataId],
                                            producer=producer, recursive=not isVirtualComposite)

        # Check to see if this datasetType requires disassembly
        if isVirtualComposite:
            components = datasetType.storageClass.assembler().disassemble(obj)
            for component, info in components.items():
                compTypeName = datasetType.componentTypeName(component)
                compRef = self.put(info.component, compTypeName, dataId, producer=producer, run=run)
                self.registry.attachComponent(component, ref, compRef)
        else:
            # This is an entity without a disassembler.
            self.datastore.put(obj, ref)

        return ref
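
    # Example (illustrative sketch; the dataset type, data ID keys, and run
    # name are placeholders, and the dataset type must already be registered):
    #
    #     ref = butler.put(exposure, "calexp",
    #                      instrument="HSC", visit=903334, detector=16,
    #                      run="u/someone/processing-run")
    #     # ``ref`` is resolved: ``ref.id`` is now populated by the Registry.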

    def getDirect(self, ref: DatasetRef, *, parameters: Optional[Dict[str, Any]] = None):
        """Retrieve a stored dataset.

        Unlike `Butler.get`, this method allows datasets outside the Butler's
        collection to be read as long as the `DatasetRef` that identifies them
        can be obtained separately.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to an already stored dataset.
        parameters : `dict`
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the dataset.

        Returns
        -------
        obj : `object`
            The dataset.
        """
        # if the ref exists in the store we return it directly
        if self.datastore.exists(ref):
            return self.datastore.get(ref, parameters=parameters)
        elif ref.isComposite():
            # Check that we haven't got any unknown parameters
            ref.datasetType.storageClass.validateParameters(parameters)
            # Reconstruct the composite
            usedParams = set()
            components = {}
            for compName, compRef in ref.components.items():
                # make a dictionary of parameters containing only the subset
                # supported by the StorageClass of the components
                compParams = compRef.datasetType.storageClass.filterParameters(parameters)
                usedParams.update(set(compParams))
                components[compName] = self.datastore.get(compRef, parameters=compParams)

            # Any unused parameters will have to be passed to the assembler
            if parameters:
                unusedParams = {k: v for k, v in parameters.items() if k not in usedParams}
            else:
                unusedParams = {}

            # Assemble the components
            inMemoryDataset = ref.datasetType.storageClass.assembler().assemble(components)
            return ref.datasetType.storageClass.assembler().handleParameters(inMemoryDataset,
                                                                             parameters=unusedParams)
        else:
            # single entity in datastore
            raise FileNotFoundError(f"Unable to locate dataset '{ref}' in datastore {self.datastore.name}")

    def getDeferred(self, datasetRefOrType: Union[DatasetRef, DatasetType, str],
                    dataId: Optional[DataId] = None, *,
                    parameters: Union[dict, None] = None,
                    collection: Optional[str] = None,
                    **kwds: Any) -> DeferredDatasetHandle:
        """Create a `DeferredDatasetHandle` which can later retrieve a dataset.

        Parameters
        ----------
        datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
            When `DatasetRef` is provided, ``dataId`` should be `None`.
            Otherwise the `DatasetType` or name thereof.
        dataId : `dict` or `DataCoordinate`, optional
            A `dict` of `Dimension` link name, value pairs that label the
            `DatasetRef` within a Collection. When `None`, a `DatasetRef`
            should be provided as the first argument.
        parameters : `dict`
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the dataset.
        collection : `str`, optional
            Name of the collection to search, overriding ``self.collection``.
        kwds
            Additional keyword arguments used to augment or construct a
            `DataId`. See `DataId` parameters.

        Returns
        -------
        obj : `DeferredDatasetHandle`
            A handle which can be used to retrieve a dataset at a later time.

        Raises
        ------
        LookupError
            Raised if no matching dataset exists in the `Registry` (and
            ``allowUnresolved is False``).
        ValueError
            Raised if a resolved `DatasetRef` was passed as an input, but it
            differs from the one found in the registry in this collection.
        TypeError
            Raised if ``collection`` and ``self.collection`` are both `None`.
        """
        ref = self._findDatasetRef(datasetRefOrType, dataId, collection=collection, **kwds)
        return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters)

    def get(self, datasetRefOrType: Union[DatasetRef, DatasetType, str],
            dataId: Optional[DataId] = None, *,
            parameters: Optional[Dict[str, Any]] = None,
            collection: Optional[str] = None,
            **kwds: Any) -> Any:
        """Retrieve a stored dataset.

        Parameters
        ----------
        datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
            When `DatasetRef` is provided, ``dataId`` should be `None`.
            Otherwise the `DatasetType` or name thereof.
        dataId : `dict` or `DataCoordinate`
            A `dict` of `Dimension` link name, value pairs that label the
            `DatasetRef` within a Collection. When `None`, a `DatasetRef`
            should be provided as the first argument.
        parameters : `dict`
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the dataset.
        collection : `str`, optional
            Collection to search, overriding ``self.collection``.
        kwds
            Additional keyword arguments used to augment or construct a
            `DataCoordinate`. See `DataCoordinate.standardize`
            parameters.

        Returns
        -------
        obj : `object`
            The dataset.

        Raises
        ------
        ValueError
            Raised if a resolved `DatasetRef` was passed as an input, but it
            differs from the one found in the registry in this collection.
        LookupError
            Raised if no matching dataset exists in the `Registry`.
        TypeError
            Raised if ``collection`` and ``self.collection`` are both `None`.
        """
        log.debug("Butler get: %s, dataId=%s, parameters=%s", datasetRefOrType, dataId, parameters)
        ref = self._findDatasetRef(datasetRefOrType, dataId, collection=collection, **kwds)
        return self.getDirect(ref, parameters=parameters)
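
    # Example (illustrative sketch; the dataset type, data ID, and the "bbox"
    # parameter are placeholders that depend on the storage class in use):
    #
    #     calexp = butler.get("calexp", instrument="HSC", visit=903334,
    #                         detector=16)
    #
    #     # Read only part of the dataset via a StorageClass parameter:
    #     cutout = butler.get("calexp", dataId, parameters={"bbox": bbox})
    #
    #     # Defer the actual I/O until the object is needed:
    #     handle = butler.getDeferred("calexp", dataId)
    #     calexp = handle.get()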

    def getUri(self, datasetRefOrType: Union[DatasetRef, DatasetType, str],
               dataId: Optional[DataId] = None, *,
               predict: bool = False,
               collection: Optional[str] = None,
               run: Optional[str] = None,
               **kwds: Any) -> str:
        """Return the URI to the Dataset.

        Parameters
        ----------
        datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
            When `DatasetRef` is provided, ``dataId`` should be `None`.
            Otherwise the `DatasetType` or name thereof.
        dataId : `dict` or `DataCoordinate`
            A `dict` of `Dimension` link name, value pairs that label the
            `DatasetRef` within a Collection. When `None`, a `DatasetRef`
            should be provided as the first argument.
        predict : `bool`
            If `True`, allow URIs to be returned of datasets that have not
            been written.
        collection : `str`, optional
            Collection to search, overriding ``self.collection``.
        run : `str`, optional
            Run to use for predictions, overriding ``self.run``.
        kwds
            Additional keyword arguments used to augment or construct a
            `DataCoordinate`. See `DataCoordinate.standardize`
            parameters.

        Returns
        -------
        uri : `str`
            URI string pointing to the Dataset within the datastore. If the
            Dataset does not exist in the datastore, and if ``predict`` is
            `True`, the URI will be a prediction and will include a URI
            fragment "#predicted".
            If the datastore does not have entities that relate well
            to the concept of a URI the returned URI string will be
            descriptive. The returned URI is not guaranteed to be obtainable.

        Raises
        ------
        LookupError
            Raised if a URI has been requested for a dataset that does not
            exist and guessing is not allowed.
        ValueError
            Raised if a resolved `DatasetRef` was passed as an input, but it
            differs from the one found in the registry in this collection.
        TypeError
            Raised if ``collection`` and ``self.collection`` are both `None`.
        """
        ref = self._findDatasetRef(datasetRefOrType, dataId, allowUnresolved=predict, collection=collection,
                                   **kwds)
        if ref.id is None:  # only possible if predict is True
            if run is None:
                run = self.run
                if run is None:
                    raise TypeError("Cannot predict location with run=None.")
            # Lie about ID, because we can't guess it, and only
            # Datastore.getUri() will ever see it (and it doesn't use it).
            ref = ref.resolved(id=0, run=run)
        return self.datastore.getUri(ref, predict)

    def datasetExists(self, datasetRefOrType: Union[DatasetRef, DatasetType, str],
                      dataId: Optional[DataId] = None, *,
                      collection: Optional[str] = None,
                      **kwds: Any) -> bool:
        """Return True if the Dataset is actually present in the Datastore.

        Parameters
        ----------
        datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
            When `DatasetRef` is provided, ``dataId`` should be `None`.
            Otherwise the `DatasetType` or name thereof.
        dataId : `dict` or `DataCoordinate`
            A `dict` of `Dimension` link name, value pairs that label the
            `DatasetRef` within a Collection. When `None`, a `DatasetRef`
            should be provided as the first argument.
        collection : `str`, optional
            Collection to search, overriding ``self.collection``.
        kwds
            Additional keyword arguments used to augment or construct a
            `DataCoordinate`. See `DataCoordinate.standardize`
            parameters.

        Raises
        ------
        LookupError
            Raised if the dataset is not even present in the Registry.
        ValueError
            Raised if a resolved `DatasetRef` was passed as an input, but it
            differs from the one found in the registry in this collection.
        TypeError
            Raised if ``collection`` and ``self.collection`` are both `None`.
        """
        ref = self._findDatasetRef(datasetRefOrType, dataId, collection=collection, **kwds)
        return self.datastore.exists(ref)

    def remove(self, datasetRefOrType: Union[DatasetRef, DatasetType, str],
               dataId: Optional[DataId] = None, *,
               delete: bool = True, remember: bool = True, collection: Optional[str] = None, **kwds: Any):
        """Remove a dataset from the collection and possibly the repository.

        The identified dataset is always at least removed from the Butler's
        collection. By default it is also deleted from the Datastore (e.g.
        files are actually deleted), but the dataset is "remembered" by
        retaining its row in the dataset and provenance tables in the
        registry.

        If the dataset is a composite, all components will also be removed.

        Parameters
        ----------
        datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
            When `DatasetRef` is provided, ``dataId`` should be `None`.
            Otherwise the `DatasetType` or name thereof.
        dataId : `dict` or `DataId`
            A `dict` of `Dimension` link name, value pairs that label the
            `DatasetRef` within a Collection. When `None`, a `DatasetRef`
            should be provided as the first argument.
        delete : `bool`
            If `True` (default) actually delete the dataset from the
            Datastore (i.e. actually remove files).
        remember : `bool`
            If `True` (default), retain dataset and provenance records in
            the `Registry` for this dataset.
        collection : `str`, optional
            Collection to search, overriding ``self.collection``.
        kwds
            Additional keyword arguments used to augment or construct a
            `DataId`. See `DataId` parameters.

        Raises
        ------
        TypeError
            Raised if the butler is read-only, if no collection was provided,
            or if ``delete`` and ``remember`` are both `False`; a dataset
            cannot remain in a `Datastore` if its `Registry` entry is
            removed.
        OrphanedRecordError
            Raised if ``remember`` is `False` but the dataset is still present
            in a `Datastore` not recognized by this `Butler` client.
        ValueError
            Raised if a resolved `DatasetRef` was passed as an input, but it
            differs from the one found in the registry in this collection.
        """
        if not self.isWriteable():
            raise TypeError("Butler is read-only.")
        ref = self._findDatasetRef(datasetRefOrType, dataId, collection=collection, **kwds)
        if delete:
            # There is a difference between a concrete composite and virtual
            # composite. In a virtual composite the datastore is never
            # given the top level DatasetRef. In the concrete composite
            # the datastore knows all the refs and will clean up itself
            # if asked to remove the parent ref.
            # We can not check configuration for this since we can not trust
            # that the configuration is the same. We therefore have to ask
            # if the ref exists or not
            if self.datastore.exists(ref):
                self.datastore.remove(ref)
            elif ref.isComposite():
                datastoreNames = set(self.datastore.names)
                for r in ref.components.values():
                    # If a dataset was removed previously but remembered
                    # in registry, skip the removal in the datastore.
                    datastoreLocations = self.registry.getDatasetLocations(r)
                    if datastoreLocations & datastoreNames:
                        self.datastore.remove(r)
            else:
                raise FileNotFoundError(f"Dataset {ref} not known to datastore")
        elif not remember:
            raise ValueError("Cannot retain dataset in Datastore without keeping Registry dataset record.")
        if remember:
            self.registry.disassociate(self.collection, [ref])
        else:
            # This also implicitly disassociates.
            self.registry.removeDataset(ref)
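
    # Example (illustrative sketch; the dataset type and data ID are
    # placeholders) of the useful ``delete``/``remember`` combinations:
    #
    #     # Delete the stored file(s) but keep the Registry record (default).
    #     butler.remove("calexp", dataId)
    #
    #     # Delete the stored file(s) and the Registry dataset entry.
    #     butler.remove("calexp", dataId, remember=False)
    #
    #     # Drop the dataset from the collection only, leaving the file alone.
    #     butler.remove("calexp", dataId, delete=False)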

    @transactional
    def ingest(self, *datasets: FileDataset, transfer: Optional[str] = None, run: Optional[str] = None):
        """Store and register one or more datasets that already exist on disk.

        Parameters
        ----------
        datasets : `FileDataset`
            Each positional argument is a struct containing information about
            a file to be ingested, including its path (either absolute or
            relative to the datastore root, if applicable), a `DatasetRef`,
            and optionally a formatter class or its fully-qualified string
            name. If a formatter is not provided, the formatter that would be
            used for `put` is assumed. On successful return, all
            `FileDataset.refs` attributes will have their `DatasetRef.id`
            attribute populated and all `FileDataset.formatter` attributes
            will be set to the formatter class used. `FileDataset.path`
            attributes may be modified to put paths in whatever the datastore
            considers a standardized form.
        transfer : `str`, optional
            If not `None`, must be one of 'move', 'copy', 'hardlink', or
            'symlink', indicating how to transfer the file.
        run : `str`, optional
            The name of the run ingested datasets should be added to,
            overriding ``self.run``.

        Raises
        ------
        TypeError
            Raised if the butler is read-only or if no run was provided.
        NotImplementedError
            Raised if the `Datastore` does not support the given transfer
            mode.
        DatasetTypeNotSupportedError
            Raised if one or more files to be ingested have a dataset type
            that is not supported by the `Datastore`.
        FileNotFoundError
            Raised if one of the given files does not exist.
        FileExistsError
            Raised if transfer is not `None` but the (internal) location the
            file would be moved to is already occupied.

        Notes
        -----
        This operation is not fully exception safe: if a database operation
        fails, the given `FileDataset` instances may be only partially
        updated.

        It is atomic in terms of database operations (they will either all
        succeed or all fail) providing the database engine implements
        transactions correctly. It will attempt to be atomic in terms of
        filesystem operations as well, but this cannot be implemented
        rigorously for most datastores.
        """
        if not self.isWriteable():
            raise TypeError("Butler is read-only.")
        if run is None:
            if self.run is None:
                raise TypeError("No run provided.")
            run = self.run

        # Reorganize the inputs so they're grouped by DatasetType and then
        # data ID. We also include a list of DatasetRefs for each FileDataset
        # to hold the resolved DatasetRefs returned by the Registry, before
        # it's safe to swap them into FileDataset.refs.
        # Some type annotation aliases to make that clearer:
        GroupForType = Dict[DataCoordinate, Tuple[FileDataset, List[DatasetRef]]]
        GroupedData = MutableMapping[DatasetType, GroupForType]
        # The actual data structure:
        groupedData: GroupedData = defaultdict(dict)
        # And the nested loop that populates it:
        for dataset in datasets:
            # This list intentionally shared across the inner loop, since it's
            # associated with `dataset`.
            resolvedRefs = []
            for ref in dataset.refs:
                groupedData[ref.datasetType][ref.dataId] = (dataset, resolvedRefs)

        # Now we can bulk-insert into Registry for each DatasetType.
        for datasetType, groupForType in groupedData.items():
            refs = self.registry.insertDatasets(datasetType,
                                                dataIds=groupForType.keys(),
                                                run=run,
                                                recursive=True)
            # Append those resolved DatasetRefs to the new lists we set up for
            # them.
            for ref, (_, resolvedRefs) in zip(refs, groupForType.values()):
                resolvedRefs.append(ref)

        # Go back to the original FileDatasets to replace their refs with the
        # new resolved ones.
        for groupForType in groupedData.values():
            for dataset, resolvedRefs in groupForType.values():
                dataset.refs = resolvedRefs

        # Bulk-insert everything into Datastore.
        self.datastore.ingest(*datasets, transfer=transfer)
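
    # Example (illustrative sketch; the path, dataset type, data ID, and run
    # name are placeholders, and the exact FileDataset constructor arguments
    # should be checked against the FileDataset definition in this package):
    #
    #     from lsst.daf.butler import FileDataset
    #
    #     dataset = FileDataset(path="/data/raw_0001.fits",
    #                           refs=DatasetRef(rawType, dataId))
    #     butler.ingest(dataset, transfer="symlink", run="raw/all")
    #     # dataset.refs now holds resolved DatasetRefs with ids populated.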

    @contextlib.contextmanager
    def export(self, *, directory: Optional[str] = None,
               filename: Optional[str] = None,
               format: Optional[str] = None,
               transfer: Optional[str] = None) -> ContextManager[RepoExport]:
        """Export datasets from the repository represented by this `Butler`.

        This method is a context manager that returns a helper object
        (`RepoExport`) that is used to indicate what information from the
        repository should be exported.

        Parameters
        ----------
        directory : `str`, optional
            Directory dataset files should be written to if ``transfer`` is
            not `None`.
        filename : `str`, optional
            Name for the file that will include database information
            associated with the exported datasets. If this is not an absolute
            path and ``directory`` is not `None`, it will be written to
            ``directory`` instead of the current working directory. Defaults
            to "export.{format}".
        format : `str`, optional
            File format for the database information file. If `None`, the
            extension of ``filename`` will be used.
        transfer : `str`, optional
            Transfer mode passed to `Datastore.export`.

        Raises
        ------
        TypeError
            Raised if the set of arguments passed is inconsistent.

        Examples
        --------
        Typically the `Registry.queryDimensions` and `Registry.queryDatasets`
        methods are used to provide the iterables over data IDs and/or
        datasets to be exported::

            with butler.export(filename="exports.yaml") as export:
                # Export all flats, and the calibration_label dimensions
                # associated with them.
                export.saveDatasets(butler.registry.queryDatasets("flat"),
                                    elements=[butler.registry.dimensions["calibration_label"]])
                # Export all datasets that start with "deepCoadd_" and all of
                # their associated data ID information.
                export.saveDatasets(butler.registry.queryDatasets("deepCoadd_*"))
        """
        if directory is None and transfer is not None:
            raise TypeError("Cannot transfer without providing a directory.")
        if transfer == "move":
            raise TypeError("Transfer may not be 'move': export is read-only")
        if format is None:
            if filename is None:
                raise TypeError("At least one of 'filename' or 'format' must be provided.")
            else:
                _, format = os.path.splitext(filename)
        elif filename is None:
            filename = f"export.{format}"
        if directory is not None:
            filename = os.path.join(directory, filename)
        BackendClass = getClassOf(self._config["repo_transfer_formats"][format]["export"])
        with open(filename, 'w') as stream:
            backend = BackendClass(stream)
            try:
                helper = RepoExport(self.registry, self.datastore, backend=backend,
                                    directory=directory, transfer=transfer)
                yield helper
            except BaseException:
                raise
            else:
                helper._finish()

    def import_(self, *, directory: Optional[str] = None,
                filename: Optional[str] = None,
                format: Optional[str] = None,
                transfer: Optional[str] = None):
        """Import datasets exported from a different butler repository.

        Parameters
        ----------
        directory : `str`, optional
            Directory containing dataset files. If `None`, all file paths
            must be absolute.
        filename : `str`, optional
            Name for the file containing database information associated
            with the exported datasets. If this is not an absolute path, does
            not exist in the current working directory, and ``directory`` is
            not `None`, it is assumed to be in ``directory``. Defaults to
            "export.{format}".
        format : `str`, optional
            File format for the database information file. If `None`, the
            extension of ``filename`` will be used.
        transfer : `str`, optional
            Transfer mode passed to `Datastore.export`.

        Raises
        ------
        TypeError
            Raised if the set of arguments passed is inconsistent, or if the
            butler is read-only.
        """
        if not self.isWriteable():
            raise TypeError("Butler is read-only.")
        if format is None:
            if filename is None:
                raise TypeError("At least one of 'filename' or 'format' must be provided.")
            else:
                _, format = os.path.splitext(filename)
        elif filename is None:
            filename = f"export.{format}"
        if directory is not None and not os.path.exists(filename):
            filename = os.path.join(directory, filename)
        BackendClass = getClassOf(self._config["repo_transfer_formats"][format]["import"])
        with open(filename, 'r') as stream:
            backend = BackendClass(stream, self.registry)
            backend.register()
            with self.transaction():
                backend.load(self.datastore, directory=directory, transfer=transfer)
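
    # Example (illustrative sketch; the paths, butler instances, and the
    # dataset type name are placeholders): moving datasets between two
    # repositories with an export/import round trip.
    #
    #     with sourceButler.export(directory="/staging", filename="export.yaml",
    #                              transfer="copy") as export:
    #         export.saveDatasets(sourceButler.registry.queryDatasets("flat"))
    #
    #     targetButler.import_(directory="/staging", filename="export.yaml",
    #                          transfer="move")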

    def validateConfiguration(self, logFailures: bool = False,
                              datasetTypeNames: Optional[Iterable[str]] = None,
                              ignore: Optional[Iterable[str]] = None):
        """Validate butler configuration.

        Checks that each `DatasetType` can be stored in the `Datastore`.

        Parameters
        ----------
        logFailures : `bool`, optional
            If `True`, output a log message for every validation error
            detected.
        datasetTypeNames : iterable of `str`, optional
            The `DatasetType` names that should be checked. This allows
            only a subset to be selected.
        ignore : iterable of `str`, optional
            Names of DatasetTypes to skip over. This can be used to skip
            known problems. If a named `DatasetType` corresponds to a
            composite, all components of that `DatasetType` will also be
            ignored.

        Raises
        ------
        ButlerValidationError
            Raised if there is some inconsistency with how this Butler
            is configured.
        """
        if datasetTypeNames:
            entities = [self.registry.getDatasetType(name) for name in datasetTypeNames]
        else:
            entities = list(self.registry.getAllDatasetTypes())

        # filter out anything from the ignore list
        if ignore:
            ignore = set(ignore)
            entities = [e for e in entities if e.name not in ignore and e.nameAndComponent()[0] not in ignore]
        else:
            ignore = set()

        # Find all the registered instruments
        instruments = set(
            dataId["instrument"] for dataId in self.registry.queryDimensions(["instrument"])
        )

        # For each datasetType that has an instrument dimension, create
        # a DatasetRef for each defined instrument
        datasetRefs = []

        for datasetType in entities:
            if "instrument" in datasetType.dimensions:
                for instrument in instruments:
                    datasetRef = DatasetRef(datasetType, {"instrument": instrument}, conform=False)
                    datasetRefs.append(datasetRef)

        entities.extend(datasetRefs)

        datastoreErrorStr = None
        try:
            self.datastore.validateConfiguration(entities, logFailures=logFailures)
        except ValidationError as e:
            datastoreErrorStr = str(e)

        # Also check that the LookupKeys used by the datastores match
        # registry and storage class definitions
        keys = self.datastore.getLookupKeys()

        failedNames = set()
        failedDataId = set()
        for key in keys:
            datasetType = None
            if key.name is not None:
                if key.name in ignore:
                    continue

                # skip if specific datasetType names were requested and this
                # name does not match
                if datasetTypeNames and key.name not in datasetTypeNames:
                    continue

                # See if it is a StorageClass or a DatasetType
                if key.name in self.storageClasses:
                    pass
                else:
                    try:
                        self.registry.getDatasetType(key.name)
                    except KeyError:
                        if logFailures:
                            log.fatal("Key '%s' does not correspond to a DatasetType or StorageClass", key)
                        failedNames.add(key)
            else:
                # Dimensions are checked for consistency when the Butler
                # is created and rendezvoused with a universe.
                pass

            # Check that the instrument is a valid instrument
            # Currently only support instrument so check for that
            if key.dataId:
                dataIdKeys = set(key.dataId)
                if set(["instrument"]) != dataIdKeys:
                    if logFailures:
                        log.fatal("Key '%s' has unsupported DataId override", key)
                    failedDataId.add(key)
                elif key.dataId["instrument"] not in instruments:
                    if logFailures:
                        log.fatal("Key '%s' has unknown instrument", key)
                    failedDataId.add(key)

        messages = []

        if datastoreErrorStr:
            messages.append(datastoreErrorStr)

        for failed, msg in ((failedNames, "Keys without corresponding DatasetType or StorageClass entry: "),
                            (failedDataId, "Keys with bad DataId entries: ")):
            if failed:
                msg += ", ".join(str(k) for k in failed)
                messages.append(msg)

        if messages:
            raise ValidationError(";\n".join(messages))
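
    # Example (illustrative sketch; the ignored dataset type name is a
    # placeholder): validating a repository while skipping a known-problematic
    # dataset type and logging each failure as it is found.
    #
    #     try:
    #         butler.validateConfiguration(logFailures=True,
    #                                      ignore=["problematic_type"])
    #     except ValidationError as err:
    #         print(f"Butler configuration problems:\n{err}")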

    registry: Registry
    """The object that manages dataset metadata and relationships (`Registry`).

    Most operations that don't involve reading or writing butler datasets are
    accessible only via `Registry` methods.
    """

    datastore: Datastore
    """The object that manages actual dataset storage (`Datastore`).

    Direct user access to the datastore should rarely be necessary; the
    primary exception is the case where a `Datastore` implementation provides
    extra functionality beyond what the base class defines.
    """

    storageClasses: StorageClassFactory
    """An object that maps known storage class names to objects that fully
    describe them (`StorageClassFactory`).
    """

    run: Optional[str]
    """Name of the run this butler writes outputs to (`str` or `None`).
    """

    collection: Optional[str]
    """Name of the collection this butler searches for datasets (`str` or
    `None`).
    """