# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""
Butler top level classes.
"""
from __future__ import annotations

__all__ = ("Butler", "ButlerValidationError")

import os
from collections import defaultdict
import contextlib
import logging
from typing import (
    Any,
    ClassVar,
    ContextManager,
    Dict,
    Iterable,
    List,
    MutableMapping,
    Optional,
    Tuple,
    Union,
)

try:
    import boto3
except ImportError:
    boto3 = None

from lsst.utils import doImport
from .core import (
    ButlerURI,
    CompositesMap,
    Config,
    ConfigSubset,
    DataCoordinate,
    DataId,
    DatasetRef,
    DatasetType,
    Datastore,
    FileDataset,
    Quantum,
    RepoExport,
    StorageClassFactory,
    ValidationError,
)
from .core.repoRelocation import BUTLER_ROOT_TAG
from .core.safeFileIo import safeMakeDir
from .core.utils import transactional, getClassOf
from ._deferredDatasetHandle import DeferredDatasetHandle
from ._butlerConfig import ButlerConfig
from .registry import Registry, RegistryConfig

log = logging.getLogger(__name__)


class ButlerValidationError(ValidationError):
    """There is a problem with the Butler configuration."""
    pass

class Butler:
    """Main entry point for the data access system.

    Attributes
    ----------
    config : `str`, `ButlerConfig` or `Config`, optional
        Configuration (or the filename of one). If this is not a
        `ButlerConfig`, defaults will be read. If a `str`, may be the path
        to a directory containing a "butler.yaml" file.
    datastore : `Datastore`
        Datastore to use for storage.
    registry : `Registry`
        Registry to use for lookups.

    Parameters
    ----------
    config : `ButlerConfig`, `Config` or `str`, optional
        Configuration. Anything acceptable to the `ButlerConfig` constructor.
        If a directory path is given the configuration will be read from a
        ``butler.yaml`` file in that location. If `None` is given default
        values will be used.
    butler : `Butler`, optional
        If provided, construct a new Butler that uses the same registry and
        datastore as the given one, but with the given collection and run.
        Incompatible with the ``config``, ``searchPaths``, and ``writeable``
        arguments.
    collection : `str`, optional
        Collection to use for all input lookups. May be `None` to either use
        the value passed to ``run``, or to defer passing a collection until
        the methods that require one are called.
    run : `str`, optional
        Name of the run datasets should be output to; also used as a tagged
        collection name these datasets will be associated with. If the run
        does not exist, it will be created. If ``collection`` is `None`, this
        collection will be used for input lookups as well; if not, it must
        have the same value as ``run``.
    searchPaths : `list` of `str`, optional
        Directory paths to search when calculating the full Butler
        configuration. Not used if the supplied config is already a
        `ButlerConfig`.
    writeable : `bool`, optional
        Explicitly sets whether the butler supports write operations. If not
        provided, a read-only butler is created unless ``run`` is passed.

    Raises
    ------
    ValueError
        Raised if neither "collection" nor "run" is provided by argument or
        config, or if both are provided and are inconsistent.
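
    Examples
    --------
    A minimal sketch of constructing a butler; the repository path and
    collection name here are purely illustrative::

        # Read/write butler whose outputs go to the run "my_run".
        butler = Butler("/path/to/repo", run="my_run")

        # Read-only butler that searches an existing collection.
        butler = Butler("/path/to/repo", collection="my_run")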

    """
    def __init__(self, config: Union[Config, str, None] = None, *,
                 butler: Optional[Butler] = None,
                 collection: Optional[str] = None,
                 run: Optional[str] = None,
                 searchPaths: Optional[List[str]] = None,
                 writeable: Optional[bool] = None):
        if butler is not None:
            if config is not None or searchPaths is not None or writeable is not None:
                raise TypeError("Cannot pass 'config', 'searchPaths', or 'writeable' "
                                "arguments with 'butler' argument.")
            self.registry = butler.registry
            self.datastore = butler.datastore
            self.storageClasses = butler.storageClasses
            self._composites = butler._composites
            self._config = butler._config
        else:
            self._config = ButlerConfig(config, searchPaths=searchPaths)
            if "root" in self._config:
                butlerRoot = self._config["root"]
            else:
                butlerRoot = self._config.configDir
            if writeable is None:
                writeable = run is not None
            self.registry = Registry.fromConfig(self._config, butlerRoot=butlerRoot, writeable=writeable)
            self.datastore = Datastore.fromConfig(self._config, self.registry, butlerRoot=butlerRoot)
            self.storageClasses = StorageClassFactory()
            self.storageClasses.addFromConfig(self._config)
            self._composites = CompositesMap(self._config, universe=self.registry.dimensions)
        if "run" in self._config or "collection" in self._config:
            raise ValueError("Passing a run or collection via configuration is no longer supported.")
        if run is not None and writeable is False:
            raise ValueError(f"Butler initialized with run='{run}', "
                             f"but is read-only; use collection='{run}' instead.")
        self.run = run
        if collection is None and run is not None:
            collection = run
        if self.run is not None and collection != self.run:
            raise ValueError(
                "Run ({}) and collection ({}) are inconsistent.".format(self.run, collection)
            )
        self.collection = collection
        if self.run is not None:
            self.registry.registerRun(self.run)

    GENERATION: ClassVar[int] = 3
    """This is a Generation 3 Butler.

    This attribute may be removed in the future, once the Generation 2 Butler
    interface has been fully retired; it should only be used in transitional
    code.
    """

    @staticmethod
    def makeRepo(root: str, config: Union[Config, str, None] = None, standalone: bool = False,
                 createRegistry: bool = True, searchPaths: Optional[List[str]] = None,
                 forceConfigRoot: bool = True, outfile: Optional[str] = None,
                 overwrite: bool = False) -> Config:
        """Create an empty data repository by adding a butler.yaml config
        to a repository root directory.

        Parameters
        ----------
        root : `str`
            Filesystem path to the root of the new repository. Will be created
            if it does not exist.
        config : `Config` or `str`, optional
            Configuration to write to the repository, after setting any
            root-dependent Registry or Datastore config options. Can not
            be a `ButlerConfig` or a `ConfigSubset`. If `None`, default
            configuration will be used. Root-dependent config options
            specified in this config are overwritten if ``forceConfigRoot``
            is `True`.
        standalone : `bool`
            If `True`, write all expanded defaults, not just customized or
            repository-specific settings.
            This (mostly) decouples the repository from the default
            configuration, insulating it from changes to the defaults (which
            may be good or bad, depending on the nature of the changes).
            Future *additions* to the defaults will still be picked up when
            initializing `Butlers` to repos created with ``standalone=True``.
        createRegistry : `bool`, optional
            If `True` create a new Registry.
        searchPaths : `list` of `str`, optional
            Directory paths to search when calculating the full butler
            configuration.
        forceConfigRoot : `bool`, optional
            If `False`, any values present in the supplied ``config`` that
            would normally be reset are not overridden and will appear
            directly in the output config. This allows non-standard overrides
            of the root directory for a datastore or registry to be given.
            If this parameter is `True` the values for ``root`` will be
            forced into the resulting config if appropriate.
        outfile : `str`, optional
            If not-`None`, the output configuration will be written to this
            location rather than into the repository itself. Can be a URI
            string. Can refer to a directory that will be used to write
            ``butler.yaml``.
        overwrite : `bool`, optional
            Create a new configuration file even if one already exists
            in the specified output location. Default is to raise
            an exception.

        Returns
        -------
        config : `Config`
            The updated `Config` instance written to the repo.

        Raises
        ------
        ValueError
            Raised if a ButlerConfig or ConfigSubset is passed instead of a
            regular Config (as these subclasses would make it impossible to
            support ``standalone=False``).
        FileExistsError
            Raised if the output config file already exists.
        os.error
            Raised if the directory does not exist, exists but is not a
            directory, or cannot be created.

        Notes
        -----
        Note that when ``standalone=False`` (the default), the configuration
        search path (see `ConfigSubset.defaultSearchPaths`) that was used to
        construct the repository should also be used to construct any Butlers
        to avoid configuration inconsistencies.
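
        Examples
        --------
        A minimal sketch of creating a repository and then constructing a
        butler that writes to it; the path and run name are illustrative::

            Butler.makeRepo("/path/to/repo")
            butler = Butler("/path/to/repo", run="my_run")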

        """
        if isinstance(config, (ButlerConfig, ConfigSubset)):
            raise ValueError("makeRepo must be passed a regular Config without defaults applied.")

        # for "file" schemes we are assuming POSIX semantics for paths, for
        # schemeless URIs we are assuming os.path semantics.
        uri = ButlerURI(root)
        if uri.scheme == "file" or not uri.scheme:
            if not os.path.isdir(uri.ospath):
                safeMakeDir(uri.ospath)
        elif uri.scheme == "s3":
            s3 = boto3.resource("s3")
            # This assumes the bucket already exists; verifying or creating
            # it would require another level of checks.
            bucket = s3.Bucket(uri.netloc)
            bucket.put_object(Bucket=uri.netloc, Key=uri.relativeToPathRoot)
        else:
            raise ValueError(f"Unrecognized scheme: {uri.scheme}")
        config = Config(config)

        # If we are creating a new repo from scratch with relative roots,
        # do not propagate an explicit root from the config file
        if "root" in config:
            del config["root"]

        full = ButlerConfig(config, searchPaths=searchPaths)  # this applies defaults
        datastoreClass = doImport(full["datastore", "cls"])
        datastoreClass.setConfigRoot(BUTLER_ROOT_TAG, config, full, overwrite=forceConfigRoot)

        # if key exists in given config, parse it, otherwise parse the defaults
        # in the expanded config
        if config.get(("registry", "db")):
            registryConfig = RegistryConfig(config)
        else:
            registryConfig = RegistryConfig(full)
        defaultDatabaseUri = registryConfig.makeDefaultDatabaseUri(BUTLER_ROOT_TAG)
        if defaultDatabaseUri is not None:
            Config.updateParameters(RegistryConfig, config, full,
                                    toUpdate={"db": defaultDatabaseUri},
                                    overwrite=forceConfigRoot)
        else:
            Config.updateParameters(RegistryConfig, config, full, toCopy=("db",),
                                    overwrite=forceConfigRoot)

        if standalone:
            config.merge(full)
        if outfile is not None:
            # When writing to a separate location we must include
            # the root of the butler repo in the config else it won't know
            # where to look.
            config["root"] = uri.geturl()
            configURI = outfile
        else:
            configURI = uri
        config.dumpToUri(configURI, overwrite=overwrite)

        # Create Registry and populate tables
        Registry.fromConfig(config, create=createRegistry, butlerRoot=root)
        return config

    @classmethod
    def _unpickle(cls, config: ButlerConfig, collection: str, run: Optional[str], writeable: bool) -> Butler:
        """Callable used to unpickle a Butler.

        We prefer not to use ``Butler.__init__`` directly so we can force some
        of its many arguments to be keyword-only (note that ``__reduce__``
        can only invoke callables with positional arguments).

        Parameters
        ----------
        config : `ButlerConfig`
            Butler configuration, already coerced into a true `ButlerConfig`
            instance (and hence after any search paths for overrides have been
            utilized).
        collection : `str`
            String name of a collection to use for read operations.
        run : `str`, optional
            String name of a run to use for write operations, or `None` for a
            read-only butler.
        writeable : `bool`
            Whether the resulting `Butler` should support write operations.

        Returns
        -------
        butler : `Butler`
            A new `Butler` instance.
        """
        return cls(config=config, collection=collection, run=run, writeable=writeable)

    def __reduce__(self):
        """Support pickling.
        """
        return (Butler._unpickle, (self._config, self.collection, self.run, self.registry.isWriteable()))

    def __str__(self):
        return "Butler(collection='{}', datastore='{}', registry='{}')".format(
            self.collection, self.datastore, self.registry)

    def isWriteable(self) -> bool:
        """Return `True` if this `Butler` supports write operations.
        """
        return self.registry.isWriteable()

    @contextlib.contextmanager
    def transaction(self):
        """Context manager supporting `Butler` transactions.

        Transactions can be nested.
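
        Examples
        --------
        A sketch of grouping writes so that they are committed or rolled back
        together; the dataset type and data IDs are illustrative::

            with butler.transaction():
                butler.put(catalog1, "src", dataId1)
                butler.put(catalog2, "src", dataId2)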

        """
        with self.registry.transaction():
            with self.datastore.transaction():
                yield

    def _standardizeArgs(self, datasetRefOrType: Union[DatasetRef, DatasetType, str],
                         dataId: Optional[DataId] = None, **kwds: Any) -> Tuple[DatasetType, DataId]:
        """Standardize the arguments passed to several Butler APIs.

        Parameters
        ----------
        datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
            When `DatasetRef` the `dataId` should be `None`.
            Otherwise the `DatasetType` or name thereof.
        dataId : `dict` or `DataCoordinate`
            A `dict` of `Dimension` link name, value pairs that label the
            `DatasetRef` within a Collection. When `None`, a `DatasetRef`
            should be provided as the second argument.
        kwds
            Additional keyword arguments used to augment or construct a
            `DataCoordinate`. See `DataCoordinate.standardize`
            parameters.

        Returns
        -------
        datasetType : `DatasetType`
            A `DatasetType` instance extracted from ``datasetRefOrType``.
        dataId : `dict` or `DataId`, optional
            Argument that can be used (along with ``kwds``) to construct a
            `DataId`.

        Notes
        -----
        Butler APIs that conceptually need a DatasetRef also allow passing a
        `DatasetType` (or the name of one) and a `DataId` (or a dict and
        keyword arguments that can be used to construct one) separately. This
        method accepts those arguments and always returns a true `DatasetType`
        and a `DataId` or `dict`.

        Standardization of `dict` vs `DataId` is best handled by passing the
        returned ``dataId`` (and ``kwds``) to `Registry` APIs, which are
        generally similarly flexible.
        """
        externalDatasetType = None
        internalDatasetType = None
        if isinstance(datasetRefOrType, DatasetRef):
            if dataId is not None or kwds:
                raise ValueError("DatasetRef given, cannot use dataId as well")
            externalDatasetType = datasetRefOrType.datasetType
            dataId = datasetRefOrType.dataId
        else:
            # Don't check whether DataId is provided, because Registry APIs
            # can usually construct a better error message when it wasn't.
            if isinstance(datasetRefOrType, DatasetType):
                externalDatasetType = datasetRefOrType
            else:
                internalDatasetType = self.registry.getDatasetType(datasetRefOrType)

        # Check that they are self-consistent
        if externalDatasetType is not None:
            internalDatasetType = self.registry.getDatasetType(externalDatasetType.name)
            if externalDatasetType != internalDatasetType:
                raise ValueError(f"Supplied dataset type ({externalDatasetType}) inconsistent with "
                                 f"registry definition ({internalDatasetType})")

        return internalDatasetType, dataId

    def _findDatasetRef(self, datasetRefOrType: Union[DatasetRef, DatasetType, str],
                        dataId: Optional[DataId] = None, *,
                        collection: Optional[str] = None,
                        allowUnresolved: bool = False,
                        **kwds: Any) -> DatasetRef:
        """Shared logic for methods that start with a search for a dataset in
        the registry.

        Parameters
        ----------
        datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
            When `DatasetRef` the `dataId` should be `None`.
            Otherwise the `DatasetType` or name thereof.
        dataId : `dict` or `DataCoordinate`, optional
            A `dict` of `Dimension` link name, value pairs that label the
            `DatasetRef` within a Collection. When `None`, a `DatasetRef`
            should be provided as the first argument.
        collection : `str`, optional
            Name of the collection to search, overriding ``self.collection``.
        allowUnresolved : `bool`, optional
            If `True`, return an unresolved `DatasetRef` if finding a resolved
            one in the `Registry` fails. Defaults to `False`.
        kwds
            Additional keyword arguments used to augment or construct a
            `DataId`. See `DataId` parameters.

        Returns
        -------
        ref : `DatasetRef`
            A reference to the dataset identified by the given arguments.

        Raises
        ------
        LookupError
            Raised if no matching dataset exists in the `Registry` (and
            ``allowUnresolved is False``).
        ValueError
            Raised if a resolved `DatasetRef` was passed as an input, but it
            differs from the one found in the registry in this collection.
        TypeError
            Raised if ``collection`` and ``self.collection`` are both `None`.
        """
        datasetType, dataId = self._standardizeArgs(datasetRefOrType, dataId, **kwds)
        if isinstance(datasetRefOrType, DatasetRef):
            idNumber = datasetRefOrType.id
        else:
            idNumber = None
        # Expand the data ID first instead of letting registry.find do it, so
        # we get the result even if it returns None.
        dataId = self.registry.expandDataId(dataId, graph=datasetType.dimensions, **kwds)
        if collection is None:
            collection = self.collection
            if collection is None:
                raise TypeError("No collection provided.")
        # Always lookup the DatasetRef, even if one is given, to ensure it is
        # present in the current collection.
        ref = self.registry.find(collection, datasetType, dataId)
        if ref is None:
            if allowUnresolved:
                return DatasetRef(datasetType, dataId)
            else:
                raise LookupError(f"Dataset {datasetType.name} with data ID {dataId} "
                                  f"could not be found in collection '{collection}'.")
        if idNumber is not None and idNumber != ref.id:
            raise ValueError(f"DatasetRef.id provided ({idNumber}) does not match "
                             f"id ({ref.id}) in registry in collection '{collection}'.")
        return ref

    @transactional
    def put(self, obj: Any, datasetRefOrType: Union[DatasetRef, DatasetType, str],
            dataId: Optional[DataId] = None, *,
            producer: Optional[Quantum] = None,
            run: Optional[str] = None,
            **kwds: Any) -> DatasetRef:
        """Store and register a dataset.

        Parameters
        ----------
        obj : `object`
            The dataset.
        datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
            When `DatasetRef` is provided, ``dataId`` should be `None`.
            Otherwise the `DatasetType` or name thereof.
        dataId : `dict` or `DataCoordinate`
            A `dict` of `Dimension` link name, value pairs that label the
            `DatasetRef` within a Collection. When `None`, a `DatasetRef`
            should be provided as the second argument.
        producer : `Quantum`, optional
            The producer.
        run : `str`, optional
            The name of the run the dataset should be added to, overriding
            ``self.run``.
        kwds
            Additional keyword arguments used to augment or construct a
            `DataCoordinate`. See `DataCoordinate.standardize`
            parameters.

        Returns
        -------
        ref : `DatasetRef`
            A reference to the stored dataset, updated with the correct id if
            given.

        Raises
        ------
        TypeError
            Raised if the butler is read-only or if no run has been provided.
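
        Examples
        --------
        A minimal sketch; the dataset type and dimension values are
        illustrative and must correspond to a registered `DatasetType`::

            ref = butler.put(catalog, "src", instrument="HSC", visit=903334,
                             detector=20)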

        """
        log.debug("Butler put: %s, dataId=%s, producer=%s, run=%s", datasetRefOrType, dataId, producer, run)
        if not self.isWriteable():
            raise TypeError("Butler is read-only.")
        datasetType, dataId = self._standardizeArgs(datasetRefOrType, dataId, **kwds)
        if isinstance(datasetRefOrType, DatasetRef) and datasetRefOrType.id is not None:
            raise ValueError("DatasetRef must not be in registry, must have None id")

        if run is None:
            if self.run is None:
                raise TypeError("No run provided.")
            run = self.run

        isVirtualComposite = self._composites.shouldBeDisassembled(datasetType)

        # Add Registry Dataset entry. If not a virtual composite, add
        # and attach components at the same time.
        dataId = self.registry.expandDataId(dataId, graph=datasetType.dimensions, **kwds)
        ref, = self.registry.insertDatasets(datasetType, run=run, dataIds=[dataId],
                                            producer=producer, recursive=not isVirtualComposite)

        # Check to see if this datasetType requires disassembly
        if isVirtualComposite:
            components = datasetType.storageClass.assembler().disassemble(obj)
            for component, info in components.items():
                compTypeName = datasetType.componentTypeName(component)
                compRef = self.put(info.component, compTypeName, dataId, producer=producer, run=run)
                self.registry.attachComponent(component, ref, compRef)
        else:
            # This is an entity without a disassembler.
            self.datastore.put(obj, ref)

        return ref

    def getDirect(self, ref: DatasetRef, *, parameters: Optional[Dict[str, Any]] = None):
        """Retrieve a stored dataset.

        Unlike `Butler.get`, this method allows datasets outside the Butler's
        collection to be read as long as the `DatasetRef` that identifies them
        can be obtained separately.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to an already stored dataset.
        parameters : `dict`
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the dataset.

        Returns
        -------
        obj : `object`
            The dataset.
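
        Examples
        --------
        A sketch of reading a dataset from a `DatasetRef` obtained directly
        from the registry; the collection, dataset type, and data ID are
        illustrative::

            ref = butler.registry.find("my_run", "calexp", dataId)
            calexp = butler.getDirect(ref)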

        """
        # if the ref exists in the store we return it directly
        if self.datastore.exists(ref):
            return self.datastore.get(ref, parameters=parameters)
        elif ref.isComposite():
            # Check that we haven't got any unknown parameters
            ref.datasetType.storageClass.validateParameters(parameters)
            # Reconstruct the composite
            usedParams = set()
            components = {}
            for compName, compRef in ref.components.items():
                # make a dictionary of parameters containing only the subset
                # supported by the StorageClass of the components
                compParams = compRef.datasetType.storageClass.filterParameters(parameters)
                usedParams.update(set(compParams))
                components[compName] = self.datastore.get(compRef, parameters=compParams)

            # Any unused parameters will have to be passed to the assembler
            if parameters:
                unusedParams = {k: v for k, v in parameters.items() if k not in usedParams}
            else:
                unusedParams = {}

            # Assemble the components
            inMemoryDataset = ref.datasetType.storageClass.assembler().assemble(components)
            return ref.datasetType.storageClass.assembler().handleParameters(inMemoryDataset,
                                                                             parameters=unusedParams)
        else:
            # single entity in datastore
            raise FileNotFoundError(f"Unable to locate dataset '{ref}' in datastore {self.datastore.name}")

    def getDeferred(self, datasetRefOrType: Union[DatasetRef, DatasetType, str],
                    dataId: Optional[DataId] = None, *,
                    parameters: Union[dict, None] = None,
                    collection: Optional[str] = None,
                    **kwds: Any) -> DeferredDatasetHandle:
        """Create a `DeferredDatasetHandle` which can later retrieve a dataset.

        Parameters
        ----------
        datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
            When `DatasetRef` the `dataId` should be `None`.
            Otherwise the `DatasetType` or name thereof.
        dataId : `dict` or `DataCoordinate`, optional
            A `dict` of `Dimension` link name, value pairs that label the
            `DatasetRef` within a Collection. When `None`, a `DatasetRef`
            should be provided as the first argument.
        parameters : `dict`
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the dataset.
        collection : `str`, optional
            Name of the collection to search, overriding ``self.collection``.
        kwds
            Additional keyword arguments used to augment or construct a
            `DataId`. See `DataId` parameters.

        Returns
        -------
        obj : `DeferredDatasetHandle`
            A handle which can be used to retrieve a dataset at a later time.

        Raises
        ------
        LookupError
            Raised if no matching dataset exists in the `Registry` (and
            ``allowUnresolved is False``).
        ValueError
            Raised if a resolved `DatasetRef` was passed as an input, but it
            differs from the one found in the registry in this collection.
        TypeError
            Raised if ``collection`` and ``self.collection`` are both `None`.
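
        Examples
        --------
        A sketch of deferring a read; the dataset type and data ID are
        illustrative, and the dataset is only fetched when the handle's
        ``get`` method is called::

            handle = butler.getDeferred("calexp", dataId)
            # ... decide later whether the dataset is actually needed ...
            calexp = handle.get()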

        """
        ref = self._findDatasetRef(datasetRefOrType, dataId, collection=collection, **kwds)
        return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters)

    def get(self, datasetRefOrType: Union[DatasetRef, DatasetType, str],
            dataId: Optional[DataId] = None, *,
            parameters: Optional[Dict[str, Any]] = None,
            collection: Optional[str] = None,
            **kwds: Any) -> Any:
        """Retrieve a stored dataset.

        Parameters
        ----------
        datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
            When `DatasetRef` the `dataId` should be `None`.
            Otherwise the `DatasetType` or name thereof.
        dataId : `dict` or `DataCoordinate`
            A `dict` of `Dimension` link name, value pairs that label the
            `DatasetRef` within a Collection. When `None`, a `DatasetRef`
            should be provided as the first argument.
        parameters : `dict`
            Additional StorageClass-defined options to control reading,
            typically used to efficiently read only a subset of the dataset.
        collection : `str`, optional
            Collection to search, overriding ``self.collection``.
        kwds
            Additional keyword arguments used to augment or construct a
            `DataCoordinate`. See `DataCoordinate.standardize`
            parameters.

        Returns
        -------
        obj : `object`
            The dataset.

        Raises
        ------
        ValueError
            Raised if a resolved `DatasetRef` was passed as an input, but it
            differs from the one found in the registry in this collection.
        LookupError
            Raised if no matching dataset exists in the `Registry`.
        TypeError
            Raised if ``collection`` and ``self.collection`` are both `None`.
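
        Examples
        --------
        A minimal sketch; the dataset type and dimension values are
        illustrative::

            calexp = butler.get("calexp", instrument="HSC", visit=903334,
                                detector=20)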

        """
        log.debug("Butler get: %s, dataId=%s, parameters=%s", datasetRefOrType, dataId, parameters)
        ref = self._findDatasetRef(datasetRefOrType, dataId, collection=collection, **kwds)
        return self.getDirect(ref, parameters=parameters)

    def getUri(self, datasetRefOrType: Union[DatasetRef, DatasetType, str],
               dataId: Optional[DataId] = None, *,
               predict: bool = False,
               collection: Optional[str] = None,
               run: Optional[str] = None,
               **kwds: Any) -> str:
        """Return the URI to the Dataset.

        Parameters
        ----------
        datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
            When `DatasetRef` the `dataId` should be `None`.
            Otherwise the `DatasetType` or name thereof.
        dataId : `dict` or `DataCoordinate`
            A `dict` of `Dimension` link name, value pairs that label the
            `DatasetRef` within a Collection. When `None`, a `DatasetRef`
            should be provided as the first argument.
        predict : `bool`
            If `True`, allow URIs to be returned of datasets that have not
            been written.
        collection : `str`, optional
            Collection to search, overriding ``self.collection``.
        run : `str`, optional
            Run to use for predictions, overriding ``self.run``.
        kwds
            Additional keyword arguments used to augment or construct a
            `DataCoordinate`. See `DataCoordinate.standardize`
            parameters.

        Returns
        -------
        uri : `str`
            URI string pointing to the Dataset within the datastore. If the
            Dataset does not exist in the datastore, and if ``predict`` is
            `True`, the URI will be a prediction and will include a URI
            fragment "#predicted".
            If the datastore does not have entities that relate well
            to the concept of a URI the returned URI string will be
            descriptive. The returned URI is not guaranteed to be obtainable.

        Raises
        ------
        LookupError
            A URI has been requested for a dataset that does not exist and
            guessing is not allowed.
        ValueError
            Raised if a resolved `DatasetRef` was passed as an input, but it
            differs from the one found in the registry in this collection.
        TypeError
            Raised if ``collection`` and ``self.collection`` are both `None`.
        """
        ref = self._findDatasetRef(datasetRefOrType, dataId, allowUnresolved=predict, collection=collection,
                                   **kwds)
        if ref.id is None:  # only possible if predict is True
            if run is None:
                run = self.run
                if run is None:
                    raise TypeError("Cannot predict location with run=None.")
            # Lie about ID, because we can't guess it, and only
            # Datastore.getUri() will ever see it (and it doesn't use it).
            ref = ref.resolved(id=0, run=run)
        return self.datastore.getUri(ref, predict)

    def datasetExists(self, datasetRefOrType: Union[DatasetRef, DatasetType, str],
                      dataId: Optional[DataId] = None, *,
                      collection: Optional[str] = None,
                      **kwds: Any) -> bool:
        """Return True if the Dataset is actually present in the Datastore.

        Parameters
        ----------
        datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
            When `DatasetRef` the `dataId` should be `None`.
            Otherwise the `DatasetType` or name thereof.
        dataId : `dict` or `DataCoordinate`
            A `dict` of `Dimension` link name, value pairs that label the
            `DatasetRef` within a Collection. When `None`, a `DatasetRef`
            should be provided as the first argument.
        collection : `str`, optional
            Collection to search, overriding ``self.collection``.
        kwds
            Additional keyword arguments used to augment or construct a
            `DataCoordinate`. See `DataCoordinate.standardize`
            parameters.

        Raises
        ------
        LookupError
            Raised if the dataset is not even present in the Registry.
        ValueError
            Raised if a resolved `DatasetRef` was passed as an input, but it
            differs from the one found in the registry in this collection.
        TypeError
            Raised if ``collection`` and ``self.collection`` are both `None`.
        """
        ref = self._findDatasetRef(datasetRefOrType, dataId, collection=collection, **kwds)
        return self.datastore.exists(ref)

    def remove(self, datasetRefOrType: Union[DatasetRef, DatasetType, str],
               dataId: Optional[DataId] = None, *,
               delete: bool = True, remember: bool = True, collection: Optional[str] = None, **kwds: Any):
        """Remove a dataset from the collection and possibly the repository.

        The identified dataset is always at least removed from the Butler's
        collection. By default it is also deleted from the Datastore (e.g.
        files are actually deleted), but the dataset is "remembered" by
        retaining its row in the dataset and provenance tables in the
        registry.

        If the dataset is a composite, all components will also be removed.

        Parameters
        ----------
        datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
            When `DatasetRef` the `dataId` should be `None`.
            Otherwise the `DatasetType` or name thereof.
        dataId : `dict` or `DataId`
            A `dict` of `Dimension` link name, value pairs that label the
            `DatasetRef` within a Collection. When `None`, a `DatasetRef`
            should be provided as the first argument.
        delete : `bool`
            If `True` (default) actually delete the dataset from the
            Datastore (i.e. actually remove files).
        remember : `bool`
            If `True` (default), retain dataset and provenance records in
            the `Registry` for this dataset.
        collection : `str`, optional
            Collection to search, overriding ``self.collection``.
        kwds
            Additional keyword arguments used to augment or construct a
            `DataId`. See `DataId` parameters.

        Raises
        ------
        TypeError
            Raised if the butler is read-only, if no collection was provided,
            or if ``delete`` and ``remember`` are both `False`; a dataset
            cannot remain in a `Datastore` if its `Registry` entries are
            removed.
        OrphanedRecordError
            Raised if ``remember`` is `False` but the dataset is still present
            in a `Datastore` not recognized by this `Butler` client.
        ValueError
            Raised if a resolved `DatasetRef` was passed as an input, but it
            differs from the one found in the registry in this collection.
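
        Examples
        --------
        A sketch of deleting a dataset's files while keeping its registry
        record; the dataset type and data ID are illustrative::

            butler.remove("calexp", dataId, delete=True, remember=True)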

        """
        if not self.isWriteable():
            raise TypeError("Butler is read-only.")
        ref = self._findDatasetRef(datasetRefOrType, dataId, collection=collection, **kwds)
        if delete:
            # There is a difference between a concrete composite and virtual
            # composite. In a virtual composite the datastore is never
            # given the top level DatasetRef. In the concrete composite
            # the datastore knows all the refs and will clean up itself
            # if asked to remove the parent ref.
            # We can not check configuration for this since we can not trust
            # that the configuration is the same. We therefore have to ask
            # if the ref exists or not
            if self.datastore.exists(ref):
                self.datastore.remove(ref)
            elif ref.isComposite():
                datastoreNames = set(self.datastore.names)
                for r in ref.components.values():
                    # If a dataset was removed previously but remembered
                    # in registry, skip the removal in the datastore.
                    datastoreLocations = self.registry.getDatasetLocations(r)
                    if datastoreLocations & datastoreNames:
                        self.datastore.remove(r)
            else:
                raise FileNotFoundError(f"Dataset {ref} not known to datastore")
        elif not remember:
            raise ValueError("Cannot retain dataset in Datastore without keeping Registry dataset record.")
        if remember:
            self.registry.disassociate(self.collection, [ref])
        else:
            # This also implicitly disassociates.
            self.registry.removeDataset(ref)

    @transactional
    def ingest(self, *datasets: FileDataset, transfer: Optional[str] = None, run: Optional[str] = None):
        """Store and register one or more datasets that already exist on disk.

        Parameters
        ----------
        datasets : `FileDataset`
            Each positional argument is a struct containing information about
            a file to be ingested, including its path (either absolute or
            relative to the datastore root, if applicable), a `DatasetRef`,
            and optionally a formatter class or its fully-qualified string
            name. If a formatter is not provided, the formatter that would be
            used for `put` is assumed. On successful return, all
            `FileDataset.ref` attributes will have their `DatasetRef.id`
            attribute populated and all `FileDataset.formatter` attributes
            will be set to the formatter class used. `FileDataset.path`
            attributes may be modified to put paths in whatever the datastore
            considers a standardized form.
        transfer : `str`, optional
            If not `None`, must be one of 'move', 'copy', 'hardlink', or
            'symlink', indicating how to transfer the file.
        run : `str`, optional
            The name of the run ingested datasets should be added to,
            overriding ``self.run``.

        Raises
        ------
        TypeError
            Raised if the butler is read-only or if no run was provided.
        NotImplementedError
            Raised if the `Datastore` does not support the given transfer
            mode.
        DatasetTypeNotSupportedError
            Raised if one or more files to be ingested have a dataset type
            that is not supported by the `Datastore`.
        FileNotFoundError
            Raised if one of the given files does not exist.
        FileExistsError
            Raised if transfer is not `None` but the (internal) location the
            file would be moved to is already occupied.

        Notes
        -----
        This operation is not fully exception safe: if a database operation
        fails, the given `FileDataset` instances may be only partially
        updated.

        It is atomic in terms of database operations (they will either all
        succeed or all fail) providing the database engine implements
        transactions correctly. It will attempt to be atomic in terms of
        filesystem operations as well, but this cannot be implemented
        rigorously for most datastores.
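
        Examples
        --------
        A sketch of ingesting a single existing file; the path, dataset type,
        and data ID are illustrative, and `FileDataset` is assumed here to
        accept a path together with one or more unresolved `DatasetRef`
        objects::

            ref = DatasetRef(datasetType, dataId)
            butler.ingest(FileDataset(path="/path/to/file.fits", refs=[ref]),
                          transfer="symlink")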

        """
        if not self.isWriteable():
            raise TypeError("Butler is read-only.")
        if run is None:
            if self.run is None:
                raise TypeError("No run provided.")
            run = self.run

        # Reorganize the inputs so they're grouped by DatasetType and then
        # data ID. We also include a list of DatasetRefs for each FileDataset
        # to hold the resolved DatasetRefs returned by the Registry, before
        # it's safe to swap them into FileDataset.refs.
        # Some type annotation aliases to make that clearer:
        GroupForType = Dict[DataCoordinate, Tuple[FileDataset, List[DatasetRef]]]
        GroupedData = MutableMapping[DatasetType, GroupForType]
        # The actual data structure:
        groupedData: GroupedData = defaultdict(dict)
        # And the nested loop that populates it:
        for dataset in datasets:
            # This list intentionally shared across the inner loop, since it's
            # associated with `dataset`.
            resolvedRefs = []
            for ref in dataset.refs:
                groupedData[ref.datasetType][ref.dataId] = (dataset, resolvedRefs)

        # Now we can bulk-insert into Registry for each DatasetType.
        for datasetType, groupForType in groupedData.items():
            refs = self.registry.insertDatasets(datasetType,
                                                dataIds=groupForType.keys(),
                                                run=run,
                                                recursive=True)
            # Append those resolved DatasetRefs to the new lists we set up for
            # them.
            for ref, (_, resolvedRefs) in zip(refs, groupForType.values()):
                resolvedRefs.append(ref)

        # Go back to the original FileDatasets to replace their refs with the
        # new resolved ones.
        for groupForType in groupedData.values():
            for dataset, resolvedRefs in groupForType.values():
                dataset.refs = resolvedRefs

        # Bulk-insert everything into Datastore.
        self.datastore.ingest(*datasets, transfer=transfer)

    @contextlib.contextmanager
    def export(self, *, directory: Optional[str] = None,
               filename: Optional[str] = None,
               format: Optional[str] = None,
               transfer: Optional[str] = None) -> ContextManager[RepoExport]:
        """Export datasets from the repository represented by this `Butler`.

        This method is a context manager that returns a helper object
        (`RepoExport`) that is used to indicate what information from the
        repository should be exported.

        Parameters
        ----------
        directory : `str`, optional
            Directory dataset files should be written to if ``transfer`` is
            not `None`.
        filename : `str`, optional
            Name for the file that will include database information
            associated with the exported datasets. If this is not an absolute
            path and ``directory`` is not `None`, it will be written to
            ``directory`` instead of the current working directory. Defaults
            to "export.{format}".
        format : `str`, optional
            File format for the database information file. If `None`, the
            extension of ``filename`` will be used.
        transfer : `str`, optional
            Transfer mode passed to `Datastore.export`.

        Raises
        ------
        TypeError
            Raised if the set of arguments passed is inconsistent.

        Examples
        --------
        Typically the `Registry.queryDimensions` and `Registry.queryDatasets`
        methods are used to provide the iterables over data IDs and/or
        datasets to be exported::

            with butler.export(filename="exports.yaml") as export:
                # Export all flats, and the calibration_label dimensions
                # associated with them.
                export.saveDatasets(butler.registry.queryDatasets("flat"),
                                    elements=[butler.registry.dimensions["calibration_label"]])
                # Export all datasets that start with "deepCoadd_" and all of
                # their associated data ID information.
                export.saveDatasets(butler.registry.queryDatasets("deepCoadd_*"))
        """
        if directory is None and transfer is not None:
            raise TypeError("Cannot transfer without providing a directory.")
        if transfer == "move":
            raise TypeError("Transfer may not be 'move': export is read-only")
        if format is None:
            if filename is None:
                raise TypeError("At least one of 'filename' or 'format' must be provided.")
            else:
                _, format = os.path.splitext(filename)
        elif filename is None:
            filename = f"export.{format}"
        if directory is not None:
            filename = os.path.join(directory, filename)
        BackendClass = getClassOf(self._config["repo_transfer_formats"][format]["export"])
        with open(filename, 'w') as stream:
            backend = BackendClass(stream)
            try:
                helper = RepoExport(self.registry, self.datastore, backend=backend,
                                    directory=directory, transfer=transfer)
                yield helper
            except BaseException:
                raise
            else:
                helper._finish()

    def import_(self, *, directory: Optional[str] = None,
                filename: Optional[str] = None,
                format: Optional[str] = None,
                transfer: Optional[str] = None):
        """Import datasets exported from a different butler repository.

        Parameters
        ----------
        directory : `str`, optional
            Directory containing dataset files. If `None`, all file paths
            must be absolute.
        filename : `str`, optional
            Name of the file containing database information associated
            with the exported datasets. If this is not an absolute path, does
            not exist in the current working directory, and ``directory`` is
            not `None`, it is assumed to be in ``directory``. Defaults to
            "export.{format}".
        format : `str`, optional
            File format for the database information file. If `None`, the
            extension of ``filename`` will be used.
        transfer : `str`, optional
            Transfer mode passed to `Datastore.export`.

        Raises
        ------
        TypeError
            Raised if the set of arguments passed is inconsistent, or if the
            butler is read-only.
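
        Examples
        --------
        A sketch of importing a previously exported repository subset; the
        directory and file names are illustrative::

            butler.import_(directory="/path/to/exported/data",
                           filename="exports.yaml", transfer="symlink")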

        """
        if not self.isWriteable():
            raise TypeError("Butler is read-only.")
        if format is None:
            if filename is None:
                raise TypeError("At least one of 'filename' or 'format' must be provided.")
            else:
                _, format = os.path.splitext(filename)
        elif filename is None:
            filename = f"export.{format}"
        if directory is not None and not os.path.exists(filename):
            filename = os.path.join(directory, filename)
        BackendClass = getClassOf(self._config["repo_transfer_formats"][format]["import"])
        with open(filename, 'r') as stream:
            backend = BackendClass(stream, self.registry)
            backend.register()
            with self.transaction():
                backend.load(self.datastore, directory=directory, transfer=transfer)

    def validateConfiguration(self, logFailures: bool = False,
                              datasetTypeNames: Optional[Iterable[str]] = None,
                              ignore: Optional[Iterable[str]] = None):
        """Validate butler configuration.

        Checks that each `DatasetType` can be stored in the `Datastore`.

        Parameters
        ----------
        logFailures : `bool`, optional
            If `True`, output a log message for every validation error
            detected.
        datasetTypeNames : iterable of `str`, optional
            The `DatasetType` names that should be checked. This allows
            only a subset to be selected.
        ignore : iterable of `str`, optional
            Names of DatasetTypes to skip over. This can be used to skip
            known problems. If a named `DatasetType` corresponds to a
            composite, all components of that `DatasetType` will also be
            ignored.

        Raises
        ------
        ButlerValidationError
            Raised if there is some inconsistency with how this Butler
            is configured.
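
        Examples
        --------
        A sketch of validating a repository while logging each problem and
        skipping some known-bad dataset types; the ignored names are
        illustrative::

            butler.validateConfiguration(logFailures=True,
                                         ignore=["raw", "brightObjectMask"])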

        """
        if datasetTypeNames:
            entities = [self.registry.getDatasetType(name) for name in datasetTypeNames]
        else:
            entities = list(self.registry.getAllDatasetTypes())

        # filter out anything from the ignore list
        if ignore:
            ignore = set(ignore)
            entities = [e for e in entities if e.name not in ignore and e.nameAndComponent()[0] not in ignore]
        else:
            ignore = set()

        # Find all the registered instruments
        instruments = set(
            dataId["instrument"] for dataId in self.registry.queryDimensions(["instrument"])
        )

        # For each datasetType that has an instrument dimension, create
        # a DatasetRef for each defined instrument
        datasetRefs = []

        for datasetType in entities:
            if "instrument" in datasetType.dimensions:
                for instrument in instruments:
                    datasetRef = DatasetRef(datasetType, {"instrument": instrument}, conform=False)
                    datasetRefs.append(datasetRef)

        entities.extend(datasetRefs)

        datastoreErrorStr = None
        try:
            self.datastore.validateConfiguration(entities, logFailures=logFailures)
        except ValidationError as e:
            datastoreErrorStr = str(e)

        # Also check that the LookupKeys used by the datastores match
        # registry and storage class definitions
        keys = self.datastore.getLookupKeys()

        failedNames = set()
        failedDataId = set()
        for key in keys:
            datasetType = None
            if key.name is not None:
                if key.name in ignore:
                    continue

                # skip if specific datasetType names were requested and this
                # name does not match
                if datasetTypeNames and key.name not in datasetTypeNames:
                    continue

                # See if it is a StorageClass or a DatasetType
                if key.name in self.storageClasses:
                    pass
                else:
                    try:
                        self.registry.getDatasetType(key.name)
                    except KeyError:
                        if logFailures:
                            log.fatal("Key '%s' does not correspond to a DatasetType or StorageClass", key)
                        failedNames.add(key)
            else:
                # Dimensions are checked for consistency when the Butler
                # is created and rendezvoused with a universe.
                pass

            # Check that the instrument is a valid instrument
            # Currently only support instrument so check for that
            if key.dataId:
                dataIdKeys = set(key.dataId)
                if set(["instrument"]) != dataIdKeys:
                    if logFailures:
                        log.fatal("Key '%s' has unsupported DataId override", key)
                    failedDataId.add(key)
                elif key.dataId["instrument"] not in instruments:
                    if logFailures:
                        log.fatal("Key '%s' has unknown instrument", key)
                    failedDataId.add(key)

        messages = []

        if datastoreErrorStr:
            messages.append(datastoreErrorStr)

        for failed, msg in ((failedNames, "Keys without corresponding DatasetType or StorageClass entry: "),
                            (failedDataId, "Keys with bad DataId entries: ")):
            if failed:
                msg += ", ".join(str(k) for k in failed)
                messages.append(msg)

        if messages:
            raise ValidationError(";\n".join(messages))

    registry: Registry
    """The object that manages dataset metadata and relationships (`Registry`).

    Most operations that don't involve reading or writing butler datasets are
    accessible only via `Registry` methods.
    """

    datastore: Datastore
    """The object that manages actual dataset storage (`Datastore`).

    Direct user access to the datastore should rarely be necessary; the
    primary exception is the case where a `Datastore` implementation provides
    extra functionality beyond what the base class defines.
    """

    storageClasses: StorageClassFactory
    """An object that maps known storage class names to objects that fully
    describe them (`StorageClassFactory`).
    """

    run: Optional[str]
    """Name of the run this butler writes outputs to (`str` or `None`).
    """

    collection: Optional[str]
    """Name of the collection this butler searches for datasets (`str` or
    `None`).
    """