# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

"""Chained datastore."""

__all__ = ("ChainedDatastore",)

import time
import logging
import warnings
import itertools
from typing import (
    TYPE_CHECKING,
    Any,
    Dict,
    List,
    Iterable,
    Mapping,
    Optional,
    Sequence,
    Set,
    Tuple,
    Union,
)

from lsst.utils import doImportType
from lsst.daf.butler import ButlerURI, Datastore, DatastoreConfig, DatasetTypeNotSupportedError, \
    DatastoreValidationError, Constraints, FileDataset, DatasetRef

if TYPE_CHECKING:
    from lsst.daf.butler import Config, DatasetType, LookupKey, StorageClass
    from lsst.daf.butler.registry.interfaces import DatastoreRegistryBridgeManager

log = logging.getLogger(__name__)


class _IngestPrepData(Datastore.IngestPrepData):
    """Helper class for ChainedDatastore ingest implementation.

    Parameters
    ----------
    children : `list` of `tuple`
        Pairs of `Datastore`, `IngestPrepData` for all child datastores.
    """
    def __init__(self, children: List[Tuple[Datastore, Datastore.IngestPrepData]]):
        super().__init__(itertools.chain.from_iterable(data.refs.values() for _, data in children))
        self.children = children


class ChainedDatastore(Datastore):
    """Chained Datastores to allow reads and writes from multiple datastores.

    A ChainedDatastore is configured with multiple datastore configurations.
    A ``put()`` is sent to each datastore that will accept the dataset. A
    ``get()`` operation is sent to each datastore in turn and the first
    datastore to return a valid dataset is used.

    Parameters
    ----------
    config : `DatastoreConfig` or `str`
        Configuration. This configuration must include a ``datastores`` field
        as a sequence of datastore configurations. The order in this sequence
        indicates the order to use for read operations.
    bridgeManager : `DatastoreRegistryBridgeManager`
        Object that manages the interface between `Registry` and datastores.
    butlerRoot : `str`, optional
        New datastore root to use to override the configuration value. This
        root is sent to each child datastore.

    Notes
    -----
    ChainedDatastore never supports `None` or `"move"` as an `ingest` transfer
    mode. It supports `"copy"`, `"symlink"`, `"relsymlink"`
    and `"hardlink"` if and only if all its child datastores do.
    """
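    # Illustrative sketch only: the YAML below shows roughly how a chained
    # datastore might be configured.  The ``cls``, ``datastores`` and
    # ``datastore_constraints`` keys are the ones read by this class; the
    # particular child datastore classes and the root placeholder are
    # assumptions for the example, not a tested configuration.
    #
    #   datastore:
    #     cls: lsst.daf.butler.datastores.chainedDatastore.ChainedDatastore
    #     datastores:
    #       - cls: lsst.daf.butler.datastores.inMemoryDatastore.InMemoryDatastore
    #       - cls: lsst.daf.butler.datastores.fileDatastore.FileDatastore
    #         root: <butlerRoot>
    #
    # Reads are attempted in the order the children are listed; writes are
    # offered to every child that accepts the dataset.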

    defaultConfigFile = "datastores/chainedDatastore.yaml"
    """Path to configuration defaults. Accessed within the ``configs`` resource
    or relative to a search path. Can be None if no defaults specified.
    """

    containerKey = "datastores"
    """Key to specify where child datastores are configured."""

    datastores: List[Datastore]
    """All the child datastores known to this datastore."""

    datastoreConstraints: Sequence[Optional[Constraints]]
    """Constraints to be applied to each of the child datastores."""

    @classmethod
    def setConfigRoot(cls, root: str, config: Config, full: Config, overwrite: bool = True) -> None:
        """Set any filesystem-dependent config options for child Datastores to
        be appropriate for a new empty repository with the given root.

        Parameters
        ----------
        root : `str`
            Filesystem path to the root of the data repository.
        config : `Config`
            A `Config` to update. Only the subset understood by
            this component will be updated. Will not expand
            defaults.
        full : `Config`
            A complete config with all defaults expanded that can be
            converted to a `DatastoreConfig`. Read-only and will not be
            modified by this method.
            Repository-specific options that should not be obtained
            from defaults when Butler instances are constructed
            should be copied from ``full`` to ``config``.
        overwrite : `bool`, optional
            If `False`, do not modify a value in ``config`` if the value
            already exists. Default is always to overwrite with the provided
            ``root``.

        Notes
        -----
        If a keyword is explicitly defined in the supplied ``config`` it
        will not be overridden by this method if ``overwrite`` is `False`.
        This allows explicit values set in external configs to be retained.
        """

        # Extract the part of the config we care about updating
        datastoreConfig = DatastoreConfig(config, mergeDefaults=False)

        # And the subset of the full config that we can use for reference.
        # Do not bother with defaults because we are told this already has
        # them.
        fullDatastoreConfig = DatastoreConfig(full, mergeDefaults=False)

        # Loop over each datastore config and pass the subsets to the
        # child datastores to process.

        containerKey = cls.containerKey
        for idx, (child, fullChild) in enumerate(zip(datastoreConfig[containerKey],
                                                     fullDatastoreConfig[containerKey])):
            childConfig = DatastoreConfig(child, mergeDefaults=False)
            fullChildConfig = DatastoreConfig(fullChild, mergeDefaults=False)
            datastoreClass = doImportType(fullChildConfig["cls"])
            if not issubclass(datastoreClass, Datastore):
                raise TypeError(f"Imported child class {fullChildConfig['cls']} is not a Datastore")
            newroot = "{}/{}_{}".format(root, datastoreClass.__qualname__, idx)
            datastoreClass.setConfigRoot(newroot, childConfig, fullChildConfig, overwrite=overwrite)

            # Reattach to parent
            datastoreConfig[containerKey, idx] = childConfig

        # Reattach modified datastore config to parent
        # If this has a datastore key we attach there, otherwise we assume
        # this information goes at the top of the config hierarchy.
        if DatastoreConfig.component in config:
            config[DatastoreConfig.component] = datastoreConfig
        else:
            config.update(datastoreConfig)

        return

    def __init__(self, config: Union[Config, str], bridgeManager: DatastoreRegistryBridgeManager,
                 butlerRoot: Optional[str] = None):
        super().__init__(config, bridgeManager)

        # Scan for child datastores and instantiate them with the same registry
        self.datastores = []
        for c in self.config["datastores"]:
            c = DatastoreConfig(c)
            datastoreType = doImportType(c["cls"])
            if not issubclass(datastoreType, Datastore):
                raise TypeError(f"Imported child class {c['cls']} is not a Datastore")
            datastore = datastoreType(c, bridgeManager, butlerRoot=butlerRoot)
            log.debug("Creating child datastore %s", datastore.name)
            self.datastores.append(datastore)

        # Name ourself based on our children
        if self.datastores:
            # We must set the names explicitly
            self._names = [d.name for d in self.datastores]
            childNames = ",".join(self.names)
        else:
            childNames = "(empty@{})".format(time.time())
            self._names = [childNames]
        self.name = "{}[{}]".format(type(self).__qualname__, childNames)

        # We declare we are ephemeral if all our child datastores declare
        # they are ephemeral
        isEphemeral = True
        for d in self.datastores:
            if not d.isEphemeral:
                isEphemeral = False
                break
        self.isEphemeral = isEphemeral

        # per-datastore override constraints
        if "datastore_constraints" in self.config:
            overrides = self.config["datastore_constraints"]

            if len(overrides) != len(self.datastores):
                raise DatastoreValidationError(f"Number of registered datastores ({len(self.datastores)})"
                                               " differs from number of constraints overrides"
                                               f" {len(overrides)}")

            self.datastoreConstraints = [Constraints(c.get("constraints"), universe=bridgeManager.universe)
                                         for c in overrides]

        else:
            self.datastoreConstraints = (None,) * len(self.datastores)

        log.debug("Created %s (%s)", self.name, ("ephemeral" if self.isEphemeral else "permanent"))

    @property
    def names(self) -> Tuple[str, ...]:
        return tuple(self._names)

    def __str__(self) -> str:
        chainName = ", ".join(str(ds) for ds in self.datastores)
        return chainName

    def knows(self, ref: DatasetRef) -> bool:
        """Check if the dataset is known to any of the datastores.

        Does not check for existence of any artifact.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required dataset.

        Returns
        -------
        exists : `bool`
            `True` if the dataset is known to the datastore.
        """
        for datastore in self.datastores:
            if datastore.knows(ref):
                log.debug("%s known to datastore %s", ref, datastore.name)
                return True
        return False

    def mexists(self, refs: Iterable[DatasetRef],
                artifact_existence: Optional[Dict[ButlerURI, bool]] = None) -> Dict[DatasetRef, bool]:
        """Check the existence of multiple datasets at once.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            The datasets to be checked.
        artifact_existence : `dict` of [`ButlerURI`, `bool`], optional
            Mapping of datastore artifact to existence. Updated by this
            method with details of all artifacts tested. Can be `None`
            if the caller is not interested.

        Returns
        -------
        existence : `dict` of [`DatasetRef`, `bool`]
            Mapping from dataset to boolean indicating existence in any
            of the child datastores.
        """
        dataset_existence: Dict[DatasetRef, bool] = {}
        for datastore in self.datastores:
            dataset_existence.update(datastore.mexists(refs, artifact_existence=artifact_existence))

            # For the next datastore there is no point asking about refs we
            # already know exist. No special exemption for ephemeral
            # datastores.
            refs = [ref for ref, exists in dataset_existence.items() if not exists]

        return dataset_existence

    def exists(self, ref: DatasetRef) -> bool:
        """Check if the dataset exists in one of the datastores.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required dataset.

        Returns
        -------
        exists : `bool`
            `True` if the entity exists in one of the child datastores.
        """
        for datastore in self.datastores:
            if datastore.exists(ref):
                log.debug("Found %s in datastore %s", ref, datastore.name)
                return True
        return False

    def get(self, ref: DatasetRef, parameters: Optional[Mapping[str, Any]] = None) -> Any:
        """Load an InMemoryDataset from the store.

        The dataset is returned from the first datastore that has
        the dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required Dataset.
        parameters : `dict`
            `StorageClass`-specific parameters that specify, for example,
            a slice of the dataset to be loaded.

        Returns
        -------
        inMemoryDataset : `object`
            Requested dataset or slice thereof as an InMemoryDataset.

        Raises
        ------
        FileNotFoundError
            Requested dataset cannot be retrieved.
        TypeError
            Return value from formatter has unexpected type.
        ValueError
            Formatter failed to process the dataset.
        """

        for datastore in self.datastores:
            try:
                inMemoryObject = datastore.get(ref, parameters)
                log.debug("Found dataset %s in datastore %s", ref, datastore.name)
                return inMemoryObject
            except FileNotFoundError:
                pass

        raise FileNotFoundError("Dataset {} could not be found in any of the datastores".format(ref))

    def put(self, inMemoryDataset: Any, ref: DatasetRef) -> None:
        """Write an InMemoryDataset with a given `DatasetRef` to each
        datastore.

        The put() to child datastores can fail with
        `DatasetTypeNotSupportedError`. The put() for this datastore will be
        deemed to have succeeded so long as at least one child datastore
        accepted the inMemoryDataset.

        Parameters
        ----------
        inMemoryDataset : `object`
            The dataset to store.
        ref : `DatasetRef`
            Reference to the associated Dataset.

        Raises
        ------
        TypeError
            Supplied object and storage class are inconsistent.
        DatasetTypeNotSupportedError
            All datastores reported `DatasetTypeNotSupportedError`.
        """
        log.debug("Put %s", ref)

        # Confirm that we can accept this dataset
        if not self.constraints.isAcceptable(ref):
            # Raise rather than use boolean return value.
            raise DatasetTypeNotSupportedError(f"Dataset {ref} has been rejected by this datastore via"
                                               " configuration.")

        isPermanent = False
        nsuccess = 0
        npermanent = 0
        nephemeral = 0
        for datastore, constraints in zip(self.datastores, self.datastoreConstraints):
            if constraints is not None and not constraints.isAcceptable(ref):
                log.debug("Datastore %s skipping put via configuration for ref %s",
                          datastore.name, ref)
                continue

            if datastore.isEphemeral:
                nephemeral += 1
            else:
                npermanent += 1
            try:
                datastore.put(inMemoryDataset, ref)
                nsuccess += 1
                if not datastore.isEphemeral:
                    isPermanent = True
            except DatasetTypeNotSupportedError:
                pass

        if nsuccess == 0:
            raise DatasetTypeNotSupportedError(f"None of the chained datastores supported ref {ref}")

        if not isPermanent and npermanent > 0:
            warnings.warn(f"Put of {ref} only succeeded in ephemeral datastores", stacklevel=2)

        if self._transaction is not None:
            self._transaction.registerUndo('put', self.remove, ref)
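    # A minimal usage sketch of the put/get semantics implemented above,
    # with hypothetical ``chained``, ``dataset`` and ``ref`` objects (in
    # normal use these calls are made through a Butler rather than directly
    # on the datastore):
    #
    #   >>> chained.put(dataset, ref)   # offered to every accepting child
    #   >>> chained.exists(ref)
    #   True
    #   >>> obj = chained.get(ref)      # served by the first child holding it
    #
    # The put succeeds if at least one child accepts the dataset; a warning
    # is issued if only ephemeral children accepted it.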

    def _overrideTransferMode(self, *datasets: Any, transfer: Optional[str] = None) -> Optional[str]:
        # Docstring inherited from base class.
        if transfer != "auto":
            return transfer
        # Ask each datastore what they think auto means
        transfers = {d._overrideTransferMode(*datasets, transfer=transfer) for d in self.datastores}

        # Remove any untranslated "auto" values
        transfers.discard(transfer)

        if len(transfers) == 1:
            return transfers.pop()
        if not transfers:
            # Everything reported "auto"
            return transfer

        raise RuntimeError("Chained datastore does not yet support different transfer modes"
                           f" from 'auto' in each child datastore (wanted {transfers})")

    def _prepIngest(self, *datasets: FileDataset, transfer: Optional[str] = None) -> _IngestPrepData:
        # Docstring inherited from Datastore._prepIngest.
        if transfer is None or transfer == "move":
            raise NotImplementedError("ChainedDatastore does not support transfer=None or transfer='move'.")

        def isDatasetAcceptable(dataset: FileDataset, *, name: str, constraints: Constraints) -> bool:
            acceptable = [ref for ref in dataset.refs if constraints.isAcceptable(ref)]
            if not acceptable:
                log.debug("Datastore %s skipping ingest via configuration for refs %s",
                          name, ", ".join(str(ref) for ref in dataset.refs))
                return False
            else:
                return True

        # Filter down to just datasets the chained datastore's own
        # configuration accepts.
        okForParent: List[FileDataset] = [dataset for dataset in datasets
                                          if isDatasetAcceptable(dataset, name=self.name,
                                                                 constraints=self.constraints)]

        # Iterate over nested datastores and call _prepIngest on each.
        # Save the results to a list:
        children: List[Tuple[Datastore, Datastore.IngestPrepData]] = []
        # ...and remember whether all of the failures are due to
        # NotImplementedError being raised.
        allFailuresAreNotImplementedError = True
        for datastore, constraints in zip(self.datastores, self.datastoreConstraints):
            okForChild: List[FileDataset]
            if constraints is not None:
                okForChild = [dataset for dataset in okForParent
                              if isDatasetAcceptable(dataset, name=datastore.name,
                                                     constraints=constraints)]
            else:
                okForChild = okForParent
            try:
                prepDataForChild = datastore._prepIngest(*okForChild, transfer=transfer)
            except NotImplementedError:
                log.debug("Skipping ingest for datastore %s because transfer "
                          "mode %s is not supported.", datastore.name, transfer)
                continue
            allFailuresAreNotImplementedError = False
            children.append((datastore, prepDataForChild))
        if allFailuresAreNotImplementedError:
            raise NotImplementedError(f"No child datastore supports transfer mode {transfer}.")
        return _IngestPrepData(children=children)

    def _finishIngest(self, prepData: _IngestPrepData, *, transfer: Optional[str] = None) -> None:
        # Docstring inherited from Datastore._finishIngest.
        for datastore, prepDataForChild in prepData.children:
            datastore._finishIngest(prepDataForChild, transfer=transfer)
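    # Sketch of how the two-phase ingest above is typically driven through
    # the ``ingest()`` entry point inherited from the `Datastore` base class.
    # The file path and ref are hypothetical and the call is illustrative,
    # not a tested example:
    #
    #   >>> dataset = FileDataset(path="file:///tmp/example.fits", refs=[ref])
    #   >>> chained.ingest(dataset, transfer="copy")
    #
    # transfer=None and transfer="move" are rejected outright by
    # ``_prepIngest``; other modes are delegated to whichever children
    # support them.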

    def getURIs(self, ref: DatasetRef,
                predict: bool = False) -> Tuple[Optional[ButlerURI], Dict[str, ButlerURI]]:
        """Return URIs associated with dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required dataset.
        predict : `bool`, optional
            If the datastore does not know about the dataset, should it
            return a predicted URI or not?

        Returns
        -------
        primary : `ButlerURI`
            The URI to the primary artifact associated with this dataset.
            If the dataset was disassembled within the datastore this
            may be `None`.
        components : `dict`
            URIs to any components associated with the dataset artifact.
            Can be empty if there are no components.

        Notes
        -----
        The returned URI comes from the first datastore in the list that has
        the dataset, with preference given to permanent datastores over
        ephemeral ones. If no datastore has the dataset and prediction is
        allowed, the predicted URI for the first datastore in the list will
        be returned.
        """
        DatastoreURIs = Tuple[Optional[ButlerURI], Dict[str, ButlerURI]]
        log.debug("Requesting URIs for %s", ref)
        predictedUri: Optional[DatastoreURIs] = None
        predictedEphemeralUri: Optional[DatastoreURIs] = None
        firstEphemeralUri: Optional[DatastoreURIs] = None
        for datastore in self.datastores:
            if datastore.exists(ref):
                if not datastore.isEphemeral:
                    uri = datastore.getURIs(ref)
                    log.debug("Retrieved non-ephemeral URI: %s", uri)
                    return uri
                elif not firstEphemeralUri:
                    firstEphemeralUri = datastore.getURIs(ref)
            elif predict:
                if not predictedUri and not datastore.isEphemeral:
                    predictedUri = datastore.getURIs(ref, predict)
                elif not predictedEphemeralUri and datastore.isEphemeral:
                    predictedEphemeralUri = datastore.getURIs(ref, predict)

        if firstEphemeralUri:
            log.debug("Retrieved ephemeral URI: %s", firstEphemeralUri)
            return firstEphemeralUri

        if predictedUri:
            log.debug("Retrieved predicted URI: %s", predictedUri)
            return predictedUri

        if predictedEphemeralUri:
            log.debug("Retrieved predicted ephemeral URI: %s", predictedEphemeralUri)
            return predictedEphemeralUri

        raise FileNotFoundError("Dataset {} not in any datastore".format(ref))

    def getURI(self, ref: DatasetRef, predict: bool = False) -> ButlerURI:
        """URI to the Dataset.

        The returned URI comes from the first datastore in the list that has
        the dataset, with preference given to permanent datastores over
        ephemeral ones. If no datastore has the dataset and prediction is
        allowed, the predicted URI for the first datastore in the list will
        be returned.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required Dataset.
        predict : `bool`
            If `True`, allow URIs to be returned of datasets that have not
            been written.

        Returns
        -------
        uri : `ButlerURI`
            URI pointing to the dataset within the datastore. If the
            dataset does not exist in the datastore, and if ``predict`` is
            `True`, the URI will be a prediction and will include a URI
            fragment "#predicted".

        Notes
        -----
        If the datastore does not have entities that relate well
        to the concept of a URI the returned URI string will be
        descriptive. The returned URI is not guaranteed to be obtainable.

        Raises
        ------
        FileNotFoundError
            A URI has been requested for a dataset that does not exist and
            guessing is not allowed.
        RuntimeError
            Raised if a request is made for a single URI but multiple URIs
            are associated with this dataset.
        """
        log.debug("Requesting URI for %s", ref)
        primary, components = self.getURIs(ref, predict)
        if primary is None or components:
            raise RuntimeError(f"Dataset ({ref}) includes distinct URIs for components. "
                               "Use Datastore.getURIs() instead.")
        return primary
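    # Illustrative sketch of the URI lookup preference implemented above
    # (``chained``, ``ref`` and ``missing_ref`` are hypothetical): an
    # artifact in a permanent child is preferred over one in an ephemeral
    # child, and predictions are only used when nothing exists.
    #
    #   >>> chained.getURI(ref)                        # first permanent copy
    #   >>> chained.getURI(missing_ref, predict=True)  # ends with "#predicted"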

    def retrieveArtifacts(self, refs: Iterable[DatasetRef],
                          destination: ButlerURI, transfer: str = "auto",
                          preserve_path: bool = True,
                          overwrite: bool = False) -> List[ButlerURI]:
        """Retrieve the file artifacts associated with the supplied refs.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            The datasets for which file artifacts are to be retrieved.
            A single ref can result in multiple files. The refs must
            be resolved.
        destination : `ButlerURI`
            Location to write the file artifacts.
        transfer : `str`, optional
            Method to use to transfer the artifacts. Must be one of the options
            supported by `ButlerURI.transfer_from()`. "move" is not allowed.
        preserve_path : `bool`, optional
            If `True` the full path of the file artifact within the datastore
            is preserved. If `False` the final file component of the path
            is used.
        overwrite : `bool`, optional
            If `True` allow transfers to overwrite existing files at the
            destination.

        Returns
        -------
        targets : `list` of `ButlerURI`
            URIs of file artifacts in destination location. Order is not
            preserved.
        """
        if not destination.isdir():
            raise ValueError(f"Destination location must refer to a directory. Given {destination}")

        # Using getURIs is not feasible since it becomes difficult to
        # determine the path within the datastore later on. For now
        # follow getURIs implementation approach.

        pending = set(refs)

        # There is a question as to whether an exception should be raised
        # early if some of the refs are missing, or whether files should be
        # transferred until a problem is hit. Prefer to complain up front.
        # Use the datastore integer as primary key.
        grouped_by_datastore: Dict[int, Set[DatasetRef]] = {}

        for number, datastore in enumerate(self.datastores):
            if datastore.isEphemeral:
                # In the future we will want to distinguish in-memory from
                # caching datastore since using an on-disk local
                # cache is exactly what we should be doing.
                continue
            datastore_refs = {ref for ref in pending if datastore.exists(ref)}

            if datastore_refs:
                grouped_by_datastore[number] = datastore_refs

                # Remove these from the pending list so that we do not bother
                # looking for them any more.
                pending = pending - datastore_refs

        if pending:
            raise RuntimeError(f"Some datasets were not found in any datastores: {pending}")

        # Now do the transfer.
        targets: List[ButlerURI] = []
        for number, datastore_refs in grouped_by_datastore.items():
            targets.extend(self.datastores[number].retrieveArtifacts(datastore_refs, destination,
                                                                     transfer=transfer,
                                                                     preserve_path=preserve_path,
                                                                     overwrite=overwrite))

        return targets
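    # Hedged usage sketch for the artifact retrieval above; the destination
    # URI is an assumption for illustration only:
    #
    #   >>> destination = ButlerURI("file:///tmp/export/", forceDirectory=True)
    #   >>> targets = chained.retrieveArtifacts(refs, destination, transfer="copy")
    #
    # Ephemeral children are skipped, so every requested ref must be present
    # in at least one permanent child or a RuntimeError is raised.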

    def remove(self, ref: DatasetRef) -> None:
        """Indicate to the datastore that a dataset can be removed.

        The dataset will be removed from each datastore. The dataset is
        not required to exist in every child datastore.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required dataset.

        Raises
        ------
        FileNotFoundError
            Attempt to remove a dataset that does not exist. Raised if none
            of the child datastores removed the dataset.
        """
        log.debug("Removing %s", ref)
        self.trash(ref, ignore_errors=False)
        self.emptyTrash(ignore_errors=False)

    def forget(self, refs: Iterable[DatasetRef]) -> None:
        for datastore in tuple(self.datastores):
            datastore.forget(refs)

    def trash(self, ref: Union[DatasetRef, Iterable[DatasetRef]], ignore_errors: bool = True) -> None:
        if isinstance(ref, DatasetRef):
            ref_label = str(ref)
        else:
            ref_label = "bulk datasets"

        log.debug("Trashing %s", ref_label)

        counter = 0
        for datastore in self.datastores:
            try:
                datastore.trash(ref, ignore_errors=ignore_errors)
                counter += 1
            except FileNotFoundError:
                pass

        if counter == 0:
            err_msg = f"Could not mark for removal from any child datastore: {ref_label}"
            if ignore_errors:
                log.warning(err_msg)
            else:
                raise FileNotFoundError(err_msg)

    def emptyTrash(self, ignore_errors: bool = True) -> None:
        for datastore in self.datastores:
            datastore.emptyTrash(ignore_errors=ignore_errors)

    def transfer(self, inputDatastore: Datastore, ref: DatasetRef) -> None:
        """Retrieve a dataset from an input `Datastore`,
        and store the result in this `Datastore`.

        Parameters
        ----------
        inputDatastore : `Datastore`
            The external `Datastore` from which to retrieve the Dataset.
        ref : `DatasetRef`
            Reference to the required dataset in the input data store.
        """
        assert inputDatastore is not self  # unless we want it for renames?
        inMemoryDataset = inputDatastore.get(ref)
        self.put(inMemoryDataset, ref)

    def validateConfiguration(self, entities: Iterable[Union[DatasetRef, DatasetType, StorageClass]],
                              logFailures: bool = False) -> None:
        """Validate some of the configuration for this datastore.

        Parameters
        ----------
        entities : iterable of `DatasetRef`, `DatasetType`, or `StorageClass`
            Entities to test against this configuration. Can be differing
            types.
        logFailures : `bool`, optional
            If `True`, output a log message for every validation error
            detected.

        Raises
        ------
        DatastoreValidationError
            Raised if there is a validation problem with a configuration.
            All the problems are reported in a single exception.

        Notes
        -----
        This method checks each datastore in turn.
        """

        # Need to catch each of the datastore outputs and ensure that
        # all are tested.
        failures = []
        for datastore in self.datastores:
            try:
                datastore.validateConfiguration(entities, logFailures=logFailures)
            except DatastoreValidationError as e:
                if logFailures:
                    log.critical("Datastore %s failed validation", datastore.name)
                failures.append(f"Datastore {self.name}: {e}")

        if failures:
            msg = ";\n".join(failures)
            raise DatastoreValidationError(msg)

    def validateKey(self, lookupKey: LookupKey,
                    entity: Union[DatasetRef, DatasetType, StorageClass]) -> None:
        # Docstring is inherited from base class
        failures = []
        for datastore in self.datastores:
            try:
                datastore.validateKey(lookupKey, entity)
            except DatastoreValidationError as e:
                failures.append(f"Datastore {self.name}: {e}")

        if failures:
            msg = ";\n".join(failures)
            raise DatastoreValidationError(msg)

    def getLookupKeys(self) -> Set[LookupKey]:
        # Docstring is inherited from base class
        keys = set()
        for datastore in self.datastores:
            keys.update(datastore.getLookupKeys())

        keys.update(self.constraints.getLookupKeys())
        for p in self.datastoreConstraints:
            if p is not None:
                keys.update(p.getLookupKeys())

        return keys

    def needs_expanded_data_ids(
        self,
        transfer: Optional[str],
        entity: Optional[Union[DatasetRef, DatasetType, StorageClass]] = None,
    ) -> bool:
        # Docstring inherited.
        # We can't safely use `self.datastoreConstraints` with `entity` to
        # check whether a child datastore would even want to ingest this
        # dataset, because we don't want to filter out datastores that might
        # need an expanded data ID based on incomplete information (e.g. we
        # pass a StorageClass, but the constraint dispatches on DatasetType).
        # So we pessimistically check if any datastore would need an expanded
        # data ID for this transfer mode.
        return any(datastore.needs_expanded_data_ids(transfer) for datastore in self.datastores)