Coverage for python/lsst/daf/butler/datastores/chainedDatastore.py: 90%

288 statements  

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24"""Chained datastore.""" 

25 

26__all__ = ("ChainedDatastore",) 

27 

28import itertools 

29import logging 

30import time 

31import warnings 

32from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Mapping, Optional, Sequence, Set, Tuple, Union 

33 

34from lsst.daf.butler import ( 

35 Constraints, 

36 DatasetRef, 

37 DatasetTypeNotSupportedError, 

38 Datastore, 

39 DatastoreConfig, 

40 DatastoreValidationError, 

41 FileDataset, 

42) 

43from lsst.resources import ResourcePath 

44from lsst.utils import doImportType 

45 

46if TYPE_CHECKING:  # coverage: 46 ↛ 47, condition on line 46 was never true

47 from lsst.daf.butler import Config, DatasetType, LookupKey, StorageClass 

48 from lsst.daf.butler.registry.interfaces import DatastoreRegistryBridgeManager 

49 

50log = logging.getLogger(__name__) 

51 

52 

53class _IngestPrepData(Datastore.IngestPrepData): 

54 """Helper class for ChainedDatastore ingest implementation. 

55 

56 Parameters 

57 ---------- 

58 children : `list` of `tuple` 

59 Pairs of `Datastore`, `IngestPrepData` for all child datastores. 

60 """ 

61 

62 def __init__(self, children: List[Tuple[Datastore, Datastore.IngestPrepData]]): 

63 super().__init__(itertools.chain.from_iterable(data.refs.values() for _, data in children)) 

64 self.children = children 

65 

66 

67class ChainedDatastore(Datastore): 

68 """Chained Datastores to allow read and writes from multiple datastores. 

69 

70 A ChainedDatastore is configured with multiple datastore configurations. 

71 A ``put()`` is sent to each child datastore that accepts the dataset. A ``get()`` 

72 operation is sent to each datastore in turn and the first datastore 

73 to return a valid dataset is used. 

74 

75 Parameters 

76 ---------- 

77 config : `DatastoreConfig` or `str` 

78 Configuration. This configuration must include a ``datastores`` field 

79 as a sequence of datastore configurations. The order in this sequence 

80 indicates the order to use for read operations. 

81 bridgeManager : `DatastoreRegistryBridgeManager` 

82 Object that manages the interface between `Registry` and datastores. 

83 butlerRoot : `str`, optional 

84 New datastore root to use to override the configuration value. This 

85 root is sent to each child datastore. 

86 

87 Notes 

88 ----- 

89 ChainedDatastore never supports `None` or `"move"` as an `ingest` transfer 

90 mode. It supports `"copy"`, `"symlink"`, `"relsymlink"` 

91 and `"hardlink"` if and only if all its child datastores do. 

92 """ 

93 

94 defaultConfigFile = "datastores/chainedDatastore.yaml" 

95 """Path to configuration defaults. Accessed within the ``configs`` resource 

96 or relative to a search path. Can be None if no defaults specified. 

97 """ 

98 

99 containerKey = "datastores" 

100 """Key to specify where child datastores are configured.""" 

101 

102 datastores: List[Datastore] 

103 """All the child datastores known to this datastore.""" 

104 

105 datastoreConstraints: Sequence[Optional[Constraints]] 

106 """Constraints to be applied to each of the child datastores.""" 

107 

108 @classmethod 

109 def setConfigRoot(cls, root: str, config: Config, full: Config, overwrite: bool = True) -> None: 

110 """Set any filesystem-dependent config options for child Datastores to 

111 be appropriate for a new empty repository with the given root. 

112 

113 Parameters 

114 ---------- 

115 root : `str` 

116 Filesystem path to the root of the data repository. 

117 config : `Config` 

118 A `Config` to update. Only the subset understood by 

119 this component will be updated. Will not expand 

120 defaults. 

121 full : `Config` 

122 A complete config with all defaults expanded that can be 

123 converted to a `DatastoreConfig`. Read-only and will not be 

124 modified by this method. 

125 Repository-specific options that should not be obtained 

126 from defaults when Butler instances are constructed 

127 should be copied from ``full`` to ``config``. 

128 overwrite : `bool`, optional 

129 If `False`, do not modify a value in ``config`` if the value 

130 already exists. Default is always to overwrite with the provided 

131 ``root``. 

132 

133 Notes 

134 ----- 

135 If a keyword is explicitly defined in the supplied ``config`` it 

136 will not be overridden by this method if ``overwrite`` is `False`. 

137 This allows explicit values set in external configs to be retained. 

138 """ 

139 

140 # Extract the part of the config we care about updating 

141 datastoreConfig = DatastoreConfig(config, mergeDefaults=False) 

142 

143 # And the subset of the full config that we can use for reference. 

144 # Do not bother with defaults because we are told this already has 

145 # them. 

146 fullDatastoreConfig = DatastoreConfig(full, mergeDefaults=False) 

147 

148 # Loop over each datastore config and pass the subsets to the 

149 # child datastores to process. 

150 

151 containerKey = cls.containerKey 

152 for idx, (child, fullChild) in enumerate( 

153 zip(datastoreConfig[containerKey], fullDatastoreConfig[containerKey]) 

154 ): 

155 childConfig = DatastoreConfig(child, mergeDefaults=False) 

156 fullChildConfig = DatastoreConfig(fullChild, mergeDefaults=False) 

157 datastoreClass = doImportType(fullChildConfig["cls"]) 

158 if not issubclass(datastoreClass, Datastore):  # coverage: 158 ↛ 159, condition on line 158 was never true

159 raise TypeError(f"Imported child class {fullChildConfig['cls']} is not a Datastore") 

160 newroot = "{}/{}_{}".format(root, datastoreClass.__qualname__, idx) 

161 datastoreClass.setConfigRoot(newroot, childConfig, fullChildConfig, overwrite=overwrite) 

162 

163 # Reattach to parent 

164 datastoreConfig[containerKey, idx] = childConfig 

165 

166 # Reattach modified datastore config to parent 

167 # If this has a datastore key we attach there, otherwise we assume 

168 # this information goes at the top of the config hierarchy. 

169 if DatastoreConfig.component in config: 

170 config[DatastoreConfig.component] = datastoreConfig 

171 else: 

172 config.update(datastoreConfig) 

173 

174 return 
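# Worked example of the roots derived above (illustrative values only): with
# root="/repo" and two FileDatastore children, the first child receives
# "/repo/FileDatastore_0" and the second "/repo/FileDatastore_1", so every
# child of a new repository gets its own subdirectory under the parent root.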

175 

176 def __init__( 

177 self, 

178 config: Union[Config, str], 

179 bridgeManager: DatastoreRegistryBridgeManager, 

180 butlerRoot: Optional[str] = None, 

181 ): 

182 super().__init__(config, bridgeManager) 

183 

184 # Scan for child datastores and instantiate them with the same registry 

185 self.datastores = [] 

186 for c in self.config["datastores"]: 

187 c = DatastoreConfig(c) 

188 datastoreType = doImportType(c["cls"]) 

189 if not issubclass(datastoreType, Datastore):  # coverage: 189 ↛ 190, condition on line 189 was never true

190 raise TypeError(f"Imported child class {c['cls']} is not a Datastore") 

191 datastore = datastoreType(c, bridgeManager, butlerRoot=butlerRoot) 

192 log.debug("Creating child datastore %s", datastore.name) 

193 self.datastores.append(datastore) 

194 

195 # Name ourself based on our children 

196 if self.datastores:  # coverage: 196 ↛ 201, condition on line 196 was never false

197 # We must set the names explicitly 

198 self._names = [d.name for d in self.datastores] 

199 childNames = ",".join(self.names) 

200 else: 

201 childNames = "(empty@{})".format(time.time()) 

202 self._names = [childNames] 

203 self.name = "{}[{}]".format(type(self).__qualname__, childNames) 

204 

205 # We declare we are ephemeral if all our child datastores declare 

206 # they are ephemeral 

207 isEphemeral = True 

208 for d in self.datastores: 

209 if not d.isEphemeral: 

210 isEphemeral = False 

211 break 

212 self.isEphemeral = isEphemeral 

213 

214 # per-datastore override constraints 

215 if "datastore_constraints" in self.config: 

216 overrides = self.config["datastore_constraints"] 

217 

218 if len(overrides) != len(self.datastores):  # coverage: 218 ↛ 219, condition on line 218 was never true

219 raise DatastoreValidationError( 

220 f"Number of registered datastores ({len(self.datastores)})" 

221 " differs from number of constraints overrides" 

222 f" {len(overrides)}" 

223 ) 

224 

225 self.datastoreConstraints = [ 

226 Constraints(c.get("constraints"), universe=bridgeManager.universe) for c in overrides 

227 ] 

228 

229 else: 

230 self.datastoreConstraints = (None,) * len(self.datastores) 

231 

232 log.debug("Created %s (%s)", self.name, ("ephemeral" if self.isEphemeral else "permanent")) 

233 

234 @property 

235 def names(self) -> Tuple[str, ...]: 

236 return tuple(self._names) 

237 

238 def __str__(self) -> str: 

239 chainName = ", ".join(str(ds) for ds in self.datastores) 

240 return chainName 

241 

242 def knows(self, ref: DatasetRef) -> bool: 

243 """Check if the dataset is known to any of the datastores. 

244 

245 Does not check for existence of any artifact. 

246 

247 Parameters 

248 ---------- 

249 ref : `DatasetRef` 

250 Reference to the required dataset. 

251 

252 Returns 

253 ------- 

254 exists : `bool` 

255 `True` if the dataset is known to the datastore. 

256 """ 

257 for datastore in self.datastores: 

258 if datastore.knows(ref): 

259 log.debug("%s known to datastore %s", ref, datastore.name) 

260 return True 

261 return False 

262 

263 def mexists( 

264 self, refs: Iterable[DatasetRef], artifact_existence: Optional[Dict[ResourcePath, bool]] = None 

265 ) -> Dict[DatasetRef, bool]: 

266 """Check the existence of multiple datasets at once. 

267 

268 Parameters 

269 ---------- 

270 refs : iterable of `DatasetRef` 

271 The datasets to be checked. 

272 artifact_existence : `dict` [`lsst.resources.ResourcePath`, `bool`] 

273 Optional mapping of datastore artifact to existence. Updated by 

274 this method with details of all artifacts tested. Can be `None` 

275 if the caller is not interested. 

276 

277 Returns 

278 ------- 

279 existence : `dict` of [`DatasetRef`, `bool`] 

280 Mapping from dataset to boolean indicating existence in any 

281 of the child datastores. 

282 """ 

283 dataset_existence: Dict[DatasetRef, bool] = {} 

284 for datastore in self.datastores: 

285 dataset_existence.update(datastore.mexists(refs, artifact_existence=artifact_existence)) 

286 

287 # For next datastore no point asking about ones we know 

288 # exist already. No special exemption for ephemeral datastores. 

289 refs = [ref for ref, exists in dataset_existence.items() if not exists] 

290 

291 return dataset_existence 
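# Minimal usage sketch, assuming ``datastore`` is a ChainedDatastore and
# ``refs`` an iterable of resolved DatasetRef (both hypothetical names). The
# shared ``artifact_existence`` dict lets repeated calls skip artifacts that
# have already been checked.
#
#   artifact_existence = {}
#   existence = datastore.mexists(refs, artifact_existence=artifact_existence)
#   missing = [ref for ref, found in existence.items() if not found]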

292 

293 def exists(self, ref: DatasetRef) -> bool: 

294 """Check if the dataset exists in one of the datastores. 

295 

296 Parameters 

297 ---------- 

298 ref : `DatasetRef` 

299 Reference to the required dataset. 

300 

301 Returns 

302 ------- 

303 exists : `bool` 

304 `True` if the entity exists in one of the child datastores. 

305 """ 

306 for datastore in self.datastores: 

307 if datastore.exists(ref): 

308 log.debug("Found %s in datastore %s", ref, datastore.name) 

309 return True 

310 return False 

311 

312 def get(self, ref: DatasetRef, parameters: Optional[Mapping[str, Any]] = None) -> Any: 

313 """Load an InMemoryDataset from the store. 

314 

315 The dataset is returned from the first datastore that has 

316 the dataset. 

317 

318 Parameters 

319 ---------- 

320 ref : `DatasetRef` 

321 Reference to the required Dataset. 

322 parameters : `dict` 

323 `StorageClass`-specific parameters that specify, for example, 

324 a slice of the dataset to be loaded. 

325 

326 Returns 

327 ------- 

328 inMemoryDataset : `object` 

329 Requested dataset or slice thereof as an InMemoryDataset. 

330 

331 Raises 

332 ------ 

333 FileNotFoundError 

334 Requested dataset can not be retrieved. 

335 TypeError 

336 Return value from formatter has unexpected type. 

337 ValueError 

338 Formatter failed to process the dataset. 

339 """ 

340 

341 for datastore in self.datastores: 

342 try: 

343 inMemoryObject = datastore.get(ref, parameters) 

344 log.debug("Found dataset %s in datastore %s", ref, datastore.name) 

345 return inMemoryObject 

346 except FileNotFoundError: 

347 pass 

348 

349 raise FileNotFoundError("Dataset {} could not be found in any of the datastores".format(ref)) 

350 

351 def put(self, inMemoryDataset: Any, ref: DatasetRef) -> None: 

352 """Write a InMemoryDataset with a given `DatasetRef` to each 

353 datastore. 

354 

355 The put() to child datastores can fail with 

356 `DatasetTypeNotSupportedError`. The put() for this datastore will be 

357 deemed to have succeeded so long as at least one child datastore 

358 accepted the inMemoryDataset. 

359 

360 Parameters 

361 ---------- 

362 inMemoryDataset : `object` 

363 The dataset to store. 

364 ref : `DatasetRef` 

365 Reference to the associated Dataset. 

366 

367 Raises 

368 ------ 

369 TypeError 

370 Supplied object and storage class are inconsistent. 

371 DatasetTypeNotSupportedError 

372 All datastores reported `DatasetTypeNotSupportedError`. 

373 """ 

374 log.debug("Put %s", ref) 

375 

376 # Confirm that we can accept this dataset 

377 if not self.constraints.isAcceptable(ref): 

378 # Raise rather than use boolean return value. 

379 raise DatasetTypeNotSupportedError( 

380 f"Dataset {ref} has been rejected by this datastore via configuration." 

381 ) 

382 

383 isPermanent = False 

384 nsuccess = 0 

385 npermanent = 0 

386 nephemeral = 0 

387 for datastore, constraints in zip(self.datastores, self.datastoreConstraints): 

388 if constraints is not None and not constraints.isAcceptable(ref): 

389 log.debug("Datastore %s skipping put via configuration for ref %s", datastore.name, ref) 

390 continue 

391 

392 if datastore.isEphemeral: 

393 nephemeral += 1 

394 else: 

395 npermanent += 1 

396 try: 

397 datastore.put(inMemoryDataset, ref) 

398 nsuccess += 1 

399 if not datastore.isEphemeral: 

400 isPermanent = True 

401 except DatasetTypeNotSupportedError: 

402 pass 

403 

404 if nsuccess == 0: 

405 raise DatasetTypeNotSupportedError(f"None of the chained datastores supported ref {ref}") 

406 

407 if not isPermanent and npermanent > 0:  # coverage: 407 ↛ 408, condition on line 407 was never true

408 warnings.warn(f"Put of {ref} only succeeded in ephemeral databases", stacklevel=2) 

409 

410 if self._transaction is not None: 

411 self._transaction.registerUndo("put", self.remove, ref) 
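# Minimal usage sketch, assuming ``datastore`` is a ChainedDatastore, ``obj``
# an in-memory dataset and ``ref`` its resolved DatasetRef (all hypothetical
# names):
#
#   datastore.put(obj, ref)    # tried in every child that accepts the ref;
#                              # succeeds if at least one child stores it
#   obj2 = datastore.get(ref)  # children are searched in configuration order
#                              # and the first one holding the dataset wins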

412 

413 def _overrideTransferMode(self, *datasets: Any, transfer: Optional[str] = None) -> Optional[str]: 

414 # Docstring inherited from base class. 

415 if transfer != "auto": 

416 return transfer 

417 # Ask each datastore what they think auto means 

418 transfers = {d._overrideTransferMode(*datasets, transfer=transfer) for d in self.datastores} 

419 

420 # Remove any untranslated "auto" values 

421 transfers.discard(transfer) 

422 

423 if len(transfers) == 1:  # coverage: 423 ↛ 424, condition on line 423 was never true

424 return transfers.pop() 

425 if not transfers:  # coverage: 425 ↛ 429, condition on line 425 was never false

426 # Everything reported "auto" 

427 return transfer 

428 

429 raise RuntimeError( 

430 "Chained datastore does not yet support different transfer modes" 

431 f" from 'auto' in each child datastore (wanted {transfers})" 

432 ) 
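# Worked example of the "auto" resolution above (illustrative values): if the
# children collectively propose a single concrete mode such as {"copy"}, that
# mode is returned; if every child leaves "auto" untranslated, "auto" is
# passed through unchanged; a mixed answer such as {"copy", "link"} raises
# RuntimeError because the chain cannot honour different modes per child.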

433 

434 def _prepIngest(self, *datasets: FileDataset, transfer: Optional[str] = None) -> _IngestPrepData: 

435 # Docstring inherited from Datastore._prepIngest. 

436 if transfer is None or transfer == "move": 

437 raise NotImplementedError("ChainedDatastore does not support transfer=None or transfer='move'.") 

438 

439 def isDatasetAcceptable(dataset: FileDataset, *, name: str, constraints: Constraints) -> bool: 

440 acceptable = [ref for ref in dataset.refs if constraints.isAcceptable(ref)] 

441 if not acceptable: 

442 log.debug( 

443 "Datastore %s skipping ingest via configuration for refs %s", 

444 name, 

445 ", ".join(str(ref) for ref in dataset.refs), 

446 ) 

447 return False 

448 else: 

449 return True 

450 

451 # Filter down to just datasets the chained datastore's own 

452 # configuration accepts. 

453 okForParent: List[FileDataset] = [ 

454 dataset 

455 for dataset in datasets 

456 if isDatasetAcceptable(dataset, name=self.name, constraints=self.constraints) 

457 ] 

458 

459 # Iterate over nested datastores and call _prepIngest on each. 

460 # Save the results to a list: 

461 children: List[Tuple[Datastore, Datastore.IngestPrepData]] = [] 

462 # ...and remember whether all of the failures are due to 

463 # NotImplementedError being raised. 

464 allFailuresAreNotImplementedError = True 

465 for datastore, constraints in zip(self.datastores, self.datastoreConstraints): 

466 okForChild: List[FileDataset] 

467 if constraints is not None: 

468 okForChild = [ 

469 dataset 

470 for dataset in okForParent 

471 if isDatasetAcceptable(dataset, name=datastore.name, constraints=constraints) 

472 ] 

473 else: 

474 okForChild = okForParent 

475 try: 

476 prepDataForChild = datastore._prepIngest(*okForChild, transfer=transfer) 

477 except NotImplementedError: 

478 log.debug( 

479 "Skipping ingest for datastore %s because transfer mode %s is not supported.", 

480 datastore.name, 

481 transfer, 

482 ) 

483 continue 

484 allFailuresAreNotImplementedError = False 

485 children.append((datastore, prepDataForChild)) 

486 if allFailuresAreNotImplementedError: 

487 raise NotImplementedError(f"No child datastore supports transfer mode {transfer}.") 

488 return _IngestPrepData(children=children) 

489 

490 def _finishIngest( 

491 self, 

492 prepData: _IngestPrepData, 

493 *, 

494 transfer: Optional[str] = None, 

495 record_validation_info: bool = True, 

496 ) -> None: 

497 # Docstring inherited from Datastore._finishIngest. 

498 for datastore, prepDataForChild in prepData.children: 

499 datastore._finishIngest( 

500 prepDataForChild, transfer=transfer, record_validation_info=record_validation_info 

501 ) 
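# Usage sketch, assuming ``datastore`` is a ChainedDatastore and ``dataset``
# a FileDataset with resolved refs (hypothetical names). The public
# ``Datastore.ingest`` entry point drives the _prepIngest/_finishIngest pair
# implemented above.
#
#   datastore.ingest(dataset, transfer="copy")  # works if at least one child
#                                               # supports the "copy" mode
#   datastore.ingest(dataset, transfer="move")  # always NotImplementedError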

502 

503 def getURIs( 

504 self, ref: DatasetRef, predict: bool = False 

505 ) -> Tuple[Optional[ResourcePath], Dict[str, ResourcePath]]: 

506 """Return URIs associated with dataset. 

507 

508 Parameters 

509 ---------- 

510 ref : `DatasetRef` 

511 Reference to the required dataset. 

512 predict : `bool`, optional 

513 If the datastore does not know about the dataset, should it 

514 return a predicted URI or not? 

515 

516 Returns 

517 ------- 

518 primary : `lsst.resources.ResourcePath` 

519 The URI to the primary artifact associated with this dataset. 

520 If the dataset was disassembled within the datastore this 

521 may be `None`. 

522 components : `dict` 

523 URIs to any components associated with the dataset artifact. 

524 Can be empty if there are no components. 

525 

526 Notes 

527 ----- 

528 The returned URI is from the first datastore in the list that has 

529 the dataset with preference given to the first dataset coming from 

530 a permanent datastore. If no datastores have the dataset and prediction 

531 is allowed, the predicted URI for the first datastore in the list will 

532 be returned. 

533 """ 

534 DatastoreURIs = Tuple[Optional[ResourcePath], Dict[str, ResourcePath]] 

535 log.debug("Requesting URIs for %s", ref) 

536 predictedUri: Optional[DatastoreURIs] = None 

537 predictedEphemeralUri: Optional[DatastoreURIs] = None 

538 firstEphemeralUri: Optional[DatastoreURIs] = None 

539 for datastore in self.datastores: 

540 if datastore.exists(ref): 

541 if not datastore.isEphemeral: 

542 uri = datastore.getURIs(ref) 

543 log.debug("Retrieved non-ephemeral URI: %s", uri) 

544 return uri 

545 elif not firstEphemeralUri: 

546 firstEphemeralUri = datastore.getURIs(ref) 

547 elif predict: 

548 if not predictedUri and not datastore.isEphemeral: 

549 predictedUri = datastore.getURIs(ref, predict) 

550 elif not predictedEphemeralUri and datastore.isEphemeral: 

551 predictedEphemeralUri = datastore.getURIs(ref, predict) 

552 

553 if firstEphemeralUri: 

554 log.debug("Retrieved ephemeral URI: %s", firstEphemeralUri) 

555 return firstEphemeralUri 

556 

557 if predictedUri: 

558 log.debug("Retrieved predicted URI: %s", predictedUri) 

559 return predictedUri 

560 

561 if predictedEphemeralUri: 

562 log.debug("Retrieved predicted ephemeral URI: %s", predictedEphemeralUri) 

563 return predictedEphemeralUri 

564 

565 raise FileNotFoundError("Dataset {} not in any datastore".format(ref)) 

566 

567 def getURI(self, ref: DatasetRef, predict: bool = False) -> ResourcePath: 

568 """URI to the Dataset. 

569 

570 The returned URI is from the first datastore in the list that has 

571 the dataset with preference given to the first dataset coming from 

572 a permanent datastore. If no datastores have the dataset and prediction 

573 is allowed, the predicted URI for the first datastore in the list will 

574 be returned. 

575 

576 Parameters 

577 ---------- 

578 ref : `DatasetRef` 

579 Reference to the required Dataset. 

580 predict : `bool` 

581 If `True`, allow URIs to be returned of datasets that have not 

582 been written. 

583 

584 Returns 

585 ------- 

586 uri : `lsst.resources.ResourcePath` 

587 URI pointing to the dataset within the datastore. If the 

588 dataset does not exist in the datastore, and if ``predict`` is 

589 `True`, the URI will be a prediction and will include a URI 

590 fragment "#predicted". 

591 

592 Notes 

593 ----- 

594 If the datastore does not have entities that relate well 

595 to the concept of a URI, the returned URI string will be 

596 descriptive. The returned URI is not guaranteed to be obtainable. 

597 

598 Raises 

599 ------ 

600 FileNotFoundError 

601 A URI has been requested for a dataset that does not exist and 

602 guessing is not allowed. 

603 RuntimeError 

604 Raised if a request is made for a single URI but multiple URIs 

605 are associated with this dataset. 

606 """ 

607 log.debug("Requesting URI for %s", ref) 

608 primary, components = self.getURIs(ref, predict) 

609 if primary is None or components:  # coverage: 609 ↛ 610, condition on line 609 was never true

610 raise RuntimeError( 

611 f"Dataset ({ref}) includes distinct URIs for components. Use Datastore.getURIs() instead." 

612 ) 

613 return primary 
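# Usage sketch of the URI lookup order (hypothetical names): with a chain of
# [in-memory, file] datastores, an existing dataset resolves to the file
# datastore's URI because permanent datastores are preferred over ephemeral
# ones; a predicted URI (carrying a "#predicted" fragment) is only returned
# when no child has the dataset and ``predict=True``.
#
#   uri = datastore.getURI(ref)                    # existing dataset
#   uri = datastore.getURI(missing, predict=True)  # predicted location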

614 

615 def retrieveArtifacts( 

616 self, 

617 refs: Iterable[DatasetRef], 

618 destination: ResourcePath, 

619 transfer: str = "auto", 

620 preserve_path: bool = True, 

621 overwrite: bool = False, 

622 ) -> List[ResourcePath]: 

623 """Retrieve the file artifacts associated with the supplied refs. 

624 

625 Parameters 

626 ---------- 

627 refs : iterable of `DatasetRef` 

628 The datasets for which file artifacts are to be retrieved. 

629 A single ref can result in multiple files. The refs must 

630 be resolved. 

631 destination : `lsst.resources.ResourcePath` 

632 Location to write the file artifacts. 

633 transfer : `str`, optional 

634 Method to use to transfer the artifacts. Must be one of the options 

635 supported by `lsst.resources.ResourcePath.transfer_from()`. 

636 "move" is not allowed. 

637 preserve_path : `bool`, optional 

638 If `True` the full path of the file artifact within the datastore 

639 is preserved. If `False` the final file component of the path 

640 is used. 

641 overwrite : `bool`, optional 

642 If `True` allow transfers to overwrite existing files at the 

643 destination. 

644 

645 Returns 

646 ------- 

647 targets : `list` of `lsst.resources.ResourcePath` 

648 URIs of file artifacts in destination location. Order is not 

649 preserved. 

650 """ 

651 if not destination.isdir():  # coverage: 651 ↛ 652, condition on line 651 was never true

652 raise ValueError(f"Destination location must refer to a directory. Given {destination}") 

653 

654 # Using getURIs is not feasible since it becomes difficult to 

655 # determine the path within the datastore later on. For now 

656 # follow getURIs implementation approach. 

657 

658 pending = set(refs) 

659 

660 # There is a question as to whether an exception should be raised 

661 # early if some of the refs are missing, or whether files should be 

662 # transferred until a problem is hit. Prefer to complain up front. 

663 # Use the datastore integer as primary key. 

664 grouped_by_datastore: Dict[int, Set[DatasetRef]] = {} 

665 

666 for number, datastore in enumerate(self.datastores): 

667 if datastore.isEphemeral: 

668 # In the future we will want to distinguish in-memory from 

669 # caching datastore since using an on-disk local 

670 # cache is exactly what we should be doing. 

671 continue 

672 datastore_refs = {ref for ref in pending if datastore.exists(ref)} 

673 

674 if datastore_refs: 

675 grouped_by_datastore[number] = datastore_refs 

676 

677 # Remove these from the pending list so that we do not bother 

678 # looking for them any more. 

679 pending = pending - datastore_refs 

680 

681 if pending:  # coverage: 681 ↛ 682, condition on line 681 was never true

682 raise RuntimeError(f"Some datasets were not found in any datastores: {pending}") 

683 

684 # Now do the transfer. 

685 targets: List[ResourcePath] = [] 

686 for number, datastore_refs in grouped_by_datastore.items(): 

687 targets.extend( 

688 self.datastores[number].retrieveArtifacts( 

689 datastore_refs, 

690 destination, 

691 transfer=transfer, 

692 preserve_path=preserve_path, 

693 overwrite=overwrite, 

694 ) 

695 ) 

696 

697 return targets 
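# Usage sketch, assuming ``datastore`` is a ChainedDatastore and ``refs`` a
# set of resolved DatasetRef (hypothetical names). Ephemeral children are
# skipped, every ref must be found in some permanent child, and the returned
# URIs point at the copies written below ``dest``.
#
#   from lsst.resources import ResourcePath
#   dest = ResourcePath("/tmp/export/", forceDirectory=True)
#   copies = datastore.retrieveArtifacts(refs, dest, transfer="copy")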

698 

699 def remove(self, ref: DatasetRef) -> None: 

700 """Indicate to the datastore that a dataset can be removed. 

701 

702 The dataset will be removed from each datastore. The dataset is 

703 not required to exist in every child datastore. 

704 

705 Parameters 

706 ---------- 

707 ref : `DatasetRef` 

708 Reference to the required dataset. 

709 

710 Raises 

711 ------ 

712 FileNotFoundError 

713 Attempt to remove a dataset that does not exist. Raised if none 

714 of the child datastores removed the dataset. 

715 """ 

716 log.debug("Removing %s", ref) 

717 self.trash(ref, ignore_errors=False) 

718 self.emptyTrash(ignore_errors=False) 

719 

720 def forget(self, refs: Iterable[DatasetRef]) -> None: 

721 for datastore in tuple(self.datastores): 

722 datastore.forget(refs) 

723 

724 def trash(self, ref: Union[DatasetRef, Iterable[DatasetRef]], ignore_errors: bool = True) -> None: 

725 if isinstance(ref, DatasetRef): 

726 ref_label = str(ref) 

727 else: 

728 ref_label = "bulk datasets" 

729 

730 log.debug("Trashing %s", ref_label) 

731 

732 counter = 0 

733 for datastore in self.datastores: 

734 try: 

735 datastore.trash(ref, ignore_errors=ignore_errors) 

736 counter += 1 

737 except FileNotFoundError: 

738 pass 

739 

740 if counter == 0: 

741 err_msg = f"Could not mark for removal from any child datastore: {ref_label}" 

742 if ignore_errors:  # coverage: 742 ↛ 743, condition on line 742 was never true

743 log.warning(err_msg) 

744 else: 

745 raise FileNotFoundError(err_msg) 

746 

747 def emptyTrash(self, ignore_errors: bool = True) -> None: 

748 for datastore in self.datastores: 

749 datastore.emptyTrash(ignore_errors=ignore_errors) 

750 

751 def transfer(self, inputDatastore: Datastore, ref: DatasetRef) -> None: 

752 """Retrieve a dataset from an input `Datastore`, 

753 and store the result in this `Datastore`. 

754 

755 Parameters 

756 ---------- 

757 inputDatastore : `Datastore` 

758 The external `Datastore` from which to retrieve the Dataset. 

759 ref : `DatasetRef` 

760 Reference to the required dataset in the input data store. 

761 

762 Notes 

763 ----- 

764 The dataset is read from ``inputDatastore`` with ``get()`` and then 

765 written to every child datastore with ``put()``; this method returns 

766 `None`. 

767 """ 

768 assert inputDatastore is not self # unless we want it for renames? 

769 inMemoryDataset = inputDatastore.get(ref) 

770 self.put(inMemoryDataset, ref) 

771 

772 def validateConfiguration( 

773 self, entities: Iterable[Union[DatasetRef, DatasetType, StorageClass]], logFailures: bool = False 

774 ) -> None: 

775 """Validate some of the configuration for this datastore. 

776 

777 Parameters 

778 ---------- 

779 entities : iterable of `DatasetRef`, `DatasetType`, or `StorageClass` 

780 Entities to test against this configuration. Can be differing 

781 types. 

782 logFailures : `bool`, optional 

783 If `True`, output a log message for every validation error 

784 detected. 

785 

786 Raises 

787 ------ 

788 DatastoreValidationError 

789 Raised if there is a validation problem with a configuration. 

790 All the problems are reported in a single exception. 

791 

792 Notes 

793 ----- 

794 This method checks each datastore in turn. 

795 """ 

796 

797 # Need to catch each of the datastore outputs and ensure that 

798 # all are tested. 

799 failures = [] 

800 for datastore in self.datastores: 

801 try: 

802 datastore.validateConfiguration(entities, logFailures=logFailures) 

803 except DatastoreValidationError as e: 

804 if logFailures:  # coverage: 804 ↛ 806, condition on line 804 was never false

805 log.critical("Datastore %s failed validation", datastore.name) 

806 failures.append(f"Datastore {self.name}: {e}") 

807 

808 if failures: 

809 msg = ";\n".join(failures) 

810 raise DatastoreValidationError(msg) 

811 

812 def validateKey(self, lookupKey: LookupKey, entity: Union[DatasetRef, DatasetType, StorageClass]) -> None: 

813 # Docstring is inherited from base class 

814 failures = [] 

815 for datastore in self.datastores: 

816 try: 

817 datastore.validateKey(lookupKey, entity) 

818 except DatastoreValidationError as e: 

819 failures.append(f"Datastore {self.name}: {e}") 

820 

821 if failures: 

822 msg = ";\n".join(failures) 

823 raise DatastoreValidationError(msg) 

824 

825 def getLookupKeys(self) -> Set[LookupKey]: 

826 # Docstring is inherited from base class 

827 keys = set() 

828 for datastore in self.datastores: 

829 keys.update(datastore.getLookupKeys()) 

830 

831 keys.update(self.constraints.getLookupKeys()) 

832 for p in self.datastoreConstraints: 

833 if p is not None:  # coverage: 833 ↛ 834, condition on line 833 was never true

834 keys.update(p.getLookupKeys()) 

835 

836 return keys 

837 

838 def needs_expanded_data_ids( 

839 self, 

840 transfer: Optional[str], 

841 entity: Optional[Union[DatasetRef, DatasetType, StorageClass]] = None, 

842 ) -> bool: 

843 # Docstring inherited. 

844 # We can't safely use `self.datastoreConstraints` with `entity` to 

845 # check whether a child datastore would even want to ingest this 

846 # dataset, because we don't want to filter out datastores that might 

847 # need an expanded data ID based in incomplete information (e.g. we 

848 # pass a StorageClass, but the constraint dispatches on DatasetType). 

849 # So we pessimistically check if any datastore would need an expanded 

850 # data ID for this transfer mode. 

851 return any(datastore.needs_expanded_data_ids(transfer) for datastore in self.datastores)  # coverage: 851 ↛ exit, generator expression on line 851 never finished