Coverage for python/lsst/daf/butler/datastores/chainedDatastore.py: 90%


300 statements  

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24"""Chained datastore.""" 

25 

26__all__ = ("ChainedDatastore",) 

27 

28import itertools 

29import logging 

30import time 

31import warnings 

32from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Mapping, Optional, Sequence, Set, Tuple, Union 

33 

34from lsst.daf.butler import ( 

35 Constraints, 

36 DatasetRef, 

37 DatasetTypeNotSupportedError, 

38 Datastore, 

39 DatastoreConfig, 

40 DatastoreRecordData, 

41 DatastoreValidationError, 

42 FileDataset, 

43) 

44from lsst.resources import ResourcePath 

45from lsst.utils import doImportType 

46 

47if TYPE_CHECKING:  [coverage: 47 ↛ 48, condition on line 47 was never true]

48 from lsst.daf.butler import Config, DatasetType, LookupKey, StorageClass 

49 from lsst.daf.butler.registry.interfaces import DatasetIdRef, DatastoreRegistryBridgeManager 

50 

51log = logging.getLogger(__name__) 

52 

53 

54class _IngestPrepData(Datastore.IngestPrepData): 

55 """Helper class for ChainedDatastore ingest implementation. 

56 

57 Parameters 

58 ---------- 

59 children : `list` of `tuple` 

60 Pairs of `Datastore`, `IngestPrepData` for all child datastores. 

61 """ 

62 

63 def __init__(self, children: List[Tuple[Datastore, Datastore.IngestPrepData]]): 

64 super().__init__(itertools.chain.from_iterable(data.refs.values() for _, data in children)) 

65 self.children = children 

66 

67 

68class ChainedDatastore(Datastore): 

69 """Chained Datastores to allow read and writes from multiple datastores. 

70 

71 A ChainedDatastore is configured with multiple datastore configurations. 

72 A ``put()`` is sent to each child datastore (subject to any per-datastore 

73 constraints). A ``get()`` operation is sent to each datastore in turn and 

74 the first datastore to return a valid dataset is used. 

75 

76 Parameters 

77 ---------- 

78 config : `DatastoreConfig` or `str` 

79 Configuration. This configuration must include a ``datastores`` field 

80 as a sequence of datastore configurations. The order in this sequence 

81 indicates the order to use for read operations. 

82 bridgeManager : `DatastoreRegistryBridgeManager` 

83 Object that manages the interface between `Registry` and datastores. 

84 butlerRoot : `str`, optional 

85 New datastore root to use to override the configuration value. This 

86 root is sent to each child datastore. 

87 

88 Notes 

89 ----- 

90 ChainedDatastore never supports `None` or `"move"` as an `ingest` transfer 

91 mode. It supports `"copy"`, `"symlink"`, `"relsymlink"` 

92 and `"hardlink"` if and only if all its child datastores do. 

93 """ 

94 

95 defaultConfigFile = "datastores/chainedDatastore.yaml" 

96 """Path to configuration defaults. Accessed within the ``configs`` resource 

97 or relative to a search path. Can be None if no defaults specified. 

98 """ 

99 

100 containerKey = "datastores" 

101 """Key to specify where child datastores are configured.""" 

102 

103 datastores: List[Datastore] 

104 """All the child datastores known to this datastore.""" 

105 

106 datastoreConstraints: Sequence[Optional[Constraints]] 

107 """Constraints to be applied to each of the child datastores.""" 

108 

109 @classmethod 

110 def setConfigRoot(cls, root: str, config: Config, full: Config, overwrite: bool = True) -> None: 

111 """Set any filesystem-dependent config options for child Datastores to 

112 be appropriate for a new empty repository with the given root. 

113 

114 Parameters 

115 ---------- 

116 root : `str` 

117 Filesystem path to the root of the data repository. 

118 config : `Config` 

119 A `Config` to update. Only the subset understood by 

120 this component will be updated. Will not expand 

121 defaults. 

122 full : `Config` 

123 A complete config with all defaults expanded that can be 

124 converted to a `DatastoreConfig`. Read-only and will not be 

125 modified by this method. 

126 Repository-specific options that should not be obtained 

127 from defaults when Butler instances are constructed 

128 should be copied from ``full`` to ``config``. 

129 overwrite : `bool`, optional 

130 If `False`, do not modify a value in ``config`` if the value 

131 already exists. Default is always to overwrite with the provided 

132 ``root``. 

133 

134 Notes 

135 ----- 

136 If a keyword is explicitly defined in the supplied ``config`` it 

137 will not be overridden by this method if ``overwrite`` is `False`. 

138 This allows explicit values set in external configs to be retained. 

139 """ 

140 

141 # Extract the part of the config we care about updating 

142 datastoreConfig = DatastoreConfig(config, mergeDefaults=False) 

143 

144 # And the subset of the full config that we can use for reference. 

145 # Do not bother with defaults because we are told this already has 

146 # them. 

147 fullDatastoreConfig = DatastoreConfig(full, mergeDefaults=False) 

148 

149 # Loop over each datastore config and pass the subsets to the 

150 # child datastores to process. 

151 

152 containerKey = cls.containerKey 

153 for idx, (child, fullChild) in enumerate( 

154 zip(datastoreConfig[containerKey], fullDatastoreConfig[containerKey]) 

155 ): 

156 childConfig = DatastoreConfig(child, mergeDefaults=False) 

157 fullChildConfig = DatastoreConfig(fullChild, mergeDefaults=False) 

158 datastoreClass = doImportType(fullChildConfig["cls"]) 

159 if not issubclass(datastoreClass, Datastore):  [coverage: 159 ↛ 160, condition on line 159 was never true]

160 raise TypeError(f"Imported child class {fullChildConfig['cls']} is not a Datastore") 

161 newroot = "{}/{}_{}".format(root, datastoreClass.__qualname__, idx) 

162 datastoreClass.setConfigRoot(newroot, childConfig, fullChildConfig, overwrite=overwrite) 

163 

164 # Reattach to parent 

165 datastoreConfig[containerKey, idx] = childConfig 

166 

167 # Reattach modified datastore config to parent 

168 # If this has a datastore key we attach there, otherwise we assume 

169 # this information goes at the top of the config hierarchy. 

170 if DatastoreConfig.component in config: 

171 config[DatastoreConfig.component] = datastoreConfig 

172 else: 

173 config.update(datastoreConfig) 

174 

175 return 

176 

177 def __init__( 

178 self, 

179 config: Union[Config, str], 

180 bridgeManager: DatastoreRegistryBridgeManager, 

181 butlerRoot: Optional[str] = None, 

182 ): 

183 super().__init__(config, bridgeManager) 

184 

185 # Scan for child datastores and instantiate them with the same bridge manager 

186 self.datastores = [] 

187 for c in self.config["datastores"]: 

188 c = DatastoreConfig(c) 

189 datastoreType = doImportType(c["cls"]) 

190 if not issubclass(datastoreType, Datastore):  [coverage: 190 ↛ 191, condition on line 190 was never true]

191 raise TypeError(f"Imported child class {c['cls']} is not a Datastore") 

192 datastore = datastoreType(c, bridgeManager, butlerRoot=butlerRoot) 

193 log.debug("Creating child datastore %s", datastore.name) 

194 self.datastores.append(datastore) 

195 

196 # Name ourself based on our children 

197 if self.datastores:  [coverage: 197 ↛ 202, condition on line 197 was never false]

198 # We must set the names explicitly 

199 self._names = [d.name for d in self.datastores] 

200 childNames = ",".join(self.names) 

201 else: 

202 childNames = "(empty@{})".format(time.time()) 

203 self._names = [childNames] 

204 self.name = "{}[{}]".format(type(self).__qualname__, childNames) 

205 

206 # We declare we are ephemeral if all our child datastores declare 

207 # they are ephemeral 

208 isEphemeral = True 

209 for d in self.datastores: 

210 if not d.isEphemeral: 

211 isEphemeral = False 

212 break 

213 self.isEphemeral = isEphemeral 

214 

215 # per-datastore override constraints 

216 if "datastore_constraints" in self.config: 

217 overrides = self.config["datastore_constraints"] 

218 

219 if len(overrides) != len(self.datastores):  [coverage: 219 ↛ 220, condition on line 219 was never true]

220 raise DatastoreValidationError( 

221 f"Number of registered datastores ({len(self.datastores)})" 

222 " differs from number of constraints overrides" 

223 f" {len(overrides)}" 

224 ) 

225 

226 self.datastoreConstraints = [ 

227 Constraints(c.get("constraints"), universe=bridgeManager.universe) for c in overrides 

228 ] 

229 

230 else: 

231 self.datastoreConstraints = (None,) * len(self.datastores) 

232 

233 log.debug("Created %s (%s)", self.name, ("ephemeral" if self.isEphemeral else "permanent")) 

234 

235 @property 

236 def names(self) -> Tuple[str, ...]: 

237 return tuple(self._names) 

238 

239 def __str__(self) -> str: 

240 chainName = ", ".join(str(ds) for ds in self.datastores) 

241 return chainName 

242 

243 def knows(self, ref: DatasetRef) -> bool: 

244 """Check if the dataset is known to any of the datastores. 

245 

246 Does not check for existence of any artifact. 

247 

248 Parameters 

249 ---------- 

250 ref : `DatasetRef` 

251 Reference to the required dataset. 

252 

253 Returns 

254 ------- 

255 exists : `bool` 

256 `True` if the dataset is known to the datastore. 

257 """ 

258 for datastore in self.datastores: 

259 if datastore.knows(ref): 

260 log.debug("%s known to datastore %s", ref, datastore.name) 

261 return True 

262 return False 

263 

264 def mexists( 

265 self, refs: Iterable[DatasetRef], artifact_existence: Optional[Dict[ResourcePath, bool]] = None 

266 ) -> Dict[DatasetRef, bool]: 

267 """Check the existence of multiple datasets at once. 

268 

269 Parameters 

270 ---------- 

271 refs : iterable of `DatasetRef` 

272 The datasets to be checked. 

273 artifact_existence : `dict` [`lsst.resources.ResourcePath`, `bool`] 

274 Optional mapping of datastore artifact to existence. Updated by 

275 this method with details of all artifacts tested. Can be `None` 

276 if the caller is not interested. 

277 

278 Returns 

279 ------- 

280 existence : `dict` [`DatasetRef`, `bool`] 

281 Mapping from dataset to boolean indicating existence in any 

282 of the child datastores. 

283 """ 

284 dataset_existence: Dict[DatasetRef, bool] = {} 

285 for datastore in self.datastores: 

286 dataset_existence.update(datastore.mexists(refs, artifact_existence=artifact_existence)) 

287 

288 # For the next datastore there is no point asking about refs we 

289 # already know exist. No special exemption for ephemeral datastores. 

290 refs = [ref for ref, exists in dataset_existence.items() if not exists] 

291 

292 return dataset_existence 
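
A hedged sketch of calling the mexists() method above; ``chain`` and the iterable of resolved ``refs`` are assumed to exist already:

    existence = chain.mexists(refs)
    missing = [ref for ref, found in existence.items() if not found]
    print(f"{len(missing)} of {len(existence)} datasets have no artifact in any child datastore")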

293 

294 def exists(self, ref: DatasetRef) -> bool: 

295 """Check if the dataset exists in one of the datastores. 

296 

297 Parameters 

298 ---------- 

299 ref : `DatasetRef` 

300 Reference to the required dataset. 

301 

302 Returns 

303 ------- 

304 exists : `bool` 

305 `True` if the entity exists in one of the child datastores. 

306 """ 

307 for datastore in self.datastores: 

308 if datastore.exists(ref): 

309 log.debug("Found %s in datastore %s", ref, datastore.name) 

310 return True 

311 return False 

312 

313 def get(self, ref: DatasetRef, parameters: Optional[Mapping[str, Any]] = None) -> Any: 

314 """Load an InMemoryDataset from the store. 

315 

316 The dataset is returned from the first datastore that has 

317 the dataset. 

318 

319 Parameters 

320 ---------- 

321 ref : `DatasetRef` 

322 Reference to the required Dataset. 

323 parameters : `dict` 

324 `StorageClass`-specific parameters that specify, for example, 

325 a slice of the dataset to be loaded. 

326 

327 Returns 

328 ------- 

329 inMemoryDataset : `object` 

330 Requested dataset or slice thereof as an InMemoryDataset. 

331 

332 Raises 

333 ------ 

334 FileNotFoundError 

335 Requested dataset can not be retrieved. 

336 TypeError 

337 Return value from formatter has unexpected type. 

338 ValueError 

339 Formatter failed to process the dataset. 

340 """ 

341 

342 for datastore in self.datastores: 

343 try: 

344 inMemoryObject = datastore.get(ref, parameters) 

345 log.debug("Found dataset %s in datastore %s", ref, datastore.name) 

346 return inMemoryObject 

347 except FileNotFoundError: 

348 pass 

349 

350 raise FileNotFoundError("Dataset {} could not be found in any of the datastores".format(ref)) 

351 

352 def put(self, inMemoryDataset: Any, ref: DatasetRef) -> None: 

353 """Write a InMemoryDataset with a given `DatasetRef` to each 

354 datastore. 

355 

356 The put() to child datastores can fail with 

357 `DatasetTypeNotSupportedError`. The put() for this datastore will be 

358 deemed to have succeeded so long as at least one child datastore 

359 accepted the inMemoryDataset. 

360 

361 Parameters 

362 ---------- 

363 inMemoryDataset : `object` 

364 The dataset to store. 

365 ref : `DatasetRef` 

366 Reference to the associated Dataset. 

367 

368 Raises 

369 ------ 

370 TypeError 

371 Supplied object and storage class are inconsistent. 

372 DatasetTypeNotSupportedError 

373 All datastores reported `DatasetTypeNotSupportedError`. 

374 """ 

375 log.debug("Put %s", ref) 

376 

377 # Confirm that we can accept this dataset 

378 if not self.constraints.isAcceptable(ref): 

379 # Raise rather than use boolean return value. 

380 raise DatasetTypeNotSupportedError( 

381 f"Dataset {ref} has been rejected by this datastore via configuration." 

382 ) 

383 

384 isPermanent = False 

385 nsuccess = 0 

386 npermanent = 0 

387 nephemeral = 0 

388 for datastore, constraints in zip(self.datastores, self.datastoreConstraints): 

389 if constraints is not None and not constraints.isAcceptable(ref): 

390 log.debug("Datastore %s skipping put via configuration for ref %s", datastore.name, ref) 

391 continue 

392 

393 if datastore.isEphemeral: 

394 nephemeral += 1 

395 else: 

396 npermanent += 1 

397 try: 

398 datastore.put(inMemoryDataset, ref) 

399 nsuccess += 1 

400 if not datastore.isEphemeral: 

401 isPermanent = True 

402 except DatasetTypeNotSupportedError: 

403 pass 

404 

405 if nsuccess == 0: 

406 raise DatasetTypeNotSupportedError(f"None of the chained datastores supported ref {ref}") 

407 

408 if not isPermanent and npermanent > 0:  [coverage: 408 ↛ 409, condition on line 408 was never true]

409 warnings.warn(f"Put of {ref} only succeeded in ephemeral datastores", stacklevel=2) 

410 

411 if self._transaction is not None: 

412 self._transaction.registerUndo("put", self.remove, ref) 
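
To illustrate the put() semantics documented above, a short sketch (the ``chain``, ``exposure`` and ``ref`` names are assumptions) of storing a dataset and handling the case where every child rejects it:

    from lsst.daf.butler import DatasetTypeNotSupportedError

    try:
        chain.put(exposure, ref)
    except DatasetTypeNotSupportedError:
        # No child datastore accepted this dataset type, so nothing was stored.
        print(f"no child datastore accepted {ref}")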

413 

414 def _overrideTransferMode(self, *datasets: Any, transfer: Optional[str] = None) -> Optional[str]: 

415 # Docstring inherited from base class. 

416 if transfer != "auto": 

417 return transfer 

418 # Ask each datastore what they think auto means 

419 transfers = {d._overrideTransferMode(*datasets, transfer=transfer) for d in self.datastores} 

420 

421 # Remove any untranslated "auto" values 

422 transfers.discard(transfer) 

423 

424 if len(transfers) == 1:  [coverage: 424 ↛ 425, condition on line 424 was never true]

425 return transfers.pop() 

426 if not transfers:  [coverage: 426 ↛ 430, condition on line 426 was never false]

427 # Everything reported "auto" 

428 return transfer 

429 

430 raise RuntimeError( 

431 "Chained datastore does not yet support different transfer modes" 

432 f" from 'auto' in each child datastore (wanted {transfers})" 

433 ) 

434 

435 def _prepIngest(self, *datasets: FileDataset, transfer: Optional[str] = None) -> _IngestPrepData: 

436 # Docstring inherited from Datastore._prepIngest. 

437 if transfer is None or transfer == "move": 

438 raise NotImplementedError("ChainedDatastore does not support transfer=None or transfer='move'.") 

439 

440 def isDatasetAcceptable(dataset: FileDataset, *, name: str, constraints: Constraints) -> bool: 

441 acceptable = [ref for ref in dataset.refs if constraints.isAcceptable(ref)] 

442 if not acceptable: 

443 log.debug( 

444 "Datastore %s skipping ingest via configuration for refs %s", 

445 name, 

446 ", ".join(str(ref) for ref in dataset.refs), 

447 ) 

448 return False 

449 else: 

450 return True 

451 

452 # Filter down to just datasets the chained datastore's own 

453 # configuration accepts. 

454 okForParent: List[FileDataset] = [ 

455 dataset 

456 for dataset in datasets 

457 if isDatasetAcceptable(dataset, name=self.name, constraints=self.constraints) 

458 ] 

459 

460 # Iterate over nested datastores and call _prepIngest on each. 

461 # Save the results to a list: 

462 children: List[Tuple[Datastore, Datastore.IngestPrepData]] = [] 

463 # ...and remember whether all of the failures are due to 

464 # NotImplementedError being raised. 

465 allFailuresAreNotImplementedError = True 

466 for datastore, constraints in zip(self.datastores, self.datastoreConstraints): 

467 okForChild: List[FileDataset] 

468 if constraints is not None: 

469 okForChild = [ 

470 dataset 

471 for dataset in okForParent 

472 if isDatasetAcceptable(dataset, name=datastore.name, constraints=constraints) 

473 ] 

474 else: 

475 okForChild = okForParent 

476 try: 

477 prepDataForChild = datastore._prepIngest(*okForChild, transfer=transfer) 

478 except NotImplementedError: 

479 log.debug( 

480 "Skipping ingest for datastore %s because transfer mode %s is not supported.", 

481 datastore.name, 

482 transfer, 

483 ) 

484 continue 

485 allFailuresAreNotImplementedError = False 

486 children.append((datastore, prepDataForChild)) 

487 if allFailuresAreNotImplementedError: 

488 raise NotImplementedError(f"No child datastore supports transfer mode {transfer}.") 

489 return _IngestPrepData(children=children) 
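
A hedged ingest sketch matching the transfer-mode rules above; the file path and ``ref`` are hypothetical, and FileDataset and ingest() come from the lsst.daf.butler base API rather than this module:

    from lsst.daf.butler import FileDataset

    dataset = FileDataset(path="data/exposure.fits", refs=[ref])
    # transfer=None and transfer="move" would raise NotImplementedError here.
    chain.ingest(dataset, transfer="copy")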

490 

491 def _finishIngest( 

492 self, 

493 prepData: _IngestPrepData, 

494 *, 

495 transfer: Optional[str] = None, 

496 record_validation_info: bool = True, 

497 ) -> None: 

498 # Docstring inherited from Datastore._finishIngest. 

499 for datastore, prepDataForChild in prepData.children: 

500 datastore._finishIngest( 

501 prepDataForChild, transfer=transfer, record_validation_info=record_validation_info 

502 ) 

503 

504 def getURIs( 

505 self, ref: DatasetRef, predict: bool = False 

506 ) -> Tuple[Optional[ResourcePath], Dict[str, ResourcePath]]: 

507 """Return URIs associated with dataset. 

508 

509 Parameters 

510 ---------- 

511 ref : `DatasetRef` 

512 Reference to the required dataset. 

513 predict : `bool`, optional 

514 If the datastore does not know about the dataset, should it 

515 return a predicted URI or not? 

516 

517 Returns 

518 ------- 

519 primary : `lsst.resources.ResourcePath` 

520 The URI to the primary artifact associated with this dataset. 

521 If the dataset was disassembled within the datastore this 

522 may be `None`. 

523 components : `dict` 

524 URIs to any components associated with the dataset artifact. 

525 Can be empty if there are no components. 

526 

527 Notes 

528 ----- 

529 The returned URI is from the first datastore in the list that has 

530 the dataset with preference given to the first dataset coming from 

531 a permanent datastore. If no datastores have the dataset and prediction 

532 is allowed, the predicted URI for the first datastore in the list will 

533 be returned. 

534 """ 

535 DatastoreURIs = Tuple[Optional[ResourcePath], Dict[str, ResourcePath]] 

536 log.debug("Requesting URIs for %s", ref) 

537 predictedUri: Optional[DatastoreURIs] = None 

538 predictedEphemeralUri: Optional[DatastoreURIs] = None 

539 firstEphemeralUri: Optional[DatastoreURIs] = None 

540 for datastore in self.datastores: 

541 if datastore.exists(ref): 

542 if not datastore.isEphemeral: 

543 uri = datastore.getURIs(ref) 

544 log.debug("Retrieved non-ephemeral URI: %s", uri) 

545 return uri 

546 elif not firstEphemeralUri: 

547 firstEphemeralUri = datastore.getURIs(ref) 

548 elif predict: 

549 if not predictedUri and not datastore.isEphemeral: 

550 predictedUri = datastore.getURIs(ref, predict) 

551 elif not predictedEphemeralUri and datastore.isEphemeral: 

552 predictedEphemeralUri = datastore.getURIs(ref, predict) 

553 

554 if firstEphemeralUri: 

555 log.debug("Retrieved ephemeral URI: %s", firstEphemeralUri) 

556 return firstEphemeralUri 

557 

558 if predictedUri: 

559 log.debug("Retrieved predicted URI: %s", predictedUri) 

560 return predictedUri 

561 

562 if predictedEphemeralUri: 

563 log.debug("Retrieved predicted ephemeral URI: %s", predictedEphemeralUri) 

564 return predictedEphemeralUri 

565 

566 raise FileNotFoundError("Dataset {} not in any datastore".format(ref)) 
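
A short sketch of the preference order documented for getURIs(); ``chain`` and a resolved ``ref`` are assumed, and the component names shown are hypothetical:

    primary, components = chain.getURIs(ref, predict=True)
    if primary is not None:
        print("single artifact at", primary)
    else:
        # Disassembled dataset: one URI per component, e.g. {"image": ..., "wcs": ...}.
        for comp, uri in components.items():
            print(comp, "->", uri)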

567 

568 def getURI(self, ref: DatasetRef, predict: bool = False) -> ResourcePath: 

569 """URI to the Dataset. 

570 

571 The returned URI is from the first datastore in the list that has 

572 the dataset with preference given to the first dataset coming from 

573 a permanent datastore. If no datastores have the dataset and prediction 

574 is allowed, the predicted URI for the first datastore in the list will 

575 be returned. 

576 

577 Parameters 

578 ---------- 

579 ref : `DatasetRef` 

580 Reference to the required Dataset. 

581 predict : `bool` 

582 If `True`, allow URIs to be returned for datasets that have not 

583 yet been written. 

584 

585 Returns 

586 ------- 

587 uri : `lsst.resources.ResourcePath` 

588 URI pointing to the dataset within the datastore. If the 

589 dataset does not exist in the datastore, and if ``predict`` is 

590 `True`, the URI will be a prediction and will include a URI 

591 fragment "#predicted". 

592 

593 Notes 

594 ----- 

595 If the datastore does not have entities that relate well 

596 to the concept of a URI the returned URI string will be 

597 descriptive. The returned URI is not guaranteed to be obtainable. 

598 

599 Raises 

600 ------ 

601 FileNotFoundError 

602 A URI has been requested for a dataset that does not exist and 

603 guessing is not allowed. 

604 RuntimeError 

605 Raised if a request is made for a single URI but multiple URIs 

606 are associated with this dataset. 

607 """ 

608 log.debug("Requesting URI for %s", ref) 

609 primary, components = self.getURIs(ref, predict) 

610 if primary is None or components:  [coverage: 610 ↛ 611, condition on line 610 was never true]

611 raise RuntimeError( 

612 f"Dataset ({ref}) includes distinct URIs for components. Use Datastore.getURIs() instead." 

613 ) 

614 return primary 

615 

616 def retrieveArtifacts( 

617 self, 

618 refs: Iterable[DatasetRef], 

619 destination: ResourcePath, 

620 transfer: str = "auto", 

621 preserve_path: bool = True, 

622 overwrite: bool = False, 

623 ) -> List[ResourcePath]: 

624 """Retrieve the file artifacts associated with the supplied refs. 

625 

626 Parameters 

627 ---------- 

628 refs : iterable of `DatasetRef` 

629 The datasets for which file artifacts are to be retrieved. 

630 A single ref can result in multiple files. The refs must 

631 be resolved. 

632 destination : `lsst.resources.ResourcePath` 

633 Location to write the file artifacts. 

634 transfer : `str`, optional 

635 Method to use to transfer the artifacts. Must be one of the options 

636 supported by `lsst.resources.ResourcePath.transfer_from()`. 

637 "move" is not allowed. 

638 preserve_path : `bool`, optional 

639 If `True` the full path of the file artifact within the datastore 

640 is preserved. If `False` the final file component of the path 

641 is used. 

642 overwrite : `bool`, optional 

643 If `True` allow transfers to overwrite existing files at the 

644 destination. 

645 

646 Returns 

647 ------- 

648 targets : `list` of `lsst.resources.ResourcePath` 

649 URIs of file artifacts in destination location. Order is not 

650 preserved. 

651 """ 

652 if not destination.isdir():  [coverage: 652 ↛ 653, condition on line 652 was never true]

653 raise ValueError(f"Destination location must refer to a directory. Given {destination}") 

654 

655 # Using getURIs is not feasible since it becomes difficult to 

656 # determine the path within the datastore later on. For now 

657 # follow getURIs implementation approach. 

658 

659 pending = set(refs) 

660 

661 # There is a question as to whether an exception should be raised 

662 # early if some of the refs are missing, or whether files should be 

663 # transferred until a problem is hit. Prefer to complain up front. 

664 # Use the datastore integer as primary key. 

665 grouped_by_datastore: Dict[int, Set[DatasetRef]] = {} 

666 

667 for number, datastore in enumerate(self.datastores): 

668 if datastore.isEphemeral: 

669 # In the future we will want to distinguish in-memory from 

670 # caching datastore since using an on-disk local 

671 # cache is exactly what we should be doing. 

672 continue 

673 datastore_refs = {ref for ref in pending if datastore.exists(ref)} 

674 

675 if datastore_refs: 

676 grouped_by_datastore[number] = datastore_refs 

677 

678 # Remove these from the pending list so that we do not bother 

679 # looking for them any more. 

680 pending = pending - datastore_refs 

681 

682 if pending:  [coverage: 682 ↛ 683, condition on line 682 was never true]

683 raise RuntimeError(f"Some datasets were not found in any datastores: {pending}") 

684 

685 # Now do the transfer. 

686 targets: List[ResourcePath] = [] 

687 for number, datastore_refs in grouped_by_datastore.items(): 

688 targets.extend( 

689 self.datastores[number].retrieveArtifacts( 

690 datastore_refs, 

691 destination, 

692 transfer=transfer, 

693 preserve_path=preserve_path, 

694 overwrite=overwrite, 

695 ) 

696 ) 

697 

698 return targets 
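
A hedged usage sketch of retrieveArtifacts(); the destination directory and the "copy" transfer mode are assumptions, not values mandated by the module:

    from lsst.resources import ResourcePath

    destination = ResourcePath("retrieved/", forceDirectory=True)
    targets = chain.retrieveArtifacts(refs, destination, transfer="copy", preserve_path=True)
    print(f"retrieved {len(targets)} file artifacts")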

699 

700 def remove(self, ref: DatasetRef) -> None: 

701 """Indicate to the datastore that a dataset can be removed. 

702 

703 The dataset will be removed from each datastore. The dataset is 

704 not required to exist in every child datastore. 

705 

706 Parameters 

707 ---------- 

708 ref : `DatasetRef` 

709 Reference to the required dataset. 

710 

711 Raises 

712 ------ 

713 FileNotFoundError 

714 Attempt to remove a dataset that does not exist. Raised if none 

715 of the child datastores removed the dataset. 

716 """ 

717 log.debug("Removing %s", ref) 

718 self.trash(ref, ignore_errors=False) 

719 self.emptyTrash(ignore_errors=False) 

720 

721 def forget(self, refs: Iterable[DatasetRef]) -> None: 

722 for datastore in tuple(self.datastores): 

723 datastore.forget(refs) 

724 

725 def trash(self, ref: Union[DatasetRef, Iterable[DatasetRef]], ignore_errors: bool = True) -> None: 

726 if isinstance(ref, DatasetRef): 

727 ref_label = str(ref) 

728 else: 

729 ref_label = "bulk datasets" 

730 

731 log.debug("Trashing %s", ref_label) 

732 

733 counter = 0 

734 for datastore in self.datastores: 

735 try: 

736 datastore.trash(ref, ignore_errors=ignore_errors) 

737 counter += 1 

738 except FileNotFoundError: 

739 pass 

740 

741 if counter == 0: 

742 err_msg = f"Could not mark for removal from any child datastore: {ref_label}" 

743 if ignore_errors:  [coverage: 743 ↛ 744, condition on line 743 was never true]

744 log.warning(err_msg) 

745 else: 

746 raise FileNotFoundError(err_msg) 

747 

748 def emptyTrash(self, ignore_errors: bool = True) -> None: 

749 for datastore in self.datastores: 

750 datastore.emptyTrash(ignore_errors=ignore_errors) 

751 

752 def transfer(self, inputDatastore: Datastore, ref: DatasetRef) -> None: 

753 """Retrieve a dataset from an input `Datastore`, 

754 and store the result in this `Datastore`. 

755 

756 Parameters 

757 ---------- 

758 inputDatastore : `Datastore` 

759 The external `Datastore` from which to retrieve the Dataset. 

760 ref : `DatasetRef` 

761 Reference to the required dataset in the input data store. 

762 

763 Notes 

764 ----- 

765 The dataset is read from ``inputDatastore`` with ``get()`` and then 

766 written to this chained datastore with ``put()``; this method 

767 returns nothing. 

768 """ 

769 assert inputDatastore is not self # unless we want it for renames? 

770 inMemoryDataset = inputDatastore.get(ref) 

771 self.put(inMemoryDataset, ref) 

772 

773 def validateConfiguration( 

774 self, entities: Iterable[Union[DatasetRef, DatasetType, StorageClass]], logFailures: bool = False 

775 ) -> None: 

776 """Validate some of the configuration for this datastore. 

777 

778 Parameters 

779 ---------- 

780 entities : iterable of `DatasetRef`, `DatasetType`, or `StorageClass` 

781 Entities to test against this configuration. Can be differing 

782 types. 

783 logFailures : `bool`, optional 

784 If `True`, output a log message for every validation error 

785 detected. 

786 

787 Raises 

788 ------ 

789 DatastoreValidationError 

790 Raised if there is a validation problem with a configuration. 

791 All the problems are reported in a single exception. 

792 

793 Notes 

794 ----- 

795 This method checks each datastore in turn. 

796 """ 

797 

798 # Need to catch each of the datastore outputs and ensure that 

799 # all are tested. 

800 failures = [] 

801 for datastore in self.datastores: 

802 try: 

803 datastore.validateConfiguration(entities, logFailures=logFailures) 

804 except DatastoreValidationError as e: 

805 if logFailures:  [coverage: 805 ↛ 807, condition on line 805 was never false]

806 log.critical("Datastore %s failed validation", datastore.name) 

807 failures.append(f"Datastore {self.name}: {e}") 

808 

809 if failures: 

810 msg = ";\n".join(failures) 

811 raise DatastoreValidationError(msg) 
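
A small sketch of running the validation described above; ``chain`` and the entities being checked are assumed to be available already:

    from lsst.daf.butler import DatastoreValidationError

    try:
        chain.validateConfiguration([dataset_type, storage_class], logFailures=True)
    except DatastoreValidationError as err:
        # Every failing child datastore contributes one line to the message.
        print(err)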

812 

813 def validateKey(self, lookupKey: LookupKey, entity: Union[DatasetRef, DatasetType, StorageClass]) -> None: 

814 # Docstring is inherited from base class 

815 failures = [] 

816 for datastore in self.datastores: 

817 try: 

818 datastore.validateKey(lookupKey, entity) 

819 except DatastoreValidationError as e: 

820 failures.append(f"Datastore {self.name}: {e}") 

821 

822 if failures: 

823 msg = ";\n".join(failures) 

824 raise DatastoreValidationError(msg) 

825 

826 def getLookupKeys(self) -> Set[LookupKey]: 

827 # Docstring is inherited from base class 

828 keys = set() 

829 for datastore in self.datastores: 

830 keys.update(datastore.getLookupKeys()) 

831 

832 keys.update(self.constraints.getLookupKeys()) 

833 for p in self.datastoreConstraints: 

834 if p is not None:  [coverage: 834 ↛ 835, condition on line 834 was never true]

835 keys.update(p.getLookupKeys()) 

836 

837 return keys 

838 

839 def needs_expanded_data_ids( 

840 self, 

841 transfer: Optional[str], 

842 entity: Optional[Union[DatasetRef, DatasetType, StorageClass]] = None, 

843 ) -> bool: 

844 # Docstring inherited. 

845 # We can't safely use `self.datastoreConstraints` with `entity` to 

846 # check whether a child datastore would even want to ingest this 

847 # dataset, because we don't want to filter out datastores that might 

848 # need an expanded data ID based on incomplete information (e.g. we 

849 # pass a StorageClass, but the constraint dispatches on DatasetType). 

850 # So we pessimistically check if any datastore would need an expanded 

851 # data ID for this transfer mode. 

852 return any(datastore.needs_expanded_data_ids(transfer) for datastore in self.datastores)  [coverage: 852 ↛ exit, generator expression on line 852 did not run to completion]

853 

854 def import_records(self, data: Mapping[str, DatastoreRecordData]) -> None: 

855 # Docstring inherited from the base class. 

856 

857 for datastore in self.datastores: 

858 datastore.import_records(data) 

859 

860 def export_records(self, refs: Iterable[DatasetIdRef]) -> Mapping[str, DatastoreRecordData]: 

861 # Docstring inherited from the base class. 

862 

863 all_records: Dict[str, DatastoreRecordData] = {} 

864 

865 # Merge all sub-datastore records into one structure 

866 for datastore in self.datastores: 

867 sub_records = datastore.export_records(refs) 

868 for name, record_data in sub_records.items(): 

869 # All datastore names must be unique in a chain. 

870 if name in all_records:  [coverage: 870 ↛ 871, condition on line 870 was never true]

871 raise ValueError(f"Non-unique datastore name found in datastore {datastore}") 

872 all_records[name] = record_data 

873 

874 return all_records
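
Finally, a hedged sketch of moving datastore records between two chained datastores using the export/import pair above; ``source_chain``, ``target_chain`` and ``refs`` are assumptions, and the child datastore names are assumed to match between the two chains:

    records = source_chain.export_records(refs)   # mapping of child datastore name -> records
    target_chain.import_records(records)          # each child picks up the records addressed to it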