Coverage for python/lsst/daf/butler/datastores/chainedDatastore.py: 91%

319 statements  

coverage.py v6.4.1, created at 2022-06-23 02:26 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24"""Chained datastore.""" 

25 

26__all__ = ("ChainedDatastore",) 

27 

28import itertools 

29import logging 

30import time 

31import warnings 

32from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Mapping, Optional, Sequence, Set, Tuple, Union 

33 

34from lsst.daf.butler import ( 

35 Constraints, 

36 DatasetRef, 

37 DatasetRefURIs, 

38 DatasetTypeNotSupportedError, 

39 Datastore, 

40 DatastoreConfig, 

41 DatastoreRecordData, 

42 DatastoreValidationError, 

43 FileDataset, 

44) 

45from lsst.resources import ResourcePath 

46from lsst.utils import doImportType 

47 

48if TYPE_CHECKING: 48 ↛ 49  line 48 didn't jump to line 49, because the condition on line 48 was never true

49 from lsst.daf.butler import Config, DatasetType, LookupKey, StorageClass 

50 from lsst.daf.butler.registry.interfaces import DatasetIdRef, DatastoreRegistryBridgeManager 

51 

52log = logging.getLogger(__name__) 

53 

54 

55class _IngestPrepData(Datastore.IngestPrepData): 

56 """Helper class for ChainedDatastore ingest implementation. 

57 

58 Parameters 

59 ---------- 

60 children : `list` of `tuple` 

61 Pairs of `Datastore`, `IngestPrepData` for all child datastores. 

62 """ 

63 

64 def __init__(self, children: List[Tuple[Datastore, Datastore.IngestPrepData]]): 

65 super().__init__(itertools.chain.from_iterable(data.refs.values() for _, data in children)) 

66 self.children = children 

67 

68 

69class ChainedDatastore(Datastore): 

70 """Chained Datastores to allow read and writes from multiple datastores. 

71 

72 A ChainedDatastore is configured with multiple datastore configurations. 

73 A ``put()`` is sent to every child datastore that accepts the dataset.

74 A ``get()`` operation is tried on each datastore in turn and the first

75 datastore to return a valid dataset is used.

76 

77 Parameters 

78 ---------- 

79 config : `DatastoreConfig` or `str` 

80 Configuration. This configuration must include a ``datastores`` field 

81 as a sequence of datastore configurations. The order in this sequence 

82 indicates the order to use for read operations. 

83 bridgeManager : `DatastoreRegistryBridgeManager` 

84 Object that manages the interface between `Registry` and datastores. 

85 butlerRoot : `str`, optional 

86 New datastore root to use to override the configuration value. This 

87 root is sent to each child datastore. 

88 

89 Notes 

90 ----- 

91 ChainedDatastore never supports `None` or `"move"` as an `ingest` transfer 

92 mode. It supports `"copy"`, `"symlink"`, `"relsymlink"` 

93 and `"hardlink"` if and only if all its child datastores do. 

94 """ 

95 

96 defaultConfigFile = "datastores/chainedDatastore.yaml" 

97 """Path to configuration defaults. Accessed within the ``configs`` resource 

98 or relative to a search path. Can be None if no defaults specified. 

99 """ 

100 

101 containerKey = "datastores" 

102 """Key to specify where child datastores are configured.""" 

103 

104 datastores: List[Datastore] 

105 """All the child datastores known to this datastore.""" 

106 

107 datastoreConstraints: Sequence[Optional[Constraints]] 

108 """Constraints to be applied to each of the child datastores.""" 

109 

110 @classmethod 

111 def setConfigRoot(cls, root: str, config: Config, full: Config, overwrite: bool = True) -> None: 

112 """Set any filesystem-dependent config options for child Datastores to 

113 be appropriate for a new empty repository with the given root. 

114 

115 Parameters 

116 ---------- 

117 root : `str` 

118 Filesystem path to the root of the data repository. 

119 config : `Config` 

120 A `Config` to update. Only the subset understood by 

121 this component will be updated. Will not expand 

122 defaults. 

123 full : `Config` 

124 A complete config with all defaults expanded that can be 

125 converted to a `DatastoreConfig`. Read-only and will not be 

126 modified by this method. 

127 Repository-specific options that should not be obtained 

128 from defaults when Butler instances are constructed 

129 should be copied from ``full`` to ``config``. 

130 overwrite : `bool`, optional 

131 If `False`, do not modify a value in ``config`` if the value 

132 already exists. Default is always to overwrite with the provided 

133 ``root``. 

134 

135 Notes 

136 ----- 

137 If a keyword is explicitly defined in the supplied ``config`` it 

138 will not be overridden by this method if ``overwrite`` is `False`. 

139 This allows explicit values set in external configs to be retained. 

140 """ 

141 

142 # Extract the part of the config we care about updating 

143 datastoreConfig = DatastoreConfig(config, mergeDefaults=False) 

144 

145 # And the subset of the full config that we can use for reference. 

146 # Do not bother with defaults because we are told this already has 

147 # them. 

148 fullDatastoreConfig = DatastoreConfig(full, mergeDefaults=False) 

149 

150 # Loop over each datastore config and pass the subsets to the 

151 # child datastores to process. 

152 

153 containerKey = cls.containerKey 

154 for idx, (child, fullChild) in enumerate( 

155 zip(datastoreConfig[containerKey], fullDatastoreConfig[containerKey]) 

156 ): 

157 childConfig = DatastoreConfig(child, mergeDefaults=False) 

158 fullChildConfig = DatastoreConfig(fullChild, mergeDefaults=False) 

159 datastoreClass = doImportType(fullChildConfig["cls"]) 

160 if not issubclass(datastoreClass, Datastore): 160 ↛ 161  line 160 didn't jump to line 161, because the condition on line 160 was never true

161 raise TypeError(f"Imported child class {fullChildConfig['cls']} is not a Datastore") 

162 newroot = "{}/{}_{}".format(root, datastoreClass.__qualname__, idx) 

163 datastoreClass.setConfigRoot(newroot, childConfig, fullChildConfig, overwrite=overwrite) 

164 

165 # Reattach to parent 

166 datastoreConfig[containerKey, idx] = childConfig 

167 

168 # Reattach modified datastore config to parent 

169 # If this has a datastore key we attach there, otherwise we assume 

170 # this information goes at the top of the config hierarchy. 

171 if DatastoreConfig.component in config: 

172 config[DatastoreConfig.component] = datastoreConfig 

173 else: 

174 config.update(datastoreConfig) 

175 

176 return 
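
# A minimal sketch of the per-child root naming applied above, using an
# example repository root, class name and index rather than values from a
# real configuration:
#
#     newroot = "{}/{}_{}".format("/repo", "FileDatastore", 0)
#     # -> "/repo/FileDatastore_0"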

177 

178 def __init__( 

179 self, 

180 config: Union[Config, str], 

181 bridgeManager: DatastoreRegistryBridgeManager, 

182 butlerRoot: Optional[str] = None,

183 ): 

184 super().__init__(config, bridgeManager) 

185 

186 # Scan for child datastores and instantiate them with the same registry 

187 self.datastores = [] 

188 for c in self.config["datastores"]: 

189 c = DatastoreConfig(c) 

190 datastoreType = doImportType(c["cls"]) 

191 if not issubclass(datastoreType, Datastore): 191 ↛ 192  line 191 didn't jump to line 192, because the condition on line 191 was never true

192 raise TypeError(f"Imported child class {c['cls']} is not a Datastore") 

193 datastore = datastoreType(c, bridgeManager, butlerRoot=butlerRoot) 

194 log.debug("Creating child datastore %s", datastore.name) 

195 self.datastores.append(datastore) 

196 

197 # Name ourself based on our children 

198 if self.datastores: 198 ↛ 203  line 198 didn't jump to line 203, because the condition on line 198 was never false

199 # We must set the names explicitly 

200 self._names = [d.name for d in self.datastores] 

201 childNames = ",".join(self.names) 

202 else: 

203 childNames = "(empty@{})".format(time.time()) 

204 self._names = [childNames] 

205 self.name = "{}[{}]".format(type(self).__qualname__, childNames) 

206 

207 # We declare we are ephemeral if all our child datastores declare 

208 # they are ephemeral 

209 isEphemeral = True 

210 for d in self.datastores: 

211 if not d.isEphemeral: 

212 isEphemeral = False 

213 break 

214 self.isEphemeral = isEphemeral 

215 

216 # per-datastore override constraints 

217 if "datastore_constraints" in self.config: 

218 overrides = self.config["datastore_constraints"] 

219 

220 if len(overrides) != len(self.datastores): 220 ↛ 221  line 220 didn't jump to line 221, because the condition on line 220 was never true

221 raise DatastoreValidationError( 

222 f"Number of registered datastores ({len(self.datastores)})" 

223 " differs from number of constraints overrides" 

224 f" {len(overrides)}" 

225 ) 

226 

227 self.datastoreConstraints = [ 

228 Constraints(c.get("constraints"), universe=bridgeManager.universe) for c in overrides 

229 ] 

230 

231 else: 

232 self.datastoreConstraints = (None,) * len(self.datastores) 

233 

234 log.debug("Created %s (%s)", self.name, ("ephemeral" if self.isEphemeral else "permanent")) 

235 

236 @property 

237 def names(self) -> Tuple[str, ...]: 

238 return tuple(self._names) 

239 

240 def __str__(self) -> str: 

241 chainName = ", ".join(str(ds) for ds in self.datastores) 

242 return chainName 

243 

244 def knows(self, ref: DatasetRef) -> bool: 

245 """Check if the dataset is known to any of the datastores. 

246 

247 Does not check for existence of any artifact. 

248 

249 Parameters 

250 ---------- 

251 ref : `DatasetRef` 

252 Reference to the required dataset. 

253 

254 Returns 

255 ------- 

256 exists : `bool` 

257 `True` if the dataset is known to the datastore. 

258 """ 

259 for datastore in self.datastores: 

260 if datastore.knows(ref): 

261 log.debug("%s known to datastore %s", ref, datastore.name) 

262 return True 

263 return False 

264 

265 def mexists( 

266 self, refs: Iterable[DatasetRef], artifact_existence: Optional[Dict[ResourcePath, bool]] = None 

267 ) -> Dict[DatasetRef, bool]: 

268 """Check the existence of multiple datasets at once. 

269 

270 Parameters 

271 ---------- 

272 refs : iterable of `DatasetRef` 

273 The datasets to be checked. 

274 artifact_existence : `dict` [`lsst.resources.ResourcePath`, `bool`] 

275 Optional mapping of datastore artifact to existence. Updated by 

276 this method with details of all artifacts tested. Can be `None` 

277 if the caller is not interested. 

278 

279 Returns 

280 ------- 

281 existence : `dict` of [`DatasetRef`, `bool`] 

282 Mapping from dataset to boolean indicating existence in any 

283 of the child datastores. 

284 """ 

285 dataset_existence: Dict[DatasetRef, bool] = {} 

286 for datastore in self.datastores: 

287 dataset_existence.update(datastore.mexists(refs, artifact_existence=artifact_existence)) 

288 

289 # For next datastore no point asking about ones we know 

290 # exist already. No special exemption for ephemeral datastores. 

291 refs = [ref for ref, exists in dataset_existence.items() if not exists] 

292 

293 return dataset_existence 
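
# Illustrative sketch of a bulk existence check; ``chained`` and ``refs`` are
# assumed from context and the cache dictionary can be shared across calls:
#
#     artifact_cache: Dict[ResourcePath, bool] = {}
#     existence = chained.mexists(refs, artifact_existence=artifact_cache)
#     missing = [ref for ref, found in existence.items() if not found]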

294 

295 def exists(self, ref: DatasetRef) -> bool: 

296 """Check if the dataset exists in one of the datastores. 

297 

298 Parameters 

299 ---------- 

300 ref : `DatasetRef` 

301 Reference to the required dataset. 

302 

303 Returns 

304 ------- 

305 exists : `bool` 

306 `True` if the entity exists in one of the child datastores. 

307 """ 

308 for datastore in self.datastores: 

309 if datastore.exists(ref): 

310 log.debug("Found %s in datastore %s", ref, datastore.name) 

311 return True 

312 return False 

313 

314 def get(self, ref: DatasetRef, parameters: Optional[Mapping[str, Any]] = None) -> Any: 

315 """Load an InMemoryDataset from the store. 

316 

317 The dataset is returned from the first datastore that has 

318 the dataset. 

319 

320 Parameters 

321 ---------- 

322 ref : `DatasetRef` 

323 Reference to the required Dataset. 

324 parameters : `dict` 

325 `StorageClass`-specific parameters that specify, for example, 

326 a slice of the dataset to be loaded. 

327 

328 Returns 

329 ------- 

330 inMemoryDataset : `object` 

331 Requested dataset or slice thereof as an InMemoryDataset. 

332 

333 Raises 

334 ------ 

335 FileNotFoundError 

336 Requested dataset can not be retrieved. 

337 TypeError 

338 Return value from formatter has unexpected type. 

339 ValueError 

340 Formatter failed to process the dataset. 

341 """ 

342 

343 for datastore in self.datastores: 

344 try: 

345 inMemoryObject = datastore.get(ref, parameters) 

346 log.debug("Found dataset %s in datastore %s", ref, datastore.name) 

347 return inMemoryObject 

348 except FileNotFoundError: 

349 pass 

350 

351 raise FileNotFoundError("Dataset {} could not be found in any of the datastores".format(ref)) 

352 

353 def put(self, inMemoryDataset: Any, ref: DatasetRef) -> None: 

354 """Write a InMemoryDataset with a given `DatasetRef` to each 

355 datastore. 

356 

357 The put() to child datastores can fail with 

358 `DatasetTypeNotSupportedError`. The put() for this datastore will be 

359 deemed to have succeeded so long as at least one child datastore 

360 accepted the inMemoryDataset. 

361 

362 Parameters 

363 ---------- 

364 inMemoryDataset : `object` 

365 The dataset to store. 

366 ref : `DatasetRef` 

367 Reference to the associated Dataset. 

368 

369 Raises 

370 ------ 

371 TypeError 

372 Supplied object and storage class are inconsistent. 

373 DatasetTypeNotSupportedError 

374 All datastores reported `DatasetTypeNotSupportedError`. 

375 """ 

376 log.debug("Put %s", ref) 

377 

378 # Confirm that we can accept this dataset 

379 if not self.constraints.isAcceptable(ref): 

380 # Raise rather than use boolean return value. 

381 raise DatasetTypeNotSupportedError( 

382 f"Dataset {ref} has been rejected by this datastore via configuration." 

383 ) 

384 

385 isPermanent = False 

386 nsuccess = 0 

387 npermanent = 0 

388 nephemeral = 0 

389 for datastore, constraints in zip(self.datastores, self.datastoreConstraints): 

390 if constraints is not None and not constraints.isAcceptable(ref): 

391 log.debug("Datastore %s skipping put via configuration for ref %s", datastore.name, ref) 

392 continue 

393 

394 if datastore.isEphemeral: 

395 nephemeral += 1 

396 else: 

397 npermanent += 1 

398 try: 

399 datastore.put(inMemoryDataset, ref) 

400 nsuccess += 1 

401 if not datastore.isEphemeral: 

402 isPermanent = True 

403 except DatasetTypeNotSupportedError: 

404 pass 

405 

406 if nsuccess == 0: 

407 raise DatasetTypeNotSupportedError(f"None of the chained datastores supported ref {ref}") 

408 

409 if not isPermanent and npermanent > 0: 409 ↛ 410  line 409 didn't jump to line 410, because the condition on line 409 was never true

410 warnings.warn(f"Put of {ref} only succeeded in ephemeral databases", stacklevel=2) 

411 

412 if self._transaction is not None: 

413 self._transaction.registerUndo("put", self.remove, ref) 
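
# Illustrative sketch of the partial-success behaviour documented above
# (``chained``, ``in_memory_dataset`` and ``ref`` assumed from context):
#
#     try:
#         chained.put(in_memory_dataset, ref)
#     except DatasetTypeNotSupportedError:
#         # every child datastore rejected this dataset type
#         ...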

414 

415 def _overrideTransferMode(self, *datasets: Any, transfer: Optional[str] = None) -> Optional[str]: 

416 # Docstring inherited from base class. 

417 if transfer != "auto": 

418 return transfer 

419 # Ask each datastore what they think auto means 

420 transfers = {d._overrideTransferMode(*datasets, transfer=transfer) for d in self.datastores} 

421 

422 # Remove any untranslated "auto" values 

423 transfers.discard(transfer) 

424 

425 if len(transfers) == 1: 425 ↛ 426  line 425 didn't jump to line 426, because the condition on line 425 was never true

426 return transfers.pop() 

427 if not transfers: 427 ↛ 431  line 427 didn't jump to line 431, because the condition on line 427 was never false

428 # Everything reported "auto" 

429 return transfer 

430 

431 raise RuntimeError( 

432 "Chained datastore does not yet support different transfer modes" 

433 f" from 'auto' in each child datastore (wanted {transfers})" 

434 ) 

435 

436 def _prepIngest(self, *datasets: FileDataset, transfer: Optional[str] = None) -> _IngestPrepData: 

437 # Docstring inherited from Datastore._prepIngest. 

438 if transfer is None or transfer == "move": 

439 raise NotImplementedError("ChainedDatastore does not support transfer=None or transfer='move'.") 

440 

441 def isDatasetAcceptable(dataset: FileDataset, *, name: str, constraints: Constraints) -> bool: 

442 acceptable = [ref for ref in dataset.refs if constraints.isAcceptable(ref)] 

443 if not acceptable: 

444 log.debug( 

445 "Datastore %s skipping ingest via configuration for refs %s", 

446 name, 

447 ", ".join(str(ref) for ref in dataset.refs), 

448 ) 

449 return False 

450 else: 

451 return True 

452 

453 # Filter down to just datasets the chained datastore's own 

454 # configuration accepts. 

455 okForParent: List[FileDataset] = [ 

456 dataset 

457 for dataset in datasets 

458 if isDatasetAcceptable(dataset, name=self.name, constraints=self.constraints) 

459 ] 

460 

461 # Iterate over nested datastores and call _prepIngest on each. 

462 # Save the results to a list: 

463 children: List[Tuple[Datastore, Datastore.IngestPrepData]] = [] 

464 # ...and remember whether all of the failures are due to 

465 # NotImplementedError being raised. 

466 allFailuresAreNotImplementedError = True 

467 for datastore, constraints in zip(self.datastores, self.datastoreConstraints): 

468 okForChild: List[FileDataset] 

469 if constraints is not None: 

470 okForChild = [ 

471 dataset 

472 for dataset in okForParent 

473 if isDatasetAcceptable(dataset, name=datastore.name, constraints=constraints) 

474 ] 

475 else: 

476 okForChild = okForParent 

477 try: 

478 prepDataForChild = datastore._prepIngest(*okForChild, transfer=transfer) 

479 except NotImplementedError: 

480 log.debug( 

481 "Skipping ingest for datastore %s because transfer mode %s is not supported.", 

482 datastore.name, 

483 transfer, 

484 ) 

485 continue 

486 allFailuresAreNotImplementedError = False 

487 children.append((datastore, prepDataForChild)) 

488 if allFailuresAreNotImplementedError: 

489 raise NotImplementedError(f"No child datastore supports transfer mode {transfer}.") 

490 return _IngestPrepData(children=children) 

491 

492 def _finishIngest( 

493 self, 

494 prepData: _IngestPrepData, 

495 *, 

496 transfer: Optional[str] = None, 

497 record_validation_info: bool = True, 

498 ) -> None: 

499 # Docstring inherited from Datastore._finishIngest. 

500 for datastore, prepDataForChild in prepData.children: 

501 datastore._finishIngest( 

502 prepDataForChild, transfer=transfer, record_validation_info=record_validation_info 

503 ) 

504 

505 def getManyURIs( 

506 self, 

507 refs: Iterable[DatasetRef], 

508 predict: bool = False, 

509 allow_missing: bool = False, 

510 ) -> Dict[DatasetRef, DatasetRefURIs]: 

511 # Docstring inherited 

512 

513 uris: Dict[DatasetRef, DatasetRefURIs] = {} 

514 missing_refs = set(refs) 

515 

516 # If predict is True we don't want to predict a dataset in the first 

517 # datastore if it actually exists in a later datastore, so in that 

518 # case check all datastores with predict=False first, and then try 

519 # again with predict=True. 

520 for p in (False, True) if predict else (False,): 

521 if not missing_refs: 

522 break 

523 for datastore in self.datastores: 

524 got_uris = datastore.getManyURIs(missing_refs, p, allow_missing=True) 

525 missing_refs -= got_uris.keys() 

526 uris.update(got_uris) 

527 if not missing_refs: 

528 break 

529 

530 if missing_refs and not allow_missing: 

531 raise FileNotFoundError(f"Dataset(s) {missing_refs} not in this datastore.") 

532 

533 return uris 

534 

535 def getURIs(self, ref: DatasetRef, predict: bool = False) -> DatasetRefURIs: 

536 """Return URIs associated with dataset. 

537 

538 Parameters 

539 ---------- 

540 ref : `DatasetRef` 

541 Reference to the required dataset. 

542 predict : `bool`, optional 

543 If the datastore does not know about the dataset, should it 

544 return a predicted URI or not? 

545 

546 Returns 

547 ------- 

548 uris : `DatasetRefURIs` 

549 The URI to the primary artifact associated with this dataset (if 

550 the dataset was disassembled within the datastore this may be 

551 `None`), and the URIs to any components associated with the dataset 

552 artifact (these can be empty if there are no components).

553 

554 Notes 

555 ----- 

556 The returned URI comes from the first datastore in the list that has

557 the dataset, with preference given to permanent datastores over

558 ephemeral ones. If no datastore has the dataset and prediction

559 is allowed, the predicted URI for the first datastore in the list will 

560 be returned. 

561 """ 

562 log.debug("Requesting URIs for %s", ref) 

563 predictedUri: Optional[DatasetRefURIs] = None 

564 predictedEphemeralUri: Optional[DatasetRefURIs] = None 

565 firstEphemeralUri: Optional[DatasetRefURIs] = None 

566 for datastore in self.datastores: 

567 if datastore.exists(ref): 

568 if not datastore.isEphemeral: 

569 uri = datastore.getURIs(ref) 

570 log.debug("Retrieved non-ephemeral URI: %s", uri) 

571 return uri 

572 elif not firstEphemeralUri: 

573 firstEphemeralUri = datastore.getURIs(ref) 

574 elif predict: 

575 if not predictedUri and not datastore.isEphemeral: 

576 predictedUri = datastore.getURIs(ref, predict) 

577 elif not predictedEphemeralUri and datastore.isEphemeral: 

578 predictedEphemeralUri = datastore.getURIs(ref, predict) 

579 

580 if firstEphemeralUri: 

581 log.debug("Retrieved ephemeral URI: %s", firstEphemeralUri) 

582 return firstEphemeralUri 

583 

584 if predictedUri: 

585 log.debug("Retrieved predicted URI: %s", predictedUri) 

586 return predictedUri 

587 

588 if predictedEphemeralUri: 

589 log.debug("Retrieved predicted ephemeral URI: %s", predictedEphemeralUri) 

590 return predictedEphemeralUri 

591 

592 raise FileNotFoundError("Dataset {} not in any datastore".format(ref)) 
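
# Illustrative sketch of URI retrieval with prediction enabled, unpacking the
# result the same way ``getURI`` below does (``chained`` and ``ref`` assumed
# from context):
#
#     primary, components = chained.getURIs(ref, predict=True)
#     if primary is None:
#         # dataset was disassembled; consult the per-component URIs instead
#         log.info("components: %s", components)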

593 

594 def getURI(self, ref: DatasetRef, predict: bool = False) -> ResourcePath: 

595 """URI to the Dataset. 

596 

597 The returned URI comes from the first datastore in the list that has

598 the dataset, with preference given to permanent datastores over

599 ephemeral ones. If no datastore has the dataset and prediction

600 is allowed, the predicted URI for the first datastore in the list will 

601 be returned. 

602 

603 Parameters 

604 ---------- 

605 ref : `DatasetRef` 

606 Reference to the required Dataset. 

607 predict : `bool` 

608 If `True`, allow URIs to be returned of datasets that have not 

609 been written. 

610 

611 Returns 

612 ------- 

613 uri : `lsst.resources.ResourcePath` 

614 URI pointing to the dataset within the datastore. If the 

615 dataset does not exist in the datastore, and if ``predict`` is 

616 `True`, the URI will be a prediction and will include a URI 

617 fragment "#predicted". 

618 

619 Notes 

620 ----- 

621 If the datastore does not have entities that relate well 

622 to the concept of a URI, the returned URI string will be

623 descriptive. The returned URI is not guaranteed to be obtainable. 

624 

625 Raises 

626 ------ 

627 FileNotFoundError 

628 A URI has been requested for a dataset that does not exist and 

629 guessing is not allowed. 

630 RuntimeError 

631 Raised if a request is made for a single URI but multiple URIs 

632 are associated with this dataset. 

633 """ 

634 log.debug("Requesting URI for %s", ref) 

635 primary, components = self.getURIs(ref, predict) 

636 if primary is None or components: 636 ↛ 637  line 636 didn't jump to line 637, because the condition on line 636 was never true

637 raise RuntimeError( 

638 f"Dataset ({ref}) includes distinct URIs for components. Use Datastore.getURIs() instead." 

639 ) 

640 return primary 

641 

642 def retrieveArtifacts( 

643 self, 

644 refs: Iterable[DatasetRef], 

645 destination: ResourcePath, 

646 transfer: str = "auto", 

647 preserve_path: bool = True, 

648 overwrite: bool = False, 

649 ) -> List[ResourcePath]: 

650 """Retrieve the file artifacts associated with the supplied refs. 

651 

652 Parameters 

653 ---------- 

654 refs : iterable of `DatasetRef` 

655 The datasets for which file artifacts are to be retrieved. 

656 A single ref can result in multiple files. The refs must 

657 be resolved. 

658 destination : `lsst.resources.ResourcePath` 

659 Location to write the file artifacts. 

660 transfer : `str`, optional 

661 Method to use to transfer the artifacts. Must be one of the options 

662 supported by `lsst.resources.ResourcePath.transfer_from()`. 

663 "move" is not allowed. 

664 preserve_path : `bool`, optional 

665 If `True` the full path of the file artifact within the datastore 

666 is preserved. If `False` the final file component of the path 

667 is used. 

668 overwrite : `bool`, optional 

669 If `True` allow transfers to overwrite existing files at the 

670 destination. 

671 

672 Returns 

673 ------- 

674 targets : `list` of `lsst.resources.ResourcePath` 

675 URIs of file artifacts in destination location. Order is not 

676 preserved. 

677 """ 

678 if not destination.isdir(): 678 ↛ 679  line 678 didn't jump to line 679, because the condition on line 678 was never true

679 raise ValueError(f"Destination location must refer to a directory. Given {destination}") 

680 

681 # Using getURIs is not feasible since it becomes difficult to 

682 # determine the path within the datastore later on. For now 

683 # follow getURIs implementation approach. 

684 

685 pending = set(refs) 

686 

687 # There is a question as to whether an exception should be raised 

688 # early if some of the refs are missing, or whether files should be 

689 # transferred until a problem is hit. Prefer to complain up front. 

690 # Use the datastore integer as primary key. 

691 grouped_by_datastore: Dict[int, Set[DatasetRef]] = {} 

692 

693 for number, datastore in enumerate(self.datastores): 

694 if datastore.isEphemeral: 

695 # In the future we will want to distinguish in-memory from 

696 # caching datastore since using an on-disk local 

697 # cache is exactly what we should be doing. 

698 continue 

699 datastore_refs = {ref for ref in pending if datastore.exists(ref)} 

700 

701 if datastore_refs: 

702 grouped_by_datastore[number] = datastore_refs 

703 

704 # Remove these from the pending list so that we do not bother 

705 # looking for them any more. 

706 pending = pending - datastore_refs 

707 

708 if pending: 708 ↛ 709  line 708 didn't jump to line 709, because the condition on line 708 was never true

709 raise RuntimeError(f"Some datasets were not found in any datastores: {pending}") 

710 

711 # Now do the transfer. 

712 targets: List[ResourcePath] = [] 

713 for number, datastore_refs in grouped_by_datastore.items(): 

714 targets.extend( 

715 self.datastores[number].retrieveArtifacts( 

716 datastore_refs, 

717 destination, 

718 transfer=transfer, 

719 preserve_path=preserve_path, 

720 overwrite=overwrite, 

721 ) 

722 ) 

723 

724 return targets 
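
# Illustrative sketch of retrieving artifacts into a local directory; the
# destination path is an example and must refer to a directory
# (``chained`` and ``refs`` assumed from context):
#
#     destination = ResourcePath("artifact_export/", forceDirectory=True)
#     targets = chained.retrieveArtifacts(refs, destination, transfer="copy")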

725 

726 def remove(self, ref: DatasetRef) -> None: 

727 """Indicate to the datastore that a dataset can be removed. 

728 

729 The dataset will be removed from each datastore. The dataset is 

730 not required to exist in every child datastore. 

731 

732 Parameters 

733 ---------- 

734 ref : `DatasetRef` 

735 Reference to the required dataset. 

736 

737 Raises 

738 ------ 

739 FileNotFoundError 

740 Attempt to remove a dataset that does not exist. Raised if none 

741 of the child datastores removed the dataset. 

742 """ 

743 log.debug("Removing %s", ref) 

744 self.trash(ref, ignore_errors=False) 

745 self.emptyTrash(ignore_errors=False) 

746 

747 def forget(self, refs: Iterable[DatasetRef]) -> None: 

748 for datastore in tuple(self.datastores): 

749 datastore.forget(refs) 

750 

751 def trash(self, ref: Union[DatasetRef, Iterable[DatasetRef]], ignore_errors: bool = True) -> None: 

752 if isinstance(ref, DatasetRef): 

753 ref_label = str(ref) 

754 else: 

755 ref_label = "bulk datasets" 

756 

757 log.debug("Trashing %s", ref_label) 

758 

759 counter = 0 

760 for datastore in self.datastores: 

761 try: 

762 datastore.trash(ref, ignore_errors=ignore_errors) 

763 counter += 1 

764 except FileNotFoundError: 

765 pass 

766 

767 if counter == 0: 

768 err_msg = f"Could not mark for removal from any child datastore: {ref_label}" 

769 if ignore_errors: 769 ↛ 770  line 769 didn't jump to line 770, because the condition on line 769 was never true

770 log.warning(err_msg) 

771 else: 

772 raise FileNotFoundError(err_msg) 

773 

774 def emptyTrash(self, ignore_errors: bool = True) -> None: 

775 for datastore in self.datastores: 

776 datastore.emptyTrash(ignore_errors=ignore_errors) 

777 

778 def transfer(self, inputDatastore: Datastore, ref: DatasetRef) -> None: 

779 """Retrieve a dataset from an input `Datastore`, 

780 and store the result in this `Datastore`. 

781 

782 Parameters 

783 ---------- 

784 inputDatastore : `Datastore` 

785 The external `Datastore` from which to retrieve the Dataset.

786 ref : `DatasetRef` 

787 Reference to the required dataset in the input data store. 

788 

789 Returns 

790 ------- 

791 results : `list` 

792 List containing the return value from the ``put()`` to each 

793 child datastore. 

794 """ 

795 assert inputDatastore is not self # unless we want it for renames? 

796 inMemoryDataset = inputDatastore.get(ref) 

797 self.put(inMemoryDataset, ref) 

798 

799 def validateConfiguration( 

800 self, entities: Iterable[Union[DatasetRef, DatasetType, StorageClass]], logFailures: bool = False 

801 ) -> None: 

802 """Validate some of the configuration for this datastore. 

803 

804 Parameters 

805 ---------- 

806 entities : iterable of `DatasetRef`, `DatasetType`, or `StorageClass` 

807 Entities to test against this configuration. Can be differing 

808 types. 

809 logFailures : `bool`, optional 

810 If `True`, output a log message for every validation error 

811 detected. 

812 

813 Raises 

814 ------ 

815 DatastoreValidationError 

816 Raised if there is a validation problem with a configuration. 

817 All the problems are reported in a single exception. 

818 

819 Notes 

820 ----- 

821 This method checks each datastore in turn. 

822 """ 

823 

824 # Need to catch each of the datastore outputs and ensure that 

825 # all are tested. 

826 failures = [] 

827 for datastore in self.datastores: 

828 try: 

829 datastore.validateConfiguration(entities, logFailures=logFailures) 

830 except DatastoreValidationError as e: 

831 if logFailures: 831 ↛ 833  line 831 didn't jump to line 833, because the condition on line 831 was never false

832 log.critical("Datastore %s failed validation", datastore.name) 

833 failures.append(f"Datastore {self.name}: {e}") 

834 

835 if failures: 

836 msg = ";\n".join(failures) 

837 raise DatastoreValidationError(msg) 

838 

839 def validateKey(self, lookupKey: LookupKey, entity: Union[DatasetRef, DatasetType, StorageClass]) -> None: 

840 # Docstring is inherited from base class 

841 failures = [] 

842 for datastore in self.datastores: 

843 try: 

844 datastore.validateKey(lookupKey, entity) 

845 except DatastoreValidationError as e: 

846 failures.append(f"Datastore {self.name}: {e}") 

847 

848 if failures: 

849 msg = ";\n".join(failures) 

850 raise DatastoreValidationError(msg) 

851 

852 def getLookupKeys(self) -> Set[LookupKey]: 

853 # Docstring is inherited from base class 

854 keys = set() 

855 for datastore in self.datastores: 

856 keys.update(datastore.getLookupKeys()) 

857 

858 keys.update(self.constraints.getLookupKeys()) 

859 for p in self.datastoreConstraints: 

860 if p is not None: 860 ↛ 861  line 860 didn't jump to line 861, because the condition on line 860 was never true

861 keys.update(p.getLookupKeys()) 

862 

863 return keys 

864 

865 def needs_expanded_data_ids( 

866 self, 

867 transfer: Optional[str], 

868 entity: Optional[Union[DatasetRef, DatasetType, StorageClass]] = None, 

869 ) -> bool: 

870 # Docstring inherited. 

871 # We can't safely use `self.datastoreConstraints` with `entity` to 

872 # check whether a child datastore would even want to ingest this 

873 # dataset, because we don't want to filter out datastores that might 

874 need an expanded data ID based on incomplete information (e.g. we

875 # pass a StorageClass, but the constraint dispatches on DatasetType). 

876 # So we pessimistically check if any datastore would need an expanded 

877 # data ID for this transfer mode. 

878 return any(datastore.needs_expanded_data_ids(transfer) for datastore in self.datastores) 878 ↛ exit  line 878 didn't finish the generator expression on line 878

879 

880 def import_records(self, data: Mapping[str, DatastoreRecordData]) -> None: 

881 # Docstring inherited from the base class. 

882 

883 for datastore in self.datastores: 

884 datastore.import_records(data) 

885 

886 def export_records(self, refs: Iterable[DatasetIdRef]) -> Mapping[str, DatastoreRecordData]: 

887 # Docstring inherited from the base class. 

888 

889 all_records: Dict[str, DatastoreRecordData] = {} 

890 

891 # Merge all sub-datastore records into one structure 

892 for datastore in self.datastores: 

893 sub_records = datastore.export_records(refs) 

894 for name, record_data in sub_records.items(): 

895 # All datastore names must be unique in a chain. 

896 if name in all_records: 896 ↛ 897  line 896 didn't jump to line 897, because the condition on line 896 was never true

897 raise ValueError(f"Non-unique datastore name found in datastore {datastore}")

898 all_records[name] = record_data 

899 

900 return all_records