Coverage for python/lsst/daf/butler/datastores/chainedDatastore.py: 91%

329 statements  

coverage.py v6.5.0, created at 2022-10-21 02:02 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24"""Chained datastore.""" 

25 

26__all__ = ("ChainedDatastore",) 

27 

28import itertools 

29import logging 

30import time 

31import warnings 

32from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Mapping, Optional, Sequence, Set, Tuple, Union 

33 

34from lsst.daf.butler import ( 

35 Constraints, 

36 DatasetRef, 

37 DatasetRefURIs, 

38 DatasetTypeNotSupportedError, 

39 Datastore, 

40 DatastoreConfig, 

41 DatastoreRecordData, 

42 DatastoreValidationError, 

43 FileDataset, 

44) 

45from lsst.resources import ResourcePath 

46from lsst.utils import doImportType 

47 

48if TYPE_CHECKING: 48 ↛ 49 (line 48 didn't jump to line 49, because the condition on line 48 was never true)

49 from lsst.daf.butler import Config, DatasetType, LookupKey, StorageClass 

50 from lsst.daf.butler.registry.interfaces import DatasetIdRef, DatastoreRegistryBridgeManager 

51 

52log = logging.getLogger(__name__) 

53 

54 

55class _IngestPrepData(Datastore.IngestPrepData): 

56 """Helper class for ChainedDatastore ingest implementation. 

57 

58 Parameters 

59 ---------- 

60 children : `list` of `tuple` 

61 Pairs of `Datastore`, `IngestPrepData` for all child datastores. 

62 """ 

63 

64 def __init__(self, children: List[Tuple[Datastore, Datastore.IngestPrepData, set[ResourcePath]]]): 

65 super().__init__(itertools.chain.from_iterable(data.refs.values() for _, data, _ in children)) 

66 self.children = children 

67 

68 

69class ChainedDatastore(Datastore): 

70 """Chained Datastores to allow read and writes from multiple datastores. 

71 

72 A ChainedDatastore is configured with multiple datastore configurations. 

73 A ``put()`` is sent to every child datastore that accepts the

74 dataset. A ``get()`` is tried on each datastore in turn and the

75 first datastore to return a valid dataset is used.

76 

77 Parameters 

78 ---------- 

79 config : `DatastoreConfig` or `str` 

80 Configuration. This configuration must include a ``datastores`` field 

81 as a sequence of datastore configurations. The order in this sequence 

82 indicates the order to use for read operations. 

83 bridgeManager : `DatastoreRegistryBridgeManager` 

84 Object that manages the interface between `Registry` and datastores. 

85 butlerRoot : `str`, optional 

86 New datastore root to use to override the configuration value. This 

87 root is sent to each child datastore. 

88 

89 Notes 

90 ----- 

91 ChainedDatastore never supports `None` as an `ingest` transfer mode.

92 `"move"` falls back to copy-then-delete when more than one child ingests

93 the files. Other modes are supported if at least one child supports them.

94 """ 

95 
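
The read/write fan-out described in the docstring can be illustrated with a minimal, self-contained sketch (plain dicts stand in for child datastores; this is not the real Datastore API):

    # Illustrative sketch only -- not part of the covered source file.
    stores = [{}, {}]  # stand-ins for the chain of child datastores

    def chained_put(key, value):
        for store in stores:        # a put is sent to every child datastore
            store[key] = value

    def chained_get(key):
        for store in stores:        # a get returns the first valid hit
            if key in store:
                return store[key]
        raise FileNotFoundError(key)

    chained_put("ref1", 42)
    assert chained_get("ref1") == 42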

96 defaultConfigFile = "datastores/chainedDatastore.yaml" 

97 """Path to configuration defaults. Accessed within the ``configs`` resource 

98 or relative to a search path. Can be None if no defaults specified. 

99 """ 

100 

101 containerKey = "datastores" 

102 """Key to specify where child datastores are configured.""" 

103 

104 datastores: List[Datastore] 

105 """All the child datastores known to this datastore.""" 

106 

107 datastoreConstraints: Sequence[Optional[Constraints]] 

108 """Constraints to be applied to each of the child datastores.""" 

109 

110 @classmethod 

111 def setConfigRoot(cls, root: str, config: Config, full: Config, overwrite: bool = True) -> None: 

112 """Set any filesystem-dependent config options for child Datastores to 

113 be appropriate for a new empty repository with the given root. 

114 

115 Parameters 

116 ---------- 

117 root : `str` 

118 Filesystem path to the root of the data repository. 

119 config : `Config` 

120 A `Config` to update. Only the subset understood by 

121 this component will be updated. Will not expand 

122 defaults. 

123 full : `Config` 

124 A complete config with all defaults expanded that can be 

125 converted to a `DatastoreConfig`. Read-only and will not be 

126 modified by this method. 

127 Repository-specific options that should not be obtained 

128 from defaults when Butler instances are constructed 

129 should be copied from ``full`` to ``config``. 

130 overwrite : `bool`, optional 

131 If `False`, do not modify a value in ``config`` if the value 

132 already exists. Default is always to overwrite with the provided 

133 ``root``. 

134 

135 Notes 

136 ----- 

137 If a keyword is explicitly defined in the supplied ``config`` it 

138 will not be overridden by this method if ``overwrite`` is `False`. 

139 This allows explicit values set in external configs to be retained. 

140 """ 

141 

142 # Extract the part of the config we care about updating 

143 datastoreConfig = DatastoreConfig(config, mergeDefaults=False) 

144 

145 # And the subset of the full config that we can use for reference. 

146 # Do not bother with defaults because we are told this already has 

147 # them. 

148 fullDatastoreConfig = DatastoreConfig(full, mergeDefaults=False) 

149 

150 # Loop over each datastore config and pass the subsets to the 

151 # child datastores to process. 

152 

153 containerKey = cls.containerKey 

154 for idx, (child, fullChild) in enumerate( 

155 zip(datastoreConfig[containerKey], fullDatastoreConfig[containerKey]) 

156 ): 

157 childConfig = DatastoreConfig(child, mergeDefaults=False) 

158 fullChildConfig = DatastoreConfig(fullChild, mergeDefaults=False) 

159 datastoreClass = doImportType(fullChildConfig["cls"]) 

160 if not issubclass(datastoreClass, Datastore): 160 ↛ 161 (line 160 didn't jump to line 161, because the condition on line 160 was never true)

161 raise TypeError(f"Imported child class {fullChildConfig['cls']} is not a Datastore") 

162 newroot = "{}/{}_{}".format(root, datastoreClass.__qualname__, idx) 

163 datastoreClass.setConfigRoot(newroot, childConfig, fullChildConfig, overwrite=overwrite) 

164 

165 # Reattach to parent 

166 datastoreConfig[containerKey, idx] = childConfig 

167 

168 # Reattach modified datastore config to parent 

169 # If this has a datastore key we attach there, otherwise we assume 

170 # this information goes at the top of the config hierarchy. 

171 if DatastoreConfig.component in config: 

172 config[DatastoreConfig.component] = datastoreConfig 

173 else: 

174 config.update(datastoreConfig) 

175 

176 return 

177 
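
As a hedged illustration of the per-child root naming used in ``setConfigRoot`` above (the class names here are hypothetical):

    # Illustrative sketch only: each child gets "<root>/<ClassName>_<index>".
    root = "/repo"
    child_classes = ["FileDatastore", "InMemoryDatastore"]  # hypothetical chain
    for idx, qualname in enumerate(child_classes):
        newroot = "{}/{}_{}".format(root, qualname, idx)
        print(newroot)  # /repo/FileDatastore_0, then /repo/InMemoryDatastore_1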

178 def __init__( 

179 self, 

180 config: Union[Config, str], 

181 bridgeManager: DatastoreRegistryBridgeManager, 

182 butlerRoot: Optional[str] = None,

183 ): 

184 super().__init__(config, bridgeManager) 

185 

186 # Scan for child datastores and instantiate them with the same registry 

187 self.datastores = [] 

188 for c in self.config["datastores"]: 

189 c = DatastoreConfig(c) 

190 datastoreType = doImportType(c["cls"]) 

191 if not issubclass(datastoreType, Datastore): 191 ↛ 192 (line 191 didn't jump to line 192, because the condition on line 191 was never true)

192 raise TypeError(f"Imported child class {c['cls']} is not a Datastore") 

193 datastore = datastoreType(c, bridgeManager, butlerRoot=butlerRoot) 

194 log.debug("Creating child datastore %s", datastore.name) 

195 self.datastores.append(datastore) 

196 

197 # Name ourself based on our children 

198 if self.datastores: 198 ↛ 203 (line 198 didn't jump to line 203, because the condition on line 198 was never false)

199 # We must set the names explicitly 

200 self._names = [d.name for d in self.datastores] 

201 childNames = ",".join(self.names) 

202 else: 

203 childNames = "(empty@{})".format(time.time()) 

204 self._names = [childNames] 

205 self.name = "{}[{}]".format(type(self).__qualname__, childNames) 

206 

207 # We declare we are ephemeral if all our child datastores declare 

208 # they are ephemeral 

209 isEphemeral = True 

210 for d in self.datastores: 

211 if not d.isEphemeral: 

212 isEphemeral = False 

213 break 

214 self.isEphemeral = isEphemeral 

215 

216 # per-datastore override constraints 

217 if "datastore_constraints" in self.config: 

218 overrides = self.config["datastore_constraints"] 

219 

220 if len(overrides) != len(self.datastores): 220 ↛ 221 (line 220 didn't jump to line 221, because the condition on line 220 was never true)

221 raise DatastoreValidationError( 

222 f"Number of registered datastores ({len(self.datastores)})" 

223 " differs from number of constraints overrides" 

224 f" {len(overrides)}" 

225 ) 

226 

227 self.datastoreConstraints = [ 

228 Constraints(c.get("constraints"), universe=bridgeManager.universe) for c in overrides 

229 ] 

230 

231 else: 

232 self.datastoreConstraints = (None,) * len(self.datastores) 

233 

234 log.debug("Created %s (%s)", self.name, ("ephemeral" if self.isEphemeral else "permanent")) 

235 
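
A small sketch of the naming and the constraint-override length check performed by the constructor (the child names are hypothetical):

    # Illustrative sketch only: chain name composition and override check.
    child_names = ["FileDatastore@<butlerRoot>", "InMemoryDatastore@<id>"]  # hypothetical
    name = "{}[{}]".format("ChainedDatastore", ",".join(child_names))
    print(name)

    overrides = [None, None]  # one (possibly None) constraints entry per child
    if len(overrides) != len(child_names):
        raise ValueError("Number of datastores differs from number of constraint overrides")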

236 @property 

237 def names(self) -> Tuple[str, ...]: 

238 return tuple(self._names) 

239 

240 def __str__(self) -> str: 

241 chainName = ", ".join(str(ds) for ds in self.datastores) 

242 return chainName 

243 

244 def knows(self, ref: DatasetRef) -> bool: 

245 """Check if the dataset is known to any of the datastores. 

246 

247 Does not check for existence of any artifact. 

248 

249 Parameters 

250 ---------- 

251 ref : `DatasetRef` 

252 Reference to the required dataset. 

253 

254 Returns 

255 ------- 

256 exists : `bool` 

257 `True` if the dataset is known to the datastore. 

258 """ 

259 for datastore in self.datastores: 

260 if datastore.knows(ref): 

261 log.debug("%s known to datastore %s", ref, datastore.name) 

262 return True 

263 return False 

264 

265 def mexists( 

266 self, refs: Iterable[DatasetRef], artifact_existence: Optional[Dict[ResourcePath, bool]] = None 

267 ) -> Dict[DatasetRef, bool]: 

268 """Check the existence of multiple datasets at once. 

269 

270 Parameters 

271 ---------- 

272 refs : iterable of `DatasetRef` 

273 The datasets to be checked. 

274 artifact_existence : `dict` [`lsst.resources.ResourcePath`, `bool`] 

275 Optional mapping of datastore artifact to existence. Updated by 

276 this method with details of all artifacts tested. Can be `None` 

277 if the caller is not interested. 

278 

279 Returns 

280 ------- 

281 existence : `dict` [`DatasetRef`, `bool`]

282 Mapping from dataset to boolean indicating existence in any 

283 of the child datastores. 

284 """ 

285 dataset_existence: Dict[DatasetRef, bool] = {} 

286 for datastore in self.datastores: 

287 dataset_existence.update(datastore.mexists(refs, artifact_existence=artifact_existence)) 

288 

289 # For next datastore no point asking about ones we know 

290 # exist already. No special exemption for ephemeral datastores. 

291 refs = [ref for ref, exists in dataset_existence.items() if not exists] 

292 

293 return dataset_existence 

294 
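
The narrowing of ``refs`` between passes can be sketched generically (sets of strings stand in for datastores and refs):

    # Illustrative sketch only: later datastores are only asked about refs
    # that have not been confirmed to exist yet.
    stores = [{"a"}, {"b", "c"}]          # hypothetical per-store contents
    refs = ["a", "b", "d"]

    existence = {}
    for store in stores:
        existence.update({ref: ref in store for ref in refs})
        refs = [ref for ref, exists in existence.items() if not exists]

    print(existence)  # {'a': True, 'b': True, 'd': False}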

295 def exists(self, ref: DatasetRef) -> bool: 

296 """Check if the dataset exists in one of the datastores. 

297 

298 Parameters 

299 ---------- 

300 ref : `DatasetRef` 

301 Reference to the required dataset. 

302 

303 Returns 

304 ------- 

305 exists : `bool` 

306 `True` if the entity exists in one of the child datastores. 

307 """ 

308 for datastore in self.datastores: 

309 if datastore.exists(ref): 

310 log.debug("Found %s in datastore %s", ref, datastore.name) 

311 return True 

312 return False 

313 

314 def get( 

315 self, 

316 ref: DatasetRef, 

317 parameters: Optional[Mapping[str, Any]] = None, 

318 storageClass: Optional[Union[StorageClass, str]] = None, 

319 ) -> Any: 

320 """Load an InMemoryDataset from the store. 

321 

322 The dataset is returned from the first datastore that has 

323 the dataset. 

324 

325 Parameters 

326 ---------- 

327 ref : `DatasetRef` 

328 Reference to the required Dataset. 

329 parameters : `dict` 

330 `StorageClass`-specific parameters that specify, for example, 

331 a slice of the dataset to be loaded. 

332 storageClass : `StorageClass` or `str`, optional 

333 The storage class to be used to override the Python type 

334 returned by this method. By default the returned type matches 

335 the dataset type definition for this dataset. Specifying a 

336 read `StorageClass` can force a different type to be returned. 

337 This type must be compatible with the original type. 

338 

339 Returns 

340 ------- 

341 inMemoryDataset : `object` 

342 Requested dataset or slice thereof as an InMemoryDataset. 

343 

344 Raises 

345 ------ 

346 FileNotFoundError 

347 Requested dataset can not be retrieved. 

348 TypeError 

349 Return value from formatter has unexpected type. 

350 ValueError 

351 Formatter failed to process the dataset. 

352 """ 

353 

354 for datastore in self.datastores: 

355 try: 

356 inMemoryObject = datastore.get(ref, parameters, storageClass=storageClass) 

357 log.debug("Found dataset %s in datastore %s", ref, datastore.name) 

358 return inMemoryObject 

359 except FileNotFoundError: 

360 pass 

361 

362 raise FileNotFoundError("Dataset {} could not be found in any of the datastores".format(ref)) 

363 
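
The first-success loop above amounts to the following generic pattern (a ``KeyError`` stands in for the ``FileNotFoundError`` raised by a child datastore):

    # Illustrative sketch only: try each store in order, first hit wins.
    def chained_get(stores, key):
        for store in stores:
            try:
                return store[key]
            except KeyError:
                pass
        raise FileNotFoundError(f"{key} could not be found in any of the datastores")

    print(chained_get([{}, {"ref": "pixels"}], "ref"))  # pixels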

364 def put(self, inMemoryDataset: Any, ref: DatasetRef) -> None: 

365 """Write a InMemoryDataset with a given `DatasetRef` to each 

366 datastore. 

367 

368 The put() to child datastores can fail with 

369 `DatasetTypeNotSupportedError`. The put() for this datastore will be 

370 deemed to have succeeded so long as at least one child datastore 

371 accepted the inMemoryDataset. 

372 

373 Parameters 

374 ---------- 

375 inMemoryDataset : `object` 

376 The dataset to store. 

377 ref : `DatasetRef` 

378 Reference to the associated Dataset. 

379 

380 Raises 

381 ------ 

382 TypeError 

383 Supplied object and storage class are inconsistent. 

384 DatasetTypeNotSupportedError 

385 All datastores reported `DatasetTypeNotSupportedError`. 

386 """ 

387 log.debug("Put %s", ref) 

388 

389 # Confirm that we can accept this dataset 

390 if not self.constraints.isAcceptable(ref): 

391 # Raise rather than use boolean return value. 

392 raise DatasetTypeNotSupportedError( 

393 f"Dataset {ref} has been rejected by this datastore via configuration." 

394 ) 

395 

396 isPermanent = False 

397 nsuccess = 0 

398 npermanent = 0 

399 nephemeral = 0 

400 for datastore, constraints in zip(self.datastores, self.datastoreConstraints): 

401 if constraints is not None and not constraints.isAcceptable(ref): 

402 log.debug("Datastore %s skipping put via configuration for ref %s", datastore.name, ref) 

403 continue 

404 

405 if datastore.isEphemeral: 

406 nephemeral += 1 

407 else: 

408 npermanent += 1 

409 try: 

410 datastore.put(inMemoryDataset, ref) 

411 nsuccess += 1 

412 if not datastore.isEphemeral: 

413 isPermanent = True 

414 except DatasetTypeNotSupportedError: 

415 pass 

416 

417 if nsuccess == 0: 

418 raise DatasetTypeNotSupportedError(f"None of the chained datastores supported ref {ref}") 

419 

420 if not isPermanent and npermanent > 0: 420 ↛ 421 (line 420 didn't jump to line 421, because the condition on line 420 was never true)

421 warnings.warn(f"Put of {ref} only succeeded in ephemeral datastores", stacklevel=2)

422 

423 if self._transaction is not None: 

424 self._transaction.registerUndo("put", self.remove, ref) 

425 
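
The success accounting in ``put()`` can be sketched as follows (the child tuples are hypothetical):

    # Illustrative sketch only: a put succeeds if any child accepted it, and
    # warns when only ephemeral children did despite permanent ones being tried.
    import warnings

    children = [("memory", True, True), ("disk", False, False)]  # (name, isEphemeral, accepted)
    nsuccess = npermanent = 0
    isPermanent = False
    for _name, ephemeral, accepted in children:
        if not ephemeral:
            npermanent += 1
        if accepted:
            nsuccess += 1
            if not ephemeral:
                isPermanent = True

    if nsuccess == 0:
        raise RuntimeError("None of the chained datastores accepted the dataset")
    if not isPermanent and npermanent > 0:
        warnings.warn("Put only succeeded in ephemeral datastores")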

426 def _overrideTransferMode(self, *datasets: Any, transfer: Optional[str] = None) -> Optional[str]: 

427 # Docstring inherited from base class. 

428 if transfer != "auto": 

429 return transfer 

430 # Ask each datastore what they think auto means 

431 transfers = {d._overrideTransferMode(*datasets, transfer=transfer) for d in self.datastores} 

432 

433 # Remove any untranslated "auto" values 

434 transfers.discard(transfer) 

435 

436 if len(transfers) == 1: 436 ↛ 437 (line 436 didn't jump to line 437, because the condition on line 436 was never true)

437 return transfers.pop() 

438 if not transfers: 438 ↛ 442 (line 438 didn't jump to line 442, because the condition on line 438 was never false)

439 # Everything reported "auto" 

440 return transfer 

441 

442 raise RuntimeError( 

443 "Chained datastore does not yet support different transfer modes" 

444 f" from 'auto' in each child datastore (wanted {transfers})" 

445 ) 

446 
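
The resolution of ``transfer="auto"`` reduces to a simple set agreement check, sketched here with hypothetical per-child answers:

    # Illustrative sketch only: children must agree once "auto" is discarded.
    child_answers = {"auto", "copy"}   # hypothetical per-child interpretations
    child_answers.discard("auto")      # children with no opinion drop out
    if len(child_answers) == 1:
        transfer = child_answers.pop()
    elif not child_answers:
        transfer = "auto"              # nobody translated "auto"; keep it
    else:
        raise RuntimeError(f"Children disagree on transfer mode: {child_answers}")
    print(transfer)  # copy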

447 def _prepIngest(self, *datasets: FileDataset, transfer: Optional[str] = None) -> _IngestPrepData: 

448 # Docstring inherited from Datastore._prepIngest. 

449 if transfer is None: 

450 raise NotImplementedError("ChainedDatastore does not support transfer=None.") 

451 

452 def isDatasetAcceptable(dataset: FileDataset, *, name: str, constraints: Constraints) -> bool: 

453 acceptable = [ref for ref in dataset.refs if constraints.isAcceptable(ref)] 

454 if not acceptable: 

455 log.debug( 

456 "Datastore %s skipping ingest via configuration for refs %s", 

457 name, 

458 ", ".join(str(ref) for ref in dataset.refs), 

459 ) 

460 return False 

461 else: 

462 return True 

463 

464 # Filter down to just datasets the chained datastore's own 

465 # configuration accepts. 

466 okForParent: List[FileDataset] = [ 

467 dataset 

468 for dataset in datasets 

469 if isDatasetAcceptable(dataset, name=self.name, constraints=self.constraints) 

470 ] 

471 

472 # Iterate over nested datastores and call _prepIngest on each. 

473 # Save the results to a list: 

474 children: List[Tuple[Datastore, Datastore.IngestPrepData, set[ResourcePath]]] = [] 

475 # ...and remember whether all of the failures are due to 

476 # NotImplementedError being raised. 

477 allFailuresAreNotImplementedError = True 

478 for datastore, constraints in zip(self.datastores, self.datastoreConstraints): 

479 okForChild: List[FileDataset] 

480 if constraints is not None: 

481 okForChild = [ 

482 dataset 

483 for dataset in okForParent 

484 if isDatasetAcceptable(dataset, name=datastore.name, constraints=constraints) 

485 ] 

486 else: 

487 okForChild = okForParent 

488 try: 

489 prepDataForChild = datastore._prepIngest(*okForChild, transfer=transfer) 

490 except NotImplementedError: 

491 log.debug( 

492 "Skipping ingest for datastore %s because transfer mode %s is not supported.", 

493 datastore.name, 

494 transfer, 

495 ) 

496 continue 

497 allFailuresAreNotImplementedError = False 

498 if okForChild: 

499 # Do not store for later if a datastore has rejected 

500 # everything. 

501 # Include the source paths if this is a "move". It's clearer 

502 # to find the paths now rather than try to infer how 

503 # each datastore has stored them in the internal prep class. 

504 paths = ( 

505 {ResourcePath(dataset.path) for dataset in okForChild} if transfer == "move" else set() 

506 ) 

507 children.append((datastore, prepDataForChild, paths)) 

508 if allFailuresAreNotImplementedError: 

509 raise NotImplementedError(f"No child datastore supports transfer mode {transfer}.") 

510 return _IngestPrepData(children=children) 

511 

512 def _finishIngest( 

513 self, 

514 prepData: _IngestPrepData, 

515 *, 

516 transfer: Optional[str] = None, 

517 record_validation_info: bool = True, 

518 ) -> None: 

519 # Docstring inherited from Datastore._finishIngest. 

520 # For "move" we must use "copy" and then delete the input 

521 # data at the end. This has no rollback option if the ingest 

522 # subsequently fails. If there is only one active datastore 

523 # accepting any files we can leave it as "move" 

524 actual_transfer: str | None 

525 if transfer == "move" and len(prepData.children) > 1: 

526 actual_transfer = "copy" 

527 else: 

528 actual_transfer = transfer 

529 to_be_deleted: set[ResourcePath] = set() 

530 for datastore, prepDataForChild, paths in prepData.children: 

531 datastore._finishIngest( 

532 prepDataForChild, transfer=actual_transfer, record_validation_info=record_validation_info 

533 ) 

534 to_be_deleted.update(paths) 

535 if actual_transfer != transfer: 

536 # These datasets were copied but now need to be deleted. 

537 # This can not be rolled back. 

538 for uri in to_be_deleted: 

539 uri.remove() 

540 
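
The ``"move"`` handling above boils down to this decision, sketched with a hypothetical child count:

    # Illustrative sketch only: "move" is downgraded to "copy" when more than
    # one child ingests the files, and the sources are deleted afterwards.
    transfer = "move"
    n_children = 2                     # hypothetical number of accepting children
    actual_transfer = "copy" if transfer == "move" and n_children > 1 else transfer
    delete_sources_afterwards = actual_transfer != transfer
    print(actual_transfer, delete_sources_afterwards)  # copy True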

541 def getManyURIs( 

542 self, 

543 refs: Iterable[DatasetRef], 

544 predict: bool = False, 

545 allow_missing: bool = False, 

546 ) -> Dict[DatasetRef, DatasetRefURIs]: 

547 # Docstring inherited 

548 

549 uris: Dict[DatasetRef, DatasetRefURIs] = {} 

550 missing_refs = set(refs) 

551 

552 # If predict is True we don't want to predict a dataset in the first 

553 # datastore if it actually exists in a later datastore, so in that 

554 # case check all datastores with predict=False first, and then try 

555 # again with predict=True. 

556 for p in (False, True) if predict else (False,): 

557 if not missing_refs: 

558 break 

559 for datastore in self.datastores: 

560 got_uris = datastore.getManyURIs(missing_refs, p, allow_missing=True) 

561 missing_refs -= got_uris.keys() 

562 uris.update(got_uris) 

563 if not missing_refs: 

564 break 

565 

566 if missing_refs and not allow_missing: 

567 raise FileNotFoundError(f"Dataset(s) {missing_refs} not in this datastore.") 

568 

569 return uris 

570 
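
The two-pass structure (all datastores without prediction first, then with prediction) can be sketched generically:

    # Illustrative sketch only: real URIs from any store beat predictions,
    # so prediction only runs after every store has been asked for real ones.
    stores = [set(), {"ref2"}]                  # hypothetical store contents

    def get_many_uris(refs):
        uris, missing = {}, set(refs)
        for predicting in (False, True):
            for idx, store in enumerate(stores):
                for ref in [r for r in missing if predicting or r in store]:
                    uris[ref] = f"store{idx}:{ref}" + ("#predicted" if predicting else "")
                    missing.discard(ref)
                if not missing:
                    return uris
        return uris

    print(get_many_uris({"ref1", "ref2"}))  # ref2 is real, ref1 is predicted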

571 def getURIs(self, ref: DatasetRef, predict: bool = False) -> DatasetRefURIs: 

572 """Return URIs associated with dataset. 

573 

574 Parameters 

575 ---------- 

576 ref : `DatasetRef` 

577 Reference to the required dataset. 

578 predict : `bool`, optional 

579 If the datastore does not know about the dataset, should it 

580 return a predicted URI or not? 

581 

582 Returns 

583 ------- 

584 uris : `DatasetRefURIs` 

585 The URI to the primary artifact associated with this dataset (if 

586 the dataset was disassembled within the datastore this may be 

587 `None`), and the URIs to any components associated with the dataset 

588 artifact (can be empty if there are no components).

589 

590 Notes 

591 ----- 

592 The returned URI is from the first datastore in the list that has 

593 the dataset with preference given to the first dataset coming from 

594 a permanent datastore. If no datastores have the dataset and prediction 

595 is allowed, the predicted URI for the first datastore in the list will 

596 be returned. 

597 """ 

598 log.debug("Requesting URIs for %s", ref) 

599 predictedUri: Optional[DatasetRefURIs] = None 

600 predictedEphemeralUri: Optional[DatasetRefURIs] = None 

601 firstEphemeralUri: Optional[DatasetRefURIs] = None 

602 for datastore in self.datastores: 

603 if datastore.exists(ref): 

604 if not datastore.isEphemeral: 

605 uri = datastore.getURIs(ref) 

606 log.debug("Retrieved non-ephemeral URI: %s", uri) 

607 return uri 

608 elif not firstEphemeralUri: 

609 firstEphemeralUri = datastore.getURIs(ref) 

610 elif predict: 

611 if not predictedUri and not datastore.isEphemeral: 

612 predictedUri = datastore.getURIs(ref, predict) 

613 elif not predictedEphemeralUri and datastore.isEphemeral: 

614 predictedEphemeralUri = datastore.getURIs(ref, predict) 

615 

616 if firstEphemeralUri: 

617 log.debug("Retrieved ephemeral URI: %s", firstEphemeralUri) 

618 return firstEphemeralUri 

619 

620 if predictedUri: 

621 log.debug("Retrieved predicted URI: %s", predictedUri) 

622 return predictedUri 

623 

624 if predictedEphemeralUri: 

625 log.debug("Retrieved predicted ephemeral URI: %s", predictedEphemeralUri) 

626 return predictedEphemeralUri 

627 

628 raise FileNotFoundError("Dataset {} not in any datastore".format(ref)) 

629 
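
The preference order worked out above (existing permanent, existing ephemeral, predicted permanent, predicted ephemeral) can be written as a simple tier scan; the URIs below are hypothetical:

    # Illustrative sketch only: pick the first non-empty preference tier.
    tiers = {
        "existing_permanent": None,
        "existing_ephemeral": "mem://a",
        "predicted_permanent": "file:///repo/a.fits#predicted",
        "predicted_ephemeral": None,
    }
    for tier in ("existing_permanent", "existing_ephemeral",
                 "predicted_permanent", "predicted_ephemeral"):
        if tiers[tier] is not None:
            print(tier, "->", tiers[tier])   # existing_ephemeral -> mem://a
            break
    else:
        raise FileNotFoundError("Dataset not in any datastore")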

630 def getURI(self, ref: DatasetRef, predict: bool = False) -> ResourcePath: 

631 """URI to the Dataset. 

632 

633 The returned URI is from the first datastore in the list that has 

634 the dataset with preference given to the first dataset coming from 

635 a permanent datastore. If no datastores have the dataset and prediction 

636 is allowed, the predicted URI for the first datastore in the list will 

637 be returned. 

638 

639 Parameters 

640 ---------- 

641 ref : `DatasetRef` 

642 Reference to the required Dataset. 

643 predict : `bool` 

644 If `True`, allow URIs to be returned of datasets that have not 

645 been written. 

646 

647 Returns 

648 ------- 

649 uri : `lsst.resources.ResourcePath` 

650 URI pointing to the dataset within the datastore. If the 

651 dataset does not exist in the datastore, and if ``predict`` is 

652 `True`, the URI will be a prediction and will include a URI 

653 fragment "#predicted". 

654 

655 Notes 

656 ----- 

657 If the datastore does not have entities that relate well 

658 to the concept of a URI, the returned URI string will be

659 descriptive. The returned URI is not guaranteed to be obtainable. 

660 

661 Raises 

662 ------ 

663 FileNotFoundError 

664 A URI has been requested for a dataset that does not exist and 

665 guessing is not allowed. 

666 RuntimeError 

667 Raised if a request is made for a single URI but multiple URIs 

668 are associated with this dataset. 

669 """ 

670 log.debug("Requesting URI for %s", ref) 

671 primary, components = self.getURIs(ref, predict) 

672 if primary is None or components: 672 ↛ 673 (line 672 didn't jump to line 673, because the condition on line 672 was never true)

673 raise RuntimeError( 

674 f"Dataset ({ref}) includes distinct URIs for components. Use Datastore.getURIs() instead." 

675 ) 

676 return primary 

677 

678 def retrieveArtifacts( 

679 self, 

680 refs: Iterable[DatasetRef], 

681 destination: ResourcePath, 

682 transfer: str = "auto", 

683 preserve_path: bool = True, 

684 overwrite: bool = False, 

685 ) -> List[ResourcePath]: 

686 """Retrieve the file artifacts associated with the supplied refs. 

687 

688 Parameters 

689 ---------- 

690 refs : iterable of `DatasetRef` 

691 The datasets for which file artifacts are to be retrieved. 

692 A single ref can result in multiple files. The refs must 

693 be resolved. 

694 destination : `lsst.resources.ResourcePath` 

695 Location to write the file artifacts. 

696 transfer : `str`, optional 

697 Method to use to transfer the artifacts. Must be one of the options 

698 supported by `lsst.resources.ResourcePath.transfer_from()`. 

699 "move" is not allowed. 

700 preserve_path : `bool`, optional 

701 If `True` the full path of the file artifact within the datastore 

702 is preserved. If `False` the final file component of the path 

703 is used. 

704 overwrite : `bool`, optional 

705 If `True` allow transfers to overwrite existing files at the 

706 destination. 

707 

708 Returns 

709 ------- 

710 targets : `list` of `lsst.resources.ResourcePath` 

711 URIs of file artifacts in destination location. Order is not 

712 preserved. 

713 """ 

714 if not destination.isdir(): 714 ↛ 715 (line 714 didn't jump to line 715, because the condition on line 714 was never true)

715 raise ValueError(f"Destination location must refer to a directory. Given {destination}") 

716 

717 # Using getURIs is not feasible since it becomes difficult to 

718 # determine the path within the datastore later on. For now 

719 # follow getURIs implementation approach. 

720 

721 pending = set(refs) 

722 

723 # There is a question as to whether an exception should be raised 

724 # early if some of the refs are missing, or whether files should be 

725 # transferred until a problem is hit. Prefer to complain up front. 

726 # Use the datastore integer as primary key. 

727 grouped_by_datastore: Dict[int, Set[DatasetRef]] = {} 

728 

729 for number, datastore in enumerate(self.datastores): 

730 if datastore.isEphemeral: 

731 # In the future we will want to distinguish in-memory from 

732 # caching datastore since using an on-disk local 

733 # cache is exactly what we should be doing. 

734 continue 

735 datastore_refs = {ref for ref in pending if datastore.exists(ref)} 

736 

737 if datastore_refs: 

738 grouped_by_datastore[number] = datastore_refs 

739 

740 # Remove these from the pending list so that we do not bother 

741 # looking for them any more. 

742 pending = pending - datastore_refs 

743 

744 if pending: 744 ↛ 745 (line 744 didn't jump to line 745, because the condition on line 744 was never true)

745 raise RuntimeError(f"Some datasets were not found in any datastores: {pending}") 

746 

747 # Now do the transfer. 

748 targets: List[ResourcePath] = [] 

749 for number, datastore_refs in grouped_by_datastore.items(): 

750 targets.extend( 

751 self.datastores[number].retrieveArtifacts( 

752 datastore_refs, 

753 destination, 

754 transfer=transfer, 

755 preserve_path=preserve_path, 

756 overwrite=overwrite, 

757 ) 

758 ) 

759 

760 return targets 

761 
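
The grouping step above (assign each ref to the first non-ephemeral datastore that has it, then complain up front about leftovers) looks like this in isolation:

    # Illustrative sketch only: group refs by the first store holding them.
    stores = [{"a", "b"}, {"c"}]       # hypothetical non-ephemeral store contents
    pending = {"a", "c"}

    grouped = {}
    for number, store in enumerate(stores):
        hits = {ref for ref in pending if ref in store}
        if hits:
            grouped[number] = hits
            pending -= hits

    if pending:
        raise RuntimeError(f"Some datasets were not found in any datastores: {pending}")
    print(grouped)  # {0: {'a'}, 1: {'c'}}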

762 def remove(self, ref: DatasetRef) -> None: 

763 """Indicate to the datastore that a dataset can be removed. 

764 

765 The dataset will be removed from each datastore. The dataset is 

766 not required to exist in every child datastore. 

767 

768 Parameters 

769 ---------- 

770 ref : `DatasetRef` 

771 Reference to the required dataset. 

772 

773 Raises 

774 ------ 

775 FileNotFoundError 

776 Attempt to remove a dataset that does not exist. Raised if none 

777 of the child datastores removed the dataset. 

778 """ 

779 log.debug("Removing %s", ref) 

780 self.trash(ref, ignore_errors=False) 

781 self.emptyTrash(ignore_errors=False) 

782 

783 def forget(self, refs: Iterable[DatasetRef]) -> None: 

784 for datastore in tuple(self.datastores): 

785 datastore.forget(refs) 

786 

787 def trash(self, ref: Union[DatasetRef, Iterable[DatasetRef]], ignore_errors: bool = True) -> None: 

788 if isinstance(ref, DatasetRef): 

789 ref_label = str(ref) 

790 else: 

791 ref_label = "bulk datasets" 

792 

793 log.debug("Trashing %s", ref_label) 

794 

795 counter = 0 

796 for datastore in self.datastores: 

797 try: 

798 datastore.trash(ref, ignore_errors=ignore_errors) 

799 counter += 1 

800 except FileNotFoundError: 

801 pass 

802 

803 if counter == 0: 

804 err_msg = f"Could not mark for removal from any child datastore: {ref_label}" 

805 if ignore_errors: 805 ↛ 806 (line 805 didn't jump to line 806, because the condition on line 805 was never true)

806 log.warning(err_msg) 

807 else: 

808 raise FileNotFoundError(err_msg) 

809 

810 def emptyTrash(self, ignore_errors: bool = True) -> None: 

811 for datastore in self.datastores: 

812 datastore.emptyTrash(ignore_errors=ignore_errors) 

813 

814 def transfer(self, inputDatastore: Datastore, ref: DatasetRef) -> None: 

815 """Retrieve a dataset from an input `Datastore`, 

816 and store the result in this `Datastore`. 

817 

818 Parameters 

819 ---------- 

820 inputDatastore : `Datastore` 

821 The external `Datastore` from which to retrieve the Dataset.

822 ref : `DatasetRef` 

823 Reference to the required dataset in the input data store. 

824 

825 Notes

826 -----

827 The dataset is read from the input datastore with ``get()`` and then

828 stored in this chain with ``put()``, which writes it to every child

829 datastore that accepts it. Nothing is returned.

830 """ 

831 assert inputDatastore is not self # unless we want it for renames? 

832 inMemoryDataset = inputDatastore.get(ref) 

833 self.put(inMemoryDataset, ref) 

834 

835 def validateConfiguration( 

836 self, entities: Iterable[Union[DatasetRef, DatasetType, StorageClass]], logFailures: bool = False 

837 ) -> None: 

838 """Validate some of the configuration for this datastore. 

839 

840 Parameters 

841 ---------- 

842 entities : iterable of `DatasetRef`, `DatasetType`, or `StorageClass` 

843 Entities to test against this configuration. Can be differing 

844 types. 

845 logFailures : `bool`, optional 

846 If `True`, output a log message for every validation error 

847 detected. 

848 

849 Raises 

850 ------ 

851 DatastoreValidationError 

852 Raised if there is a validation problem with a configuration. 

853 All the problems are reported in a single exception. 

854 

855 Notes 

856 ----- 

857 This method checks each datastore in turn. 

858 """ 

859 

860 # Need to catch each of the datastore outputs and ensure that 

861 # all are tested. 

862 failures = [] 

863 for datastore in self.datastores: 

864 try: 

865 datastore.validateConfiguration(entities, logFailures=logFailures) 

866 except DatastoreValidationError as e: 

867 if logFailures: 867 ↛ 869 (line 867 didn't jump to line 869, because the condition on line 867 was never false)

868 log.critical("Datastore %s failed validation", datastore.name) 

869 failures.append(f"Datastore {self.name}: {e}") 

870 

871 if failures: 

872 msg = ";\n".join(failures) 

873 raise DatastoreValidationError(msg) 

874 

875 def validateKey(self, lookupKey: LookupKey, entity: Union[DatasetRef, DatasetType, StorageClass]) -> None: 

876 # Docstring is inherited from base class 

877 failures = [] 

878 for datastore in self.datastores: 

879 try: 

880 datastore.validateKey(lookupKey, entity) 

881 except DatastoreValidationError as e: 

882 failures.append(f"Datastore {self.name}: {e}") 

883 

884 if failures: 

885 msg = ";\n".join(failures) 

886 raise DatastoreValidationError(msg) 

887 

888 def getLookupKeys(self) -> Set[LookupKey]: 

889 # Docstring is inherited from base class 

890 keys = set() 

891 for datastore in self.datastores: 

892 keys.update(datastore.getLookupKeys()) 

893 

894 keys.update(self.constraints.getLookupKeys()) 

895 for p in self.datastoreConstraints: 

896 if p is not None: 896 ↛ 897 (line 896 didn't jump to line 897, because the condition on line 896 was never true)

897 keys.update(p.getLookupKeys()) 

898 

899 return keys 

900 

901 def needs_expanded_data_ids( 

902 self, 

903 transfer: Optional[str], 

904 entity: Optional[Union[DatasetRef, DatasetType, StorageClass]] = None, 

905 ) -> bool: 

906 # Docstring inherited. 

907 # We can't safely use `self.datastoreConstraints` with `entity` to 

908 # check whether a child datastore would even want to ingest this 

909 # dataset, because we don't want to filter out datastores that might 

910 # need an expanded data ID based on incomplete information (e.g. we

911 # pass a StorageClass, but the constraint dispatches on DatasetType). 

912 # So we pessimistically check if any datastore would need an expanded 

913 # data ID for this transfer mode. 

914 return any(datastore.needs_expanded_data_ids(transfer) for datastore in self.datastores) 914 ↛ exit (line 914 didn't finish the generator expression on line 914)

915 

916 def import_records(self, data: Mapping[str, DatastoreRecordData]) -> None: 

917 # Docstring inherited from the base class. 

918 

919 for datastore in self.datastores: 

920 datastore.import_records(data) 

921 

922 def export_records(self, refs: Iterable[DatasetIdRef]) -> Mapping[str, DatastoreRecordData]: 

923 # Docstring inherited from the base class. 

924 

925 all_records: Dict[str, DatastoreRecordData] = {} 

926 

927 # Merge all sub-datastore records into one structure 

928 for datastore in self.datastores: 

929 sub_records = datastore.export_records(refs) 

930 for name, record_data in sub_records.items(): 

931 # All datastore names must be unique in a chain. 

932 if name in all_records: 932 ↛ 933 (line 932 didn't jump to line 933, because the condition on line 932 was never true)

933 raise ValueError(f"Non-unique datastore name found in datastore {datastore}")

934 all_records[name] = record_data 

935 

936 return all_records
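
The merge performed by ``export_records`` is a dictionary union with a uniqueness check on datastore names, sketched here with hypothetical record payloads:

    # Illustrative sketch only: merge per-child record maps, names must be unique.
    per_child = [
        {"FileDatastore@<butlerRoot>": ["record-1"]},
        {"InMemoryDatastore@<id>": ["record-2"]},
    ]

    all_records = {}
    for sub_records in per_child:
        for name, record_data in sub_records.items():
            if name in all_records:
                raise ValueError(f"Non-unique datastore name found: {name}")
            all_records[name] = record_data

    print(sorted(all_records))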