
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

"""Chained datastore."""

__all__ = ("ChainedDatastore",)

import time
import logging
import warnings
import itertools
from typing import (
    TYPE_CHECKING,
    Any,
    Dict,
    List,
    Iterable,
    Mapping,
    Optional,
    Sequence,
    Set,
    Tuple,
    Union,
)

from lsst.utils import doImport
from lsst.daf.butler import ButlerURI, Datastore, DatastoreConfig, DatasetTypeNotSupportedError, \
    DatastoreValidationError, Constraints, FileDataset, DatasetRef

if TYPE_CHECKING:
    from lsst.daf.butler import Config, DatasetType, LookupKey, StorageClass
    from lsst.daf.butler.registry.interfaces import DatastoreRegistryBridgeManager

log = logging.getLogger(__name__)


class _IngestPrepData(Datastore.IngestPrepData):
    """Helper class for ChainedDatastore ingest implementation.

    Parameters
    ----------
    children : `list` of `tuple`
        Pairs of `Datastore`, `IngestPrepData` for all child datastores.
    """
    def __init__(self, children: List[Tuple[Datastore, Datastore.IngestPrepData]]):
        super().__init__(itertools.chain.from_iterable(data.refs.values() for _, data in children))
        self.children = children


class ChainedDatastore(Datastore):
    """Chained Datastores to allow reads and writes from multiple datastores.

    A ChainedDatastore is configured with multiple datastore configurations.
    A ``put()`` is always sent to each datastore. A ``get()``
    operation is sent to each datastore in turn and the first datastore
    to return a valid dataset is used.

    Parameters
    ----------
    config : `DatastoreConfig` or `str`
        Configuration. This configuration must include a ``datastores`` field
        as a sequence of datastore configurations. The order in this sequence
        indicates the order to use for read operations.
    bridgeManager : `DatastoreRegistryBridgeManager`
        Object that manages the interface between `Registry` and datastores.
    butlerRoot : `str`, optional
        New datastore root to use to override the configuration value. This
        root is sent to each child datastore.

    Notes
    -----
    ChainedDatastore never supports `None` or `"move"` as an `ingest` transfer
    mode. It supports `"copy"`, `"symlink"`, `"relsymlink"`
    and `"hardlink"` if and only if all its child datastores do.
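
    Examples
    --------
    A configuration sketch for a chain of two child datastores. The child
    ``cls`` values shown here are illustrative; in practice this structure
    normally comes from a YAML file such as
    ``datastores/chainedDatastore.yaml``::

        datastore:
          cls: lsst.daf.butler.datastores.chainedDatastore.ChainedDatastore
          datastores:
            - cls: lsst.daf.butler.datastores.inMemoryDatastore.InMemoryDatastore
            - cls: lsst.daf.butler.datastores.fileDatastore.FileDatastore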

95 """ 

96 

97 defaultConfigFile = "datastores/chainedDatastore.yaml" 

98 """Path to configuration defaults. Accessed within the ``configs`` resource 

99 or relative to a search path. Can be None if no defaults specified. 

100 """ 

101 

102 containerKey = "datastores" 

103 """Key to specify where child datastores are configured.""" 

104 

105 datastores: List[Datastore] 

106 """All the child datastores known to this datastore.""" 

107 

108 datastoreConstraints: Sequence[Optional[Constraints]] 

109 """Constraints to be applied to each of the child datastores.""" 

110 

111 @classmethod 

112 def setConfigRoot(cls, root: str, config: Config, full: Config, overwrite: bool = True) -> None: 

113 """Set any filesystem-dependent config options for child Datastores to 

114 be appropriate for a new empty repository with the given root. 

115 

116 Parameters 

117 ---------- 

118 root : `str` 

119 Filesystem path to the root of the data repository. 

120 config : `Config` 

121 A `Config` to update. Only the subset understood by 

122 this component will be updated. Will not expand 

123 defaults. 

124 full : `Config` 

125 A complete config with all defaults expanded that can be 

126 converted to a `DatastoreConfig`. Read-only and will not be 

127 modified by this method. 

128 Repository-specific options that should not be obtained 

129 from defaults when Butler instances are constructed 

130 should be copied from ``full`` to ``config``. 

131 overwrite : `bool`, optional 

132 If `False`, do not modify a value in ``config`` if the value 

133 already exists. Default is always to overwrite with the provided 

134 ``root``. 

135 

136 Notes 

137 ----- 

138 If a keyword is explicitly defined in the supplied ``config`` it 

139 will not be overridden by this method if ``overwrite`` is `False`. 

140 This allows explicit values set in external configs to be retained. 

141 """ 

142 

143 # Extract the part of the config we care about updating 

144 datastoreConfig = DatastoreConfig(config, mergeDefaults=False) 

145 

146 # And the subset of the full config that we can use for reference. 

147 # Do not bother with defaults because we are told this already has 

148 # them. 

149 fullDatastoreConfig = DatastoreConfig(full, mergeDefaults=False) 

150 

151 # Loop over each datastore config and pass the subsets to the 

152 # child datastores to process. 

153 

154 containerKey = cls.containerKey 

155 for idx, (child, fullChild) in enumerate(zip(datastoreConfig[containerKey], 

156 fullDatastoreConfig[containerKey])): 

157 childConfig = DatastoreConfig(child, mergeDefaults=False) 

158 fullChildConfig = DatastoreConfig(fullChild, mergeDefaults=False) 

159 datastoreClass = doImport(fullChildConfig["cls"]) 

160 newroot = "{}/{}_{}".format(root, datastoreClass.__qualname__, idx) 
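            # newroot places each child datastore in its own subdirectory of
            # the new root, named after the child class and its position in
            # the chain, e.g. "<root>/FileDatastore_1" (name illustrative).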

            datastoreClass.setConfigRoot(newroot, childConfig, fullChildConfig, overwrite=overwrite)

            # Reattach to parent
            datastoreConfig[containerKey, idx] = childConfig

        # Reattach modified datastore config to parent
        # If this has a datastore key we attach there, otherwise we assume
        # this information goes at the top of the config hierarchy.
        if DatastoreConfig.component in config:
            config[DatastoreConfig.component] = datastoreConfig
        else:
            config.update(datastoreConfig)

        return

    def __init__(self, config: Union[Config, str], bridgeManager: DatastoreRegistryBridgeManager,
                 butlerRoot: str = None):
        super().__init__(config, bridgeManager)

        # Scan for child datastores and instantiate them with the same registry
        self.datastores = []
        for c in self.config["datastores"]:
            c = DatastoreConfig(c)
            datastoreType = doImport(c["cls"])
            datastore = datastoreType(c, bridgeManager, butlerRoot=butlerRoot)
            log.debug("Creating child datastore %s", datastore.name)
            self.datastores.append(datastore)

        # Name ourself based on our children
        if self.datastores:
            # We must set the names explicitly
            self._names = [d.name for d in self.datastores]
            childNames = ",".join(self.names)
        else:
            childNames = "(empty@{})".format(time.time())
            self._names = [childNames]
        self.name = "{}[{}]".format(type(self).__qualname__, childNames)

        # We declare we are ephemeral if all our child datastores declare
        # they are ephemeral
        isEphemeral = True
        for d in self.datastores:
            if not d.isEphemeral:
                isEphemeral = False
                break
        self.isEphemeral = isEphemeral

        # per-datastore override constraints
        if "datastore_constraints" in self.config:
            overrides = self.config["datastore_constraints"]

            if len(overrides) != len(self.datastores):
                raise DatastoreValidationError(f"Number of registered datastores ({len(self.datastores)})"
                                               " differs from number of constraints overrides"
                                               f" {len(overrides)}")

            self.datastoreConstraints = [Constraints(c.get("constraints"), universe=bridgeManager.universe)
                                         for c in overrides]

        else:
            self.datastoreConstraints = (None,) * len(self.datastores)

        log.debug("Created %s (%s)", self.name, ("ephemeral" if self.isEphemeral else "permanent"))

    @property
    def names(self) -> Tuple[str, ...]:
        return tuple(self._names)

    def __str__(self) -> str:
        chainName = ", ".join(str(ds) for ds in self.datastores)
        return chainName

    def knows(self, ref: DatasetRef) -> bool:
        """Check if the dataset is known to any of the datastores.

        Does not check for existence of any artifact.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required dataset.

        Returns
        -------
        exists : `bool`
            `True` if the dataset is known to the datastore.
        """
        for datastore in self.datastores:
            if datastore.knows(ref):
                log.debug("%s known to datastore %s", ref, datastore.name)
                return True
        return False

    def exists(self, ref: DatasetRef) -> bool:
        """Check if the dataset exists in one of the datastores.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required dataset.

        Returns
        -------
        exists : `bool`
            `True` if the entity exists in one of the child datastores.
        """
        for datastore in self.datastores:
            if datastore.exists(ref):
                log.debug("Found %s in datastore %s", ref, datastore.name)
                return True
        return False

    def get(self, ref: DatasetRef, parameters: Optional[Mapping[str, Any]] = None) -> Any:
        """Load an InMemoryDataset from the store.

        The dataset is returned from the first datastore that has
        the dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required Dataset.
        parameters : `dict`
            `StorageClass`-specific parameters that specify, for example,
            a slice of the dataset to be loaded.

        Returns
        -------
        inMemoryDataset : `object`
            Requested dataset or slice thereof as an InMemoryDataset.

        Raises
        ------
        FileNotFoundError
            Requested dataset can not be retrieved.
        TypeError
            Return value from formatter has unexpected type.
        ValueError
            Formatter failed to process the dataset.
        """

        for datastore in self.datastores:
            try:
                inMemoryObject = datastore.get(ref, parameters)
                log.debug("Found dataset %s in datastore %s", ref, datastore.name)
                return inMemoryObject
            except FileNotFoundError:
                pass

        raise FileNotFoundError("Dataset {} could not be found in any of the datastores".format(ref))

    def put(self, inMemoryDataset: Any, ref: DatasetRef) -> None:
        """Write an InMemoryDataset with a given `DatasetRef` to each
        datastore.

        The put() to child datastores can fail with
        `DatasetTypeNotSupportedError`. The put() for this datastore will be
        deemed to have succeeded so long as at least one child datastore
        accepted the inMemoryDataset.

        Parameters
        ----------
        inMemoryDataset : `object`
            The dataset to store.
        ref : `DatasetRef`
            Reference to the associated Dataset.

        Raises
        ------
        TypeError
            Supplied object and storage class are inconsistent.
        DatasetTypeNotSupportedError
            All datastores reported `DatasetTypeNotSupportedError`.
        """
        log.debug("Put %s", ref)

        # Confirm that we can accept this dataset
        if not self.constraints.isAcceptable(ref):
            # Raise rather than use boolean return value.
            raise DatasetTypeNotSupportedError(f"Dataset {ref} has been rejected by this datastore via"
                                               " configuration.")

        isPermanent = False
        nsuccess = 0
        npermanent = 0
        nephemeral = 0
        for datastore, constraints in zip(self.datastores, self.datastoreConstraints):
            if constraints is not None and not constraints.isAcceptable(ref):
                log.debug("Datastore %s skipping put via configuration for ref %s",
                          datastore.name, ref)
                continue

            if datastore.isEphemeral:
                nephemeral += 1
            else:
                npermanent += 1
            try:
                datastore.put(inMemoryDataset, ref)
                nsuccess += 1
                if not datastore.isEphemeral:
                    isPermanent = True
            except DatasetTypeNotSupportedError:
                pass

        if nsuccess == 0:
            raise DatasetTypeNotSupportedError(f"None of the chained datastores supported ref {ref}")

        if not isPermanent and npermanent > 0:
            warnings.warn(f"Put of {ref} only succeeded in ephemeral datastores", stacklevel=2)

        if self._transaction is not None:
            self._transaction.registerUndo('put', self.remove, ref)

    def _overrideTransferMode(self, *datasets: Any, transfer: Optional[str] = None) -> Optional[str]:
        # Docstring inherited from base class.
        if transfer != "auto":
            return transfer
        # Ask each datastore what they think auto means
        transfers = {d._overrideTransferMode(*datasets, transfer=transfer) for d in self.datastores}

        # Remove any untranslated "auto" values
        transfers.discard(transfer)

        if len(transfers) == 1:
            return transfers.pop()
        if not transfers:
            # Everything reported "auto"
            return transfer

        raise RuntimeError("Chained datastore does not yet support different transfer modes"
                           f" from 'auto' in each child datastore (wanted {transfers})")

    def _prepIngest(self, *datasets: FileDataset, transfer: Optional[str] = None) -> _IngestPrepData:
        # Docstring inherited from Datastore._prepIngest.
        if transfer is None or transfer == "move":
            raise NotImplementedError("ChainedDatastore does not support transfer=None or transfer='move'.")

        def isDatasetAcceptable(dataset: FileDataset, *, name: str, constraints: Constraints) -> bool:
            acceptable = [ref for ref in dataset.refs if constraints.isAcceptable(ref)]
            if not acceptable:
                log.debug("Datastore %s skipping ingest via configuration for refs %s",
                          name, ", ".join(str(ref) for ref in dataset.refs))
                return False
            else:
                return True

        # Filter down to just datasets the chained datastore's own
        # configuration accepts.
        okForParent: List[FileDataset] = [dataset for dataset in datasets
                                          if isDatasetAcceptable(dataset, name=self.name,
                                                                 constraints=self.constraints)]

        # Iterate over nested datastores and call _prepIngest on each.
        # Save the results to a list:
        children: List[Tuple[Datastore, Datastore.IngestPrepData]] = []
        # ...and remember whether all of the failures are due to
        # NotImplementedError being raised.
        allFailuresAreNotImplementedError = True
        for datastore, constraints in zip(self.datastores, self.datastoreConstraints):
            okForChild: List[FileDataset]
            if constraints is not None:
                okForChild = [dataset for dataset in okForParent
                              if isDatasetAcceptable(dataset, name=datastore.name,
                                                     constraints=constraints)]
            else:
                okForChild = okForParent
            try:
                prepDataForChild = datastore._prepIngest(*okForChild, transfer=transfer)
            except NotImplementedError:
                log.debug("Skipping ingest for datastore %s because transfer "
                          "mode %s is not supported.", datastore.name, transfer)
                continue
            allFailuresAreNotImplementedError = False
            children.append((datastore, prepDataForChild))
        if allFailuresAreNotImplementedError:
            raise NotImplementedError(f"No child datastore supports transfer mode {transfer}.")
        return _IngestPrepData(children=children)

    def _finishIngest(self, prepData: _IngestPrepData, *, transfer: Optional[str] = None) -> None:
        # Docstring inherited from Datastore._finishIngest.
        for datastore, prepDataForChild in prepData.children:
            datastore._finishIngest(prepDataForChild, transfer=transfer)

    def getURIs(self, ref: DatasetRef,
                predict: bool = False) -> Tuple[Optional[ButlerURI], Dict[str, ButlerURI]]:
        """Return URIs associated with dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required dataset.
        predict : `bool`, optional
            If the datastore does not know about the dataset, should it
            return a predicted URI or not?

        Returns
        -------
        primary : `ButlerURI`
            The URI to the primary artifact associated with this dataset.
            If the dataset was disassembled within the datastore this
            may be `None`.
        components : `dict`
            URIs to any components associated with the dataset artifact.
            Can be empty if there are no components.

        Notes
        -----
        The returned URI is from the first datastore in the list that has
        the dataset with preference given to the first dataset coming from
        a permanent datastore. If no datastores have the dataset and prediction
        is allowed, the predicted URI for the first datastore in the list will
        be returned.
        """
        DatastoreURIs = Tuple[Optional[ButlerURI], Dict[str, ButlerURI]]
        log.debug("Requesting URIs for %s", ref)
        predictedUri: Optional[DatastoreURIs] = None
        predictedEphemeralUri: Optional[DatastoreURIs] = None
        firstEphemeralUri: Optional[DatastoreURIs] = None
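        # Order of preference: an existing artifact in a permanent datastore,
        # then an existing artifact in an ephemeral datastore, then (when
        # predict is set) a predicted permanent URI, then a predicted
        # ephemeral URI.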

        for datastore in self.datastores:
            if datastore.exists(ref):
                if not datastore.isEphemeral:
                    uri = datastore.getURIs(ref)
                    log.debug("Retrieved non-ephemeral URI: %s", uri)
                    return uri
                elif not firstEphemeralUri:
                    firstEphemeralUri = datastore.getURIs(ref)
            elif predict:
                if not predictedUri and not datastore.isEphemeral:
                    predictedUri = datastore.getURIs(ref, predict)
                elif not predictedEphemeralUri and datastore.isEphemeral:
                    predictedEphemeralUri = datastore.getURIs(ref, predict)

        if firstEphemeralUri:
            log.debug("Retrieved ephemeral URI: %s", firstEphemeralUri)
            return firstEphemeralUri

        if predictedUri:
            log.debug("Retrieved predicted URI: %s", predictedUri)
            return predictedUri

        if predictedEphemeralUri:
            log.debug("Retrieved predicted ephemeral URI: %s", predictedEphemeralUri)
            return predictedEphemeralUri

        raise FileNotFoundError("Dataset {} not in any datastore".format(ref))

    def getURI(self, ref: DatasetRef, predict: bool = False) -> ButlerURI:
        """URI to the Dataset.

        The returned URI is from the first datastore in the list that has
        the dataset with preference given to the first dataset coming from
        a permanent datastore. If no datastores have the dataset and prediction
        is allowed, the predicted URI for the first datastore in the list will
        be returned.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required Dataset.
        predict : `bool`
            If `True`, allow URIs to be returned of datasets that have not
            been written.

        Returns
        -------
        uri : `ButlerURI`
            URI pointing to the dataset within the datastore. If the
            dataset does not exist in the datastore, and if ``predict`` is
            `True`, the URI will be a prediction and will include a URI
            fragment "#predicted".

        Notes
        -----
        If the datastore does not have entities that relate well
        to the concept of a URI, the returned URI string will be
        descriptive. The returned URI is not guaranteed to be obtainable.

        Raises
        ------
        FileNotFoundError
            A URI has been requested for a dataset that does not exist and
            guessing is not allowed.
        RuntimeError
            Raised if a request is made for a single URI but multiple URIs
            are associated with this dataset.
        """
        log.debug("Requesting URI for %s", ref)
        primary, components = self.getURIs(ref, predict)
        if primary is None or components:
            raise RuntimeError(f"Dataset ({ref}) includes distinct URIs for components. "
                               "Use Datastore.getURIs() instead.")
        return primary

    def retrieveArtifacts(self, refs: Iterable[DatasetRef],
                          destination: ButlerURI, transfer: str = "auto",
                          preserve_path: bool = True,
                          overwrite: bool = False) -> List[ButlerURI]:
        """Retrieve the file artifacts associated with the supplied refs.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            The datasets for which file artifacts are to be retrieved.
            A single ref can result in multiple files. The refs must
            be resolved.
        destination : `ButlerURI`
            Location to write the file artifacts.
        transfer : `str`, optional
            Method to use to transfer the artifacts. Must be one of the options
            supported by `ButlerURI.transfer_from()`. "move" is not allowed.
        preserve_path : `bool`, optional
            If `True` the full path of the file artifact within the datastore
            is preserved. If `False` the final file component of the path
            is used.
        overwrite : `bool`, optional
            If `True` allow transfers to overwrite existing files at the
            destination.

        Returns
        -------
        targets : `list` of `ButlerURI`
            URIs of file artifacts in destination location. Order is not
            preserved.
        """
        if not destination.isdir():
            raise ValueError(f"Destination location must refer to a directory. Given {destination}")

        # Using getURIs is not feasible since it becomes difficult to
        # determine the path within the datastore later on. For now
        # follow getURIs implementation approach.

        pending = set(refs)

        # There is a question as to whether an exception should be raised
        # early if some of the refs are missing, or whether files should be
        # transferred until a problem is hit. Prefer to complain up front.
        # Use the datastore's integer index in the chain as the grouping key.
        grouped_by_datastore: Dict[int, Set[DatasetRef]] = {}

        for number, datastore in enumerate(self.datastores):
            if datastore.isEphemeral:
                # In the future we will want to distinguish in-memory from
                # caching datastore since using an on-disk local
                # cache is exactly what we should be doing.
                continue
            datastore_refs = {ref for ref in pending if datastore.exists(ref)}

            if datastore_refs:
                grouped_by_datastore[number] = datastore_refs

                # Remove these from the pending list so that we do not bother
                # looking for them any more.
                pending = pending - datastore_refs

        if pending:
            raise RuntimeError(f"Some datasets were not found in any datastores: {pending}")

        # Now do the transfer.
        targets: List[ButlerURI] = []
        for number, datastore_refs in grouped_by_datastore.items():
            targets.extend(self.datastores[number].retrieveArtifacts(datastore_refs, destination,
                                                                     transfer=transfer,
                                                                     preserve_path=preserve_path,
                                                                     overwrite=overwrite))

        return targets

    def remove(self, ref: DatasetRef) -> None:
        """Indicate to the datastore that a dataset can be removed.

        The dataset will be removed from each datastore. The dataset is
        not required to exist in every child datastore.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required dataset.

        Raises
        ------
        FileNotFoundError
            Attempt to remove a dataset that does not exist. Raised if none
            of the child datastores removed the dataset.
        """
        log.debug("Removing %s", ref)
        self.trash(ref, ignore_errors=False)
        self.emptyTrash(ignore_errors=False)

    def forget(self, refs: Iterable[DatasetRef]) -> None:
        for datastore in tuple(self.datastores):
            datastore.forget(refs)

    def trash(self, ref: Union[DatasetRef, Iterable[DatasetRef]], ignore_errors: bool = True) -> None:
        if isinstance(ref, DatasetRef):
            ref_label = str(ref)
        else:
            ref_label = "bulk datasets"

        log.debug("Trashing %s", ref_label)

        counter = 0
        for datastore in self.datastores:
            try:
                datastore.trash(ref, ignore_errors=ignore_errors)
                counter += 1
            except FileNotFoundError:
                pass

        if counter == 0:
            err_msg = f"Could not mark for removal from any child datastore: {ref_label}"
            if ignore_errors:
                log.warning(err_msg)
            else:
                raise FileNotFoundError(err_msg)

    def emptyTrash(self, ignore_errors: bool = True) -> None:
        for datastore in self.datastores:
            datastore.emptyTrash(ignore_errors=ignore_errors)

    def transfer(self, inputDatastore: Datastore, ref: DatasetRef) -> None:
        """Retrieve a dataset from an input `Datastore`,
        and store the result in this `Datastore`.

        Parameters
        ----------
        inputDatastore : `Datastore`
            The external `Datastore` from which to retrieve the Dataset.
        ref : `DatasetRef`
            Reference to the required dataset in the input data store.
        """
        assert inputDatastore is not self  # unless we want it for renames?
        inMemoryDataset = inputDatastore.get(ref)
        self.put(inMemoryDataset, ref)

    def validateConfiguration(self, entities: Iterable[Union[DatasetRef, DatasetType, StorageClass]],
                              logFailures: bool = False) -> None:
        """Validate some of the configuration for this datastore.

        Parameters
        ----------
        entities : iterable of `DatasetRef`, `DatasetType`, or `StorageClass`
            Entities to test against this configuration. Can be differing
            types.
        logFailures : `bool`, optional
            If `True`, output a log message for every validation error
            detected.

        Raises
        ------
        DatastoreValidationError
            Raised if there is a validation problem with a configuration.
            All the problems are reported in a single exception.

        Notes
        -----
        This method checks each datastore in turn.
        """

        # Need to catch each of the datastore outputs and ensure that
        # all are tested.
        failures = []
        for datastore in self.datastores:
            try:
                datastore.validateConfiguration(entities, logFailures=logFailures)
            except DatastoreValidationError as e:
                if logFailures:
                    log.critical("Datastore %s failed validation", datastore.name)
                failures.append(f"Datastore {self.name}: {e}")

        if failures:
            msg = ";\n".join(failures)
            raise DatastoreValidationError(msg)

    def validateKey(self, lookupKey: LookupKey,
                    entity: Union[DatasetRef, DatasetType, StorageClass]) -> None:
        # Docstring is inherited from base class
        failures = []
        for datastore in self.datastores:
            try:
                datastore.validateKey(lookupKey, entity)
            except DatastoreValidationError as e:
                failures.append(f"Datastore {self.name}: {e}")

        if failures:
            msg = ";\n".join(failures)
            raise DatastoreValidationError(msg)

    def getLookupKeys(self) -> Set[LookupKey]:
        # Docstring is inherited from base class
        keys = set()
        for datastore in self.datastores:
            keys.update(datastore.getLookupKeys())

        keys.update(self.constraints.getLookupKeys())
        for p in self.datastoreConstraints:
            if p is not None:
                keys.update(p.getLookupKeys())

        return keys

    def needs_expanded_data_ids(
        self,
        transfer: Optional[str],
        entity: Optional[Union[DatasetRef, DatasetType, StorageClass]] = None,
    ) -> bool:
        # Docstring inherited.
        # We can't safely use `self.datastoreConstraints` with `entity` to
        # check whether a child datastore would even want to ingest this
        # dataset, because we don't want to filter out datastores that might
        # need an expanded data ID based on incomplete information (e.g. we
        # pass a StorageClass, but the constraint dispatches on DatasetType).
        # So we pessimistically check if any datastore would need an expanded
        # data ID for this transfer mode.
        return any(datastore.needs_expanded_data_ids(transfer) for datastore in self.datastores)