Coverage for python/lsst/daf/butler/core/datastore.py: 45%


1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22"""Support for generic data stores.""" 

23 

24from __future__ import annotations 

25 

26__all__ = ("DatastoreConfig", "Datastore", "DatastoreValidationError") 

27 

28import contextlib 

29import logging 

30from abc import ABCMeta, abstractmethod 

31from collections import defaultdict 

32from dataclasses import dataclass 

33from typing import ( 

34 TYPE_CHECKING, 

35 Any, 

36 Callable, 

37 ClassVar, 

38 Dict, 

39 Iterable, 

40 Iterator, 

41 List, 

42 Mapping, 

43 Optional, 

44 Set, 

45 Tuple, 

46 Type, 

47 Union, 

48) 

49 

50from lsst.utils import doImportType 

51 

52from .config import Config, ConfigSubset 

53from .constraints import Constraints 

54from .exceptions import DatasetTypeNotSupportedError, ValidationError 

55from .fileDataset import FileDataset 

56from .storageClass import StorageClassFactory 

57 

58if TYPE_CHECKING:

59 from lsst.resources import ResourcePath, ResourcePathExpression 

60 

61 from ..registry.interfaces import DatastoreRegistryBridgeManager 

62 from .configSupport import LookupKey 

63 from .datasets import DatasetRef, DatasetType 

64 from .storageClass import StorageClass 

65 

66 

67class DatastoreConfig(ConfigSubset): 

68 """Configuration for Datastores.""" 

69 

70 component = "datastore" 

71 requiredKeys = ("cls",) 

72 defaultConfigFile = "datastore.yaml" 

73 

74 

75class DatastoreValidationError(ValidationError): 

76 """There is a problem with the Datastore configuration.""" 

77 

78 pass 

79 

80 

81@dataclass(frozen=True) 

82class Event: 

83 __slots__ = {"name", "undoFunc", "args", "kwargs"} 

84 name: str 

85 undoFunc: Callable 

86 args: tuple 

87 kwargs: dict 

88 

89 

90class IngestPrepData: 

91 """A helper base class for `Datastore` ingest implementations. 

92 

93 Datastore implementations will generally need a custom implementation of 

94 this class. 

95 

96 Should be accessed as ``Datastore.IngestPrepData`` instead of via direct 

97 import. 

98 

99 Parameters 

100 ---------- 

101 refs : iterable of `DatasetRef` 

102 References for the datasets that can be ingested by this datastore. 

103 """ 

104 

105 def __init__(self, refs: Iterable[DatasetRef]): 

106 self.refs = {ref.id: ref for ref in refs} 

107 

108 

109class DatastoreTransaction: 

110 """Keeps a log of `Datastore` activity and allow rollback. 

111 

112 Parameters 

113 ---------- 

114 parent : `DatastoreTransaction`, optional 

115 The parent transaction (if any) 

116 """ 

117 

118 Event: ClassVar[Type] = Event 

119 

120 parent: Optional["DatastoreTransaction"] 

121 """The parent transaction. (`DatastoreTransaction`, optional)""" 

122 

123 def __init__(self, parent: Optional[DatastoreTransaction] = None): 

124 self.parent = parent 

125 self._log: List[Event] = [] 

126 

127 def registerUndo(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> None: 

128 """Register event with undo function. 

129 

130 Parameters 

131 ---------- 

132 name : `str` 

133 Name of the event. 

134 undoFunc : func 

135 Function to undo this event. 

136 args : `tuple` 

137 Positional arguments to `undoFunc`. 

138 **kwargs 

139 Keyword arguments to `undoFunc`. 

140 """ 

141 self._log.append(self.Event(name, undoFunc, args, kwargs)) 

142 
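A minimal sketch (not part of the original module) of how `registerUndo` and `rollback` work together; the file path and payload are hypothetical:

    import os

    txn = DatastoreTransaction()
    with open("/tmp/example.dat", "wb") as fh:
        fh.write(b"payload")
    # Record how to undo the write that just happened.
    txn.registerUndo("write", os.remove, "/tmp/example.dat")
    # If something later fails, undo everything in reverse order.
    txn.rollback()  # calls os.remove("/tmp/example.dat")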

143 @contextlib.contextmanager 

144 def undoWith(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> Iterator[None]: 

145 """Register undo function if nested operation succeeds. 

146 

147 Calls `registerUndo`. 

148 

149 This can be used to wrap individual undo-able statements within a 

150 DatastoreTransaction block. Multiple statements that can fail 

151 separately should not be part of the same `undoWith` block. 

152 

153 All arguments are forwarded directly to `registerUndo`. 

154 """ 

155 try: 

156 yield None 

157 except BaseException: 

158 raise 

159 else: 

160 self.registerUndo(name, undoFunc, *args, **kwargs) 

161 
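Illustrative sketch of `undoWith` (assumed file names): the undo action is only registered if the wrapped statement succeeds, so a failed copy leaves nothing to unroll.

    import os
    import shutil

    txn = DatastoreTransaction()
    with txn.undoWith("copy file", os.remove, "/tmp/copy.dat"):
        shutil.copyfile("/tmp/example.dat", "/tmp/copy.dat")
    # If copyfile had raised, no undo action would have been recorded.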

162 def rollback(self) -> None: 

163 """Roll back all events in this transaction.""" 

164 log = logging.getLogger(__name__) 

165 while self._log: 

166 ev = self._log.pop() 

167 try: 

168 log.debug( 

169 "Rolling back transaction: %s: %s(%s,%s)", 

170 ev.name, 

171 ev.undoFunc, 

172 ",".join(str(a) for a in ev.args), 

173 ",".join(f"{k}={v}" for k, v in ev.kwargs.items()), 

174 ) 

175 except Exception: 

176 # In case we had a problem in stringification of arguments 

177 log.warning("Rolling back transaction: %s", ev.name) 

178 try: 

179 ev.undoFunc(*ev.args, **ev.kwargs) 

180 except BaseException as e: 

181 # Deliberately swallow error that may occur in unrolling 

182 log.warning("Exception: %s caught while unrolling: %s", e, ev.name) 

183 pass 

184 

185 def commit(self) -> None: 

186 """Commit this transaction.""" 

187 if self.parent is None: 

188 # Just forget about the events, they have already happened. 

189 return 

190 else: 

191 # The parent may still need to roll back the events from this

192 # transaction, so hand them over to the parent's log.

193 self.parent._log.extend(self._log) 

194 

195 

196class Datastore(metaclass=ABCMeta): 

197 """Datastore interface. 

198 

199 Parameters 

200 ---------- 

201 config : `DatastoreConfig` or `str` 

202 Load configuration either from an existing config instance or by 

203 referring to a configuration file. 

204 bridgeManager : `DatastoreRegistryBridgeManager` 

205 Object that manages the interface between `Registry` and datastores. 

206 butlerRoot : `str`, optional 

207 New datastore root to use to override the configuration value. 

208 """ 

209 

210 defaultConfigFile: ClassVar[Optional[str]] = None 

211 """Path to configuration defaults. Accessed within the ``config`` resource 

212 or relative to a search path. Can be None if no defaults are specified.

213 """ 

214 

215 containerKey: ClassVar[Optional[str]] = None 

216 """Name of the key containing a list of subconfigurations that also 

217 need to be merged with defaults and will likely use different Python 

218 datastore classes (but all using DatastoreConfig). Assumed to be a 

219 list of configurations that can be represented in a DatastoreConfig 

220 and containing a "cls" definition. None indicates that no containers 

221 are expected in this Datastore.""" 

222 

223 isEphemeral: bool = False 

224 """Indicate whether this Datastore is ephemeral or not. An ephemeral 

225 datastore is one where the contents of the datastore will not exist 

226 across process restarts. This value can change per-instance.""" 

227 

228 config: DatastoreConfig 

229 """Configuration used to create Datastore.""" 

230 

231 name: str 

232 """Label associated with this Datastore.""" 

233 

234 storageClassFactory: StorageClassFactory 

235 """Factory for creating storage class instances from name.""" 

236 

237 constraints: Constraints 

238 """Constraints to apply when putting datasets into the datastore.""" 

239 

240 # MyPy does not like this to be annotated as any kind of type, because

241 # it cannot do static checking on type variables that can change at runtime.

242 IngestPrepData: ClassVar[Any] = IngestPrepData 

243 """Helper base class for ingest implementations. 

244 """ 

245 

246 @classmethod 

247 @abstractmethod 

248 def setConfigRoot(cls, root: str, config: Config, full: Config, overwrite: bool = True) -> None: 

249 """Set filesystem-dependent config options for this datastore. 

250 

251 The options will be appropriate for a new empty repository with the 

252 given root. 

253 

254 Parameters 

255 ---------- 

256 root : `str` 

257 Filesystem path to the root of the data repository. 

258 config : `Config` 

259 A `Config` to update. Only the subset understood by 

260 this component will be updated. Will not expand 

261 defaults. 

262 full : `Config` 

263 A complete config with all defaults expanded that can be 

264 converted to a `DatastoreConfig`. Read-only and will not be 

265 modified by this method. 

266 Repository-specific options that should not be obtained 

267 from defaults when Butler instances are constructed 

268 should be copied from ``full`` to ``config``. 

269 overwrite : `bool`, optional 

270 If `False`, do not modify a value in ``config`` if the value 

271 already exists. Default is always to overwrite with the provided 

272 ``root``. 

273 

274 Notes 

275 ----- 

276 If a keyword is explicitly defined in the supplied ``config`` it 

277 will not be overridden by this method if ``overwrite`` is `False`. 

278 This allows explicit values set in external configs to be retained. 

279 """ 

280 raise NotImplementedError() 

281 

282 @staticmethod 

283 def fromConfig( 

284 config: Config, 

285 bridgeManager: DatastoreRegistryBridgeManager, 

286 butlerRoot: Optional[ResourcePathExpression] = None, 

287 ) -> "Datastore": 

288 """Create datastore from type specified in config file. 

289 

290 Parameters 

291 ---------- 

292 config : `Config` 

293 Configuration instance. 

294 bridgeManager : `DatastoreRegistryBridgeManager` 

295 Object that manages the interface between `Registry` and 

296 datastores. 

297 butlerRoot : `str`, optional 

298 Butler root directory. 

299 """ 

300 cls = doImportType(config["datastore", "cls"]) 

301 if not issubclass(cls, Datastore): 

302 raise TypeError(f"Imported child class {config['datastore', 'cls']} is not a Datastore") 

303 return cls(config=config, bridgeManager=bridgeManager, butlerRoot=butlerRoot) 

304 
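Hedged sketch of typical use of `fromConfig`; the concrete class path is only illustrative, and ``bridge_manager`` is assumed to have been obtained from a `Registry` elsewhere:

    config = Config(
        {"datastore": {"cls": "lsst.daf.butler.datastores.inMemoryDatastore.InMemoryDatastore"}}
    )
    # The config must provide the fully qualified class name under
    # ("datastore", "cls"); the class is imported and instantiated.
    datastore = Datastore.fromConfig(config, bridgeManager=bridge_manager)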

305 def __init__( 

306 self, 

307 config: Union[Config, str], 

308 bridgeManager: DatastoreRegistryBridgeManager, 

309 butlerRoot: Optional[ResourcePathExpression] = None, 

310 ): 

311 self.config = DatastoreConfig(config) 

312 self.name = "ABCDataStore" 

313 self._transaction: Optional[DatastoreTransaction] = None 

314 

315 # All Datastores need storage classes and constraints 

316 self.storageClassFactory = StorageClassFactory() 

317 

318 # And read the constraints list 

319 constraintsConfig = self.config.get("constraints") 

320 self.constraints = Constraints(constraintsConfig, universe=bridgeManager.universe) 

321 

322 def __str__(self) -> str: 

323 return self.name 

324 

325 def __repr__(self) -> str: 

326 return self.name 

327 

328 @property 

329 def names(self) -> Tuple[str, ...]: 

330 """Names associated with this datastore returned as a list. 

331 

332 Can be different to ``name`` for a chaining datastore. 

333 """ 

334 # Default implementation returns solely the name itself 

335 return (self.name,) 

336 

337 @contextlib.contextmanager 

338 def transaction(self) -> Iterator[DatastoreTransaction]: 

339 """Context manager supporting `Datastore` transactions. 

340 

341 Transactions can be nested, and are to be used in combination with 

342 `Registry.transaction`. 

343 """ 

344 self._transaction = DatastoreTransaction(self._transaction) 

345 try: 

346 yield self._transaction 

347 except BaseException: 

348 self._transaction.rollback() 

349 raise 

350 else: 

351 self._transaction.commit() 

352 self._transaction = self._transaction.parent 

353 
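Sketch of the intended usage (``obj`` and ``ref`` are assumed to exist): any exception raised inside the block rolls back the registered undo actions before being re-raised.

    with datastore.transaction():
        datastore.put(obj, ref)
        # Further datastore operations can be nested here; on failure they
        # are all unwound together.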

354 @abstractmethod 

355 def knows(self, ref: DatasetRef) -> bool: 

356 """Check if the dataset is known to the datastore. 

357 

358 Does not check for existence of any artifact. 

359 

360 Parameters 

361 ---------- 

362 ref : `DatasetRef` 

363 Reference to the required dataset. 

364 

365 Returns 

366 ------- 

367 exists : `bool` 

368 `True` if the dataset is known to the datastore. 

369 """ 

370 raise NotImplementedError() 

371 

372 def mexists( 

373 self, refs: Iterable[DatasetRef], artifact_existence: Optional[Dict[ResourcePath, bool]] = None 

374 ) -> Dict[DatasetRef, bool]: 

375 """Check the existence of multiple datasets at once. 

376 

377 Parameters 

378 ---------- 

379 refs : iterable of `DatasetRef` 

380 The datasets to be checked. 

381 artifact_existence : `dict` [`lsst.resources.ResourcePath`, `bool`] 

382 Optional mapping of datastore artifact to existence. Updated by 

383 this method with details of all artifacts tested. Can be `None` 

384 if the caller is not interested. 

385 

386 Returns 

387 ------- 

388 existence : `dict` of [`DatasetRef`, `bool`] 

389 Mapping from dataset to boolean indicating existence. 

390 """ 

391 existence: Dict[DatasetRef, bool] = {} 

392 # Non-optimized default. 

393 for ref in refs: 

394 existence[ref] = self.exists(ref) 

395 return existence 

396 
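Sketch of a bulk existence check built on this method; ``refs`` is an assumed iterable of resolved `DatasetRef` objects:

    existence = datastore.mexists(refs)
    missing = [ref for ref, exists in existence.items() if not exists]
    if missing:
        print(f"{len(missing)} dataset(s) have no artifact in {datastore.name}")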

397 @abstractmethod 

398 def exists(self, datasetRef: DatasetRef) -> bool: 

399 """Check if the dataset exists in the datastore. 

400 

401 Parameters 

402 ---------- 

403 datasetRef : `DatasetRef` 

404 Reference to the required dataset. 

405 

406 Returns 

407 ------- 

408 exists : `bool` 

409 `True` if the entity exists in the `Datastore`. 

410 """ 

411 raise NotImplementedError("Must be implemented by subclass") 

412 

413 @abstractmethod 

414 def get(self, datasetRef: DatasetRef, parameters: Optional[Mapping[str, Any]] = None) -> Any: 

415 """Load an `InMemoryDataset` from the store. 

416 

417 Parameters 

418 ---------- 

419 datasetRef : `DatasetRef` 

420 Reference to the required Dataset. 

421 parameters : `dict`, optional 

422 `StorageClass`-specific parameters that specify a slice of the 

423 Dataset to be loaded. 

424 

425 Returns 

426 ------- 

427 inMemoryDataset : `object` 

428 Requested Dataset or slice thereof as an InMemoryDataset. 

429 """ 

430 raise NotImplementedError("Must be implemented by subclass") 

431 

432 @abstractmethod 

433 def put(self, inMemoryDataset: Any, datasetRef: DatasetRef) -> None: 

434 """Write a `InMemoryDataset` with a given `DatasetRef` to the store. 

435 

436 Parameters 

437 ---------- 

438 inMemoryDataset : `object` 

439 The Dataset to store. 

440 datasetRef : `DatasetRef` 

441 Reference to the associated Dataset. 

442 """ 

443 raise NotImplementedError("Must be implemented by subclass") 

444 

445 def _overrideTransferMode(self, *datasets: FileDataset, transfer: Optional[str] = None) -> Optional[str]: 

446 """Allow ingest transfer mode to be defaulted based on datasets. 

447 

448 Parameters 

449 ---------- 

450 datasets : `FileDataset` 

451 Each positional argument is a struct containing information about 

452 a file to be ingested, including its path (either absolute or 

453 relative to the datastore root, if applicable), a complete 

454 `DatasetRef` (with ``dataset_id not None``), and optionally a 

455 formatter class or its fully-qualified string name. If a formatter 

456 is not provided, this method should populate that attribute with 

457 the formatter the datastore would use for `put`. Subclasses are 

458 also permitted to modify the path attribute (typically to put it 

459 in what the datastore considers its standard form). 

460 transfer : `str`, optional 

461 How (and whether) the dataset should be added to the datastore. 

462 See `ingest` for details of transfer modes. 

463 

464 Returns 

465 ------- 

466 newTransfer : `str` 

467 Transfer mode to use. Will be identical to the supplied transfer 

468 mode unless "auto" is used. 

469 """ 

470 if transfer != "auto": 

471 return transfer 

472 raise RuntimeError(f"{transfer} is not allowed without specialization.") 

473 

474 def _prepIngest(self, *datasets: FileDataset, transfer: Optional[str] = None) -> IngestPrepData: 

475 """Process datasets to identify which ones can be ingested. 

476 

477 Parameters 

478 ---------- 

479 datasets : `FileDataset` 

480 Each positional argument is a struct containing information about 

481 a file to be ingested, including its path (either absolute or 

482 relative to the datastore root, if applicable), a complete 

483 `DatasetRef` (with ``dataset_id not None``), and optionally a 

484 formatter class or its fully-qualified string name. If a formatter 

485 is not provided, this method should populate that attribute with 

486 the formatter the datastore would use for `put`. Subclasses are 

487 also permitted to modify the path attribute (typically to put it 

488 in what the datastore considers its standard form). 

489 transfer : `str`, optional 

490 How (and whether) the dataset should be added to the datastore. 

491 See `ingest` for details of transfer modes. 

492 

493 Returns 

494 ------- 

495 data : `IngestPrepData` 

496 An instance of a subclass of `IngestPrepData`, used to pass 

497 arbitrary data from `_prepIngest` to `_finishIngest`. This should 

498 include only the datasets this datastore can actually ingest; 

499 others should be silently ignored (`Datastore.ingest` will inspect 

500 `IngestPrepData.refs` and raise `DatasetTypeNotSupportedError` if 

501 necessary). 

502 

503 Raises 

504 ------ 

505 NotImplementedError 

506 Raised if the datastore does not support the given transfer mode 

507 (including the case where ingest is not supported at all). 

508 FileNotFoundError 

509 Raised if one of the given files does not exist. 

510 FileExistsError 

511 Raised if transfer is not `None` but the (internal) location the 

512 file would be moved to is already occupied. 

513 

514 Notes 

515 ----- 

516 This method (along with `_finishIngest`) should be implemented by 

517 subclasses to provide ingest support instead of implementing `ingest` 

518 directly. 

519 

520 `_prepIngest` should not modify the data repository or given files in 

521 any way; all changes should be deferred to `_finishIngest`. 

522 

523 When possible, exceptions should be raised in `_prepIngest` instead of 

524 `_finishIngest`. `NotImplementedError` exceptions that indicate that 

525 the transfer mode is not supported must be raised by `_prepIngest` 

526 instead of `_finishIngest`. 

527 """ 

528 raise NotImplementedError(f"Datastore {self} does not support direct file-based ingest.") 

529 

530 def _finishIngest(self, prepData: IngestPrepData, *, transfer: Optional[str] = None) -> None: 

531 """Complete an ingest operation. 

532 

533 Parameters 

534 ---------- 

535 prepData : `IngestPrepData` 

536 An instance of a subclass of `IngestPrepData`. Guaranteed to be 

537 the direct result of a call to `_prepIngest` on this datastore. 

538 transfer : `str`, optional 

539 How (and whether) the dataset should be added to the datastore. 

540 See `ingest` for details of transfer modes. 

541 

542 Raises 

543 ------ 

544 FileNotFoundError 

545 Raised if one of the given files does not exist. 

546 FileExistsError 

547 Raised if transfer is not `None` but the (internal) location the 

548 file would be moved to is already occupied. 

549 

550 Notes 

551 ----- 

552 This method (along with `_prepIngest`) should be implemented by 

553 subclasses to provide ingest support instead of implementing `ingest` 

554 directly. 

555 """ 

556 raise NotImplementedError(f"Datastore {self} does not support direct file-based ingest.") 

557 

558 def ingest(self, *datasets: FileDataset, transfer: Optional[str] = None) -> None: 

559 """Ingest one or more files into the datastore. 

560 

561 Parameters 

562 ---------- 

563 datasets : `FileDataset` 

564 Each positional argument is a struct containing information about 

565 a file to be ingested, including its path (either absolute or 

566 relative to the datastore root, if applicable), a complete 

567 `DatasetRef` (with ``dataset_id not None``), and optionally a 

568 formatter class or its fully-qualified string name. If a formatter 

569 is not provided, the one the datastore would use for ``put`` on 

570 that dataset is assumed. 

571 transfer : `str`, optional 

572 How (and whether) the dataset should be added to the datastore. 

573 If `None` (default), the file must already be in a location 

574 appropriate for the datastore (e.g. within its root directory), 

575 and will not be modified. Other choices include "move", "copy", 

576 "link", "symlink", "relsymlink", and "hardlink". "link" is a 

577 special transfer mode that will first try to make a hardlink and 

578 if that fails a symlink will be used instead. "relsymlink" creates 

579 a relative symlink rather than using an absolute path. 

580 Most datastores do not support all transfer modes. 

581 "auto" is a special option that will let the 

582 data store choose the most natural option for itself. 

583 

584 Raises 

585 ------ 

586 NotImplementedError 

587 Raised if the datastore does not support the given transfer mode 

588 (including the case where ingest is not supported at all). 

589 DatasetTypeNotSupportedError 

590 Raised if one or more files to be ingested have a dataset type that 

591 is not supported by the datastore. 

592 FileNotFoundError 

593 Raised if one of the given files does not exist. 

594 FileExistsError 

595 Raised if transfer is not `None` but the (internal) location the 

596 file would be moved to is already occupied. 

597 

598 Notes 

599 ----- 

600 Subclasses should implement `_prepIngest` and `_finishIngest` instead 

601 of implementing `ingest` directly. Datastores that hold and 

602 delegate to child datastores may want to call those methods as well. 

603 

604 Subclasses are encouraged to document their supported transfer modes 

605 in their class documentation. 

606 """ 

607 # Allow a datastore to select a default transfer mode 

608 transfer = self._overrideTransferMode(*datasets, transfer=transfer) 

609 prepData = self._prepIngest(*datasets, transfer=transfer) 

610 refs = {ref.id: ref for dataset in datasets for ref in dataset.refs} 

611 if None in refs: 

612 # Find the file for the error message. There may be multiple 

613 # bad refs so look for all of them. 

614 unresolved_paths = {} 

615 for dataset in datasets: 

616 unresolved = [] 

617 for ref in dataset.refs: 

618 if ref.id is None: 

619 unresolved.append(ref) 

620 if unresolved: 

621 unresolved_paths[dataset.path] = unresolved 

622 raise RuntimeError( 

623 "Attempt to ingest unresolved DatasetRef from: " 

624 + ",".join(f"{p}: ({[str(r) for r in ref]})" for p, ref in unresolved_paths.items()) 

625 ) 

626 if refs.keys() != prepData.refs.keys(): 

627 unsupported = refs.keys() - prepData.refs.keys() 

628 # Group unsupported refs by DatasetType for an informative 

629 # but still concise error message. 

630 byDatasetType = defaultdict(list) 

631 for datasetId in unsupported: 

632 ref = refs[datasetId] 

633 byDatasetType[ref.datasetType].append(ref) 

634 raise DatasetTypeNotSupportedError( 

635 "DatasetType(s) not supported in ingest: " 

636 + ", ".join(f"{k.name} ({len(v)} dataset(s))" for k, v in byDatasetType.items()) 

637 ) 

638 self._finishIngest(prepData, transfer=transfer) 

639 
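Hedged sketch of a simple ingest call; the path and the resolved ``ref`` are hypothetical, and the datastore must support the requested transfer mode:

    dataset = FileDataset(path="/data/raw/exposure_001.fits", refs=[ref])
    # Copy the file into the datastore and register it under ref.
    datastore.ingest(dataset, transfer="copy")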

640 def transfer_from( 

641 self, 

642 source_datastore: Datastore, 

643 refs: Iterable[DatasetRef], 

644 local_refs: Optional[Iterable[DatasetRef]] = None, 

645 transfer: str = "auto", 

646 artifact_existence: Optional[Dict[ResourcePath, bool]] = None, 

647 ) -> None: 

648 """Transfer dataset artifacts from another datastore to this one. 

649 

650 Parameters 

651 ---------- 

652 source_datastore : `Datastore` 

653 The datastore from which to transfer artifacts. That datastore 

654 must be compatible with this datastore receiving the artifacts. 

655 refs : iterable of `DatasetRef` 

656 The datasets to transfer from the source datastore. 

657 local_refs : iterable of `DatasetRef`, optional 

658 The dataset refs as known to the registry associated with 

659 this datastore. Can be `None` if the source and target datastore 

660 are using UUIDs. 

661 transfer : `str`, optional 

662 How (and whether) the dataset should be added to the datastore. 

663 Choices include "move", "copy", 

664 "link", "symlink", "relsymlink", and "hardlink". "link" is a 

665 special transfer mode that will first try to make a hardlink and 

666 if that fails a symlink will be used instead. "relsymlink" creates 

667 a relative symlink rather than using an absolute path. 

668 Most datastores do not support all transfer modes. 

669 "auto" (the default) is a special option that will let the 

670 data store choose the most natural option for itself. 

671 If the source location and transfer location are identical the 

672 transfer mode will be ignored. 

673 artifact_existence : `dict` [`lsst.resources.ResourcePath`, `bool`] 

674 Optional mapping of datastore artifact to existence. Updated by 

675 this method with details of all artifacts tested. Can be `None` 

676 if the caller is not interested. 

677 

678 Raises 

679 ------ 

680 TypeError 

681 Raised if the two datastores are not compatible. 

682 """ 

683 if type(self) is not type(source_datastore): 

684 raise TypeError( 

685 f"Datastore mismatch between this datastore ({type(self)}) and the " 

686 f"source datastore ({type(source_datastore)})." 

687 ) 

688 

689 raise NotImplementedError(f"Datastore {type(self)} must implement a transfer_from method.") 

690 
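Sketch of a transfer between two datastores of the same concrete type; ``source_datastore`` and ``refs`` are assumed to exist, and the optional existence cache can be shared across calls:

    cache: dict = {}
    datastore.transfer_from(source_datastore, refs, transfer="copy",
                            artifact_existence=cache)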

691 @abstractmethod 

692 def getURIs( 

693 self, datasetRef: DatasetRef, predict: bool = False 

694 ) -> Tuple[Optional[ResourcePath], Dict[str, ResourcePath]]: 

695 """Return URIs associated with dataset. 

696 

697 Parameters 

698 ---------- 

699 datasetRef : `DatasetRef` 

700 Reference to the required dataset. 

701 predict : `bool`, optional 

702 If the datastore does not know about the dataset, should it 

703 return a predicted URI or not? 

704 

705 Returns 

706 ------- 

707 primary : `lsst.resources.ResourcePath` 

708 The URI to the primary artifact associated with this dataset. 

709 If the dataset was disassembled within the datastore this 

710 may be `None`. 

711 components : `dict` 

712 URIs to any components associated with the dataset artifact. 

713 Can be empty if there are no components. 

714 """ 

715 raise NotImplementedError() 

716 

717 @abstractmethod 

718 def getURI(self, datasetRef: DatasetRef, predict: bool = False) -> ResourcePath: 

719 """URI to the Dataset. 

720 

721 Parameters 

722 ---------- 

723 datasetRef : `DatasetRef` 

724 Reference to the required Dataset. 

725 predict : `bool` 

726 If `True` attempt to predict the URI for a dataset if it does 

727 not exist in datastore. 

728 

729 Returns 

730 ------- 

731 uri : `lsst.resources.ResourcePath` 

732 URI pointing to the Dataset within the datastore. If the 

733 Dataset does not exist in the datastore, the URI may be a guess. 

734 If the datastore does not have entities that relate well 

735 to the concept of a URI the returned URI string will be 

736 descriptive. The returned URI is not guaranteed to be obtainable. 

737 

738 Raises 

739 ------ 

740 FileNotFoundError 

741 A URI has been requested for a dataset that does not exist and 

742 guessing is not allowed. 

743 """ 

744 raise NotImplementedError("Must be implemented by subclass") 

745 
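Sketch of the two URI accessors (``ref`` assumed to be a resolved `DatasetRef`):

    uri = datastore.getURI(ref)                  # raises FileNotFoundError if absent
    uri = datastore.getURI(ref, predict=True)    # may return a predicted location
    primary, components = datastore.getURIs(ref, predict=True)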

746 @abstractmethod 

747 def retrieveArtifacts( 

748 self, 

749 refs: Iterable[DatasetRef], 

750 destination: ResourcePath, 

751 transfer: str = "auto", 

752 preserve_path: bool = True, 

753 overwrite: bool = False, 

754 ) -> List[ResourcePath]: 

755 """Retrieve the artifacts associated with the supplied refs. 

756 

757 Parameters 

758 ---------- 

759 refs : iterable of `DatasetRef` 

760 The datasets for which artifacts are to be retrieved. 

761 A single ref can result in multiple artifacts. The refs must 

762 be resolved. 

763 destination : `lsst.resources.ResourcePath` 

764 Location to write the artifacts. 

765 transfer : `str`, optional 

766 Method to use to transfer the artifacts. Must be one of the options 

767 supported by `lsst.resources.ResourcePath.transfer_from()`. 

768 "move" is not allowed. 

769 preserve_path : `bool`, optional 

770 If `True` the full path of the artifact within the datastore 

771 is preserved. If `False` the final file component of the path 

772 is used. 

773 overwrite : `bool`, optional 

774 If `True` allow transfers to overwrite existing files at the 

775 destination. 

776 

777 Returns 

778 ------- 

779 targets : `list` of `lsst.resources.ResourcePath` 

780 URIs of file artifacts in destination location. Order is not 

781 preserved. 

782 

783 Notes 

784 ----- 

785 For non-file datastores the artifacts written to the destination 

786 may not match the representation inside the datastore. For example 

787 a hierarchical data structure in a NoSQL database may well be stored 

788 as a JSON file. 

789 """ 

790 raise NotImplementedError() 

791 
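Sketch of retrieving artifacts into a local directory; the destination path is hypothetical:

    from lsst.resources import ResourcePath

    destination = ResourcePath("/tmp/extracted/", forceDirectory=True)
    copied = datastore.retrieveArtifacts(refs, destination, transfer="copy",
                                         preserve_path=False)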

792 @abstractmethod 

793 def remove(self, datasetRef: DatasetRef) -> None: 

794 """Indicate to the Datastore that a Dataset can be removed. 

795 

796 Parameters 

797 ---------- 

798 datasetRef : `DatasetRef` 

799 Reference to the required Dataset. 

800 

801 Raises 

802 ------ 

803 FileNotFoundError 

804 When Dataset does not exist. 

805 

806 Notes 

807 ----- 

808 Some Datastores may implement this method as a silent no-op to 

809 disable Dataset deletion through standard interfaces. 

810 """ 

811 raise NotImplementedError("Must be implemented by subclass") 

812 

813 @abstractmethod 

814 def forget(self, refs: Iterable[DatasetRef]) -> None: 

815 """Indicate to the Datastore that it should remove all records of the 

816 given datasets, without actually deleting them. 

817 

818 Parameters 

819 ---------- 

820 refs : `Iterable` [ `DatasetRef` ] 

821 References to the datasets being forgotten. 

822 

823 Notes 

824 ----- 

825 Asking a datastore to forget a `DatasetRef` it does not hold should be 

826 a silent no-op, not an error. 

827 """ 

828 raise NotImplementedError("Must be implemented by subclass") 

829 

830 @abstractmethod 

831 def trash(self, ref: Union[DatasetRef, Iterable[DatasetRef]], ignore_errors: bool = True) -> None: 

832 """Indicate to the Datastore that a Dataset can be moved to the trash. 

833 

834 Parameters 

835 ---------- 

836 ref : `DatasetRef` or iterable thereof 

837 Reference(s) to the required Dataset. 

838 ignore_errors : `bool`, optional 

839 Determine whether errors should be ignored. When multiple 

840 refs are being trashed there will be no per-ref check. 

841 

842 Raises 

843 ------ 

844 FileNotFoundError 

845 When Dataset does not exist and errors are not ignored. Only 

846 checked if a single ref is supplied (and not in a list). 

847 

848 Notes 

849 ----- 

850 Some Datastores may implement this method as a silent no-op to 

851 disable Dataset deletion through standard interfaces. 

852 """ 

853 raise NotImplementedError("Must be implemented by subclass") 

854 

855 @abstractmethod 

856 def emptyTrash(self, ignore_errors: bool = True) -> None: 

857 """Remove all datasets from the trash. 

858 

859 Parameters 

860 ---------- 

861 ignore_errors : `bool`, optional 

862 Determine whether errors should be ignored. 

863 

864 Notes 

865 ----- 

866 Some Datastores may implement this method as a silent no-op to 

867 disable Dataset deletion through standard interfaces. 

868 """ 

869 raise NotImplementedError("Must be implemented by subclass") 

870 
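Sketch of the two-step removal flow this API implies (``refs`` assumed to exist):

    datastore.trash(refs)     # accepts a single ref or an iterable; marks for removal
    datastore.emptyTrash()    # permanently removes the trashed artifacts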

871 @abstractmethod 

872 def transfer(self, inputDatastore: Datastore, datasetRef: DatasetRef) -> None: 

873 """Transfer a dataset from another datastore to this datastore. 

874 

875 Parameters 

876 ---------- 

877 inputDatastore : `Datastore` 

878 The external `Datastore` from which to retrieve the Dataset. 

879 datasetRef : `DatasetRef` 

880 Reference to the required Dataset. 

881 """ 

882 raise NotImplementedError("Must be implemented by subclass") 

883 

884 def export( 

885 self, refs: Iterable[DatasetRef], *, directory: Optional[str] = None, transfer: Optional[str] = None 

886 ) -> Iterable[FileDataset]: 

887 """Export datasets for transfer to another data repository. 

888 

889 Parameters 

890 ---------- 

891 refs : iterable of `DatasetRef` 

892 Dataset references to be exported. 

893 directory : `str`, optional 

894 Path to a directory that should contain files corresponding to 

895 output datasets. Ignored if ``transfer`` is `None`. 

896 transfer : `str`, optional 

897 Mode that should be used to move datasets out of the repository. 

898 Valid options are the same as those of the ``transfer`` argument 

899 to ``ingest``, and datastores may similarly signal that a transfer 

900 mode is not supported by raising `NotImplementedError`. 

901 

902 Returns 

903 ------- 

904 datasets : iterable of `FileDataset` 

905 Structs containing information about the exported datasets, in the 

906 same order as ``refs``. 

907 

908 Raises 

909 ------ 

910 NotImplementedError 

911 Raised if the given transfer mode is not supported. 

912 """ 

913 raise NotImplementedError(f"Transfer mode {transfer} not supported.") 

914 

915 @abstractmethod 

916 def validateConfiguration( 

917 self, entities: Iterable[Union[DatasetRef, DatasetType, StorageClass]], logFailures: bool = False 

918 ) -> None: 

919 """Validate some of the configuration for this datastore. 

920 

921 Parameters 

922 ---------- 

923 entities : iterable of `DatasetRef`, `DatasetType`, or `StorageClass` 

924 Entities to test against this configuration. Can be differing 

925 types. 

926 logFailures : `bool`, optional 

927 If `True`, output a log message for every validation error 

928 detected. 

929 

930 Raises 

931 ------ 

932 DatastoreValidationError 

933 Raised if there is a validation problem with a configuration. 

934 

935 Notes 

936 ----- 

937 Which parts of the configuration are validated is at the discretion 

938 of each Datastore implementation. 

939 """ 

940 raise NotImplementedError("Must be implemented by subclass") 

941 

942 @abstractmethod 

943 def validateKey(self, lookupKey: LookupKey, entity: Union[DatasetRef, DatasetType, StorageClass]) -> None: 

944 """Validate a specific look up key with supplied entity. 

945 

946 Parameters 

947 ---------- 

948 lookupKey : `LookupKey` 

949 Key to use to retrieve information from the datastore 

950 configuration. 

951 entity : `DatasetRef`, `DatasetType`, or `StorageClass` 

952 Entity to compare with configuration retrieved using the 

953 specified lookup key. 

954 

955 Raises 

956 ------ 

957 DatastoreValidationError 

958 Raised if there is a problem with the combination of entity 

959 and lookup key. 

960 

961 Notes 

962 ----- 

963 Bypasses the normal selection priorities by allowing a key that 

964 would normally not be selected to be validated. 

965 """ 

966 raise NotImplementedError("Must be implemented by subclass") 

967 

968 @abstractmethod 

969 def getLookupKeys(self) -> Set[LookupKey]: 

970 """Return all the lookup keys relevant to this datastore. 

971 

972 Returns 

973 ------- 

974 keys : `set` of `LookupKey` 

975 The keys stored internally for looking up information based 

976 on `DatasetType` name or `StorageClass`. 

977 """ 

978 raise NotImplementedError("Must be implemented by subclass") 

979 

980 def needs_expanded_data_ids( 

981 self, 

982 transfer: Optional[str], 

983 entity: Optional[Union[DatasetRef, DatasetType, StorageClass]] = None, 

984 ) -> bool: 

985 """Test whether this datastore needs expanded data IDs to ingest. 

986 

987 Parameters 

988 ---------- 

989 transfer : `str` or `None` 

990 Transfer mode for ingest. 

991 entity : `DatasetRef`, `DatasetType`, or `StorageClass`, optional 

992 Object representing what will be ingested. If not provided (or not 

993 specific enough), `True` may be returned even if expanded data 

994 IDs aren't necessary. 

995 

996 Returns 

997 ------- 

998 needed : `bool` 

999 If `True`, expanded data IDs may be needed. `False` only if 

1000 expansion definitely isn't necessary. 

1001 """ 

1002 return True