# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Support for generic data stores."""

from __future__ import annotations

__all__ = ("DatastoreConfig", "Datastore", "DatastoreValidationError")

import contextlib
import dataclasses
import logging
from abc import ABCMeta, abstractmethod
from collections import defaultdict
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    ClassVar,
    Dict,
    Iterable,
    Iterator,
    List,
    Mapping,
    Optional,
    Set,
    Tuple,
    Type,
    Union,
)

from lsst.utils import doImportType

from .config import Config, ConfigSubset
from .constraints import Constraints
from .exceptions import DatasetTypeNotSupportedError, ValidationError
from .fileDataset import FileDataset
from .storageClass import StorageClassFactory

if TYPE_CHECKING:
    from lsst.resources import ResourcePath, ResourcePathExpression

    from ..registry.interfaces import DatasetIdRef, DatastoreRegistryBridgeManager
    from .configSupport import LookupKey
    from .datasets import DatasetRef, DatasetType
    from .datastoreRecordData import DatastoreRecordData
    from .storageClass import StorageClass


class DatastoreConfig(ConfigSubset):
    """Configuration for Datastores."""

    component = "datastore"
    requiredKeys = ("cls",)
    defaultConfigFile = "datastore.yaml"


class DatastoreValidationError(ValidationError):
    """There is a problem with the Datastore configuration."""

    pass


@dataclasses.dataclass(frozen=True)
class Event:
    __slots__ = {"name", "undoFunc", "args", "kwargs"}
    name: str
    undoFunc: Callable
    args: tuple
    kwargs: dict


class IngestPrepData:
    """A helper base class for `Datastore` ingest implementations.

    Datastore implementations will generally need a custom implementation of
    this class.

    Should be accessed as ``Datastore.IngestPrepData`` instead of via direct
    import.

    Parameters
    ----------
    refs : iterable of `DatasetRef`
        References for the datasets that can be ingested by this datastore.
    """

    def __init__(self, refs: Iterable[DatasetRef]):
        self.refs = {ref.id: ref for ref in refs}


class DatastoreTransaction:
    """Keeps a log of `Datastore` activity and allows rollback.

    Parameters
    ----------
    parent : `DatastoreTransaction`, optional
        The parent transaction (if any).
    """

    Event: ClassVar[Type] = Event

    parent: Optional[DatastoreTransaction]
    """The parent transaction. (`DatastoreTransaction`, optional)"""

    def __init__(self, parent: Optional[DatastoreTransaction] = None):
        self.parent = parent
        self._log: List[Event] = []

    def registerUndo(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> None:
        """Register event with undo function.

        Parameters
        ----------
        name : `str`
            Name of the event.
        undoFunc : func
            Function to undo this event.
        args : `tuple`
            Positional arguments to `undoFunc`.
        **kwargs
            Keyword arguments to `undoFunc`.
        """
        self._log.append(self.Event(name, undoFunc, args, kwargs))

    @contextlib.contextmanager
    def undoWith(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> Iterator[None]:
        """Register undo function if nested operation succeeds.

        Calls `registerUndo`.

        This can be used to wrap individual undo-able statements within a
        DatastoreTransaction block. Multiple statements that can fail
        separately should not be part of the same `undoWith` block.

        All arguments are forwarded directly to `registerUndo`.
        """
        try:
            yield None
        except BaseException:
            raise
        else:
            self.registerUndo(name, undoFunc, *args, **kwargs)

    def rollback(self) -> None:
        """Roll back all events in this transaction."""
        log = logging.getLogger(__name__)
        while self._log:
            ev = self._log.pop()
            try:
                log.debug(
                    "Rolling back transaction: %s: %s(%s,%s)",
                    ev.name,
                    ev.undoFunc,
                    ",".join(str(a) for a in ev.args),
                    ",".join(f"{k}={v}" for k, v in ev.kwargs.items()),
                )
            except Exception:
                # In case we had a problem in stringification of arguments
                log.warning("Rolling back transaction: %s", ev.name)
            try:
                ev.undoFunc(*ev.args, **ev.kwargs)
            except BaseException as e:
                # Deliberately swallow error that may occur in unrolling
                log.warning("Exception: %s caught while unrolling: %s", e, ev.name)
                pass

    def commit(self) -> None:
        """Commit this transaction."""
        if self.parent is None:
            # Just forget about the events, they have already happened.
            return
        else:
            # We may still want the events from this transaction as part of
            # the parent.
            self.parent._log.extend(self._log)


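# Illustrative sketch (not part of the original module): how the transaction
# log can be used directly.  ``cleanup`` here is a hypothetical undo callable
# supplied by the caller.
#
#     txn = DatastoreTransaction()
#     with txn.undoWith("write file", cleanup, "/tmp/example.fits"):
#         ...  # perform the undo-able action
#     txn.rollback()  # calls cleanup("/tmp/example.fits")
#
# Committing a nested transaction instead hands its events to the parent so
# that a later parent rollback can still undo them.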

class Datastore(metaclass=ABCMeta):
    """Datastore interface.

    Parameters
    ----------
    config : `DatastoreConfig` or `str`
        Load configuration either from an existing config instance or by
        referring to a configuration file.
    bridgeManager : `DatastoreRegistryBridgeManager`
        Object that manages the interface between `Registry` and datastores.
    butlerRoot : `str`, optional
        New datastore root to use to override the configuration value.
    """

    defaultConfigFile: ClassVar[Optional[str]] = None
    """Path to configuration defaults. Accessed within the ``config`` resource
    or relative to a search path. Can be None if no defaults specified.
    """

    containerKey: ClassVar[Optional[str]] = None
    """Name of the key containing a list of subconfigurations that also
    need to be merged with defaults and will likely use different Python
    datastore classes (but all using DatastoreConfig). Assumed to be a
    list of configurations that can be represented in a DatastoreConfig
    and containing a "cls" definition. None indicates that no containers
    are expected in this Datastore."""

    isEphemeral: bool = False
    """Indicate whether this Datastore is ephemeral or not. An ephemeral
    datastore is one where the contents of the datastore will not exist
    across process restarts. This value can change per-instance."""

    config: DatastoreConfig
    """Configuration used to create Datastore."""

    name: str
    """Label associated with this Datastore."""

    storageClassFactory: StorageClassFactory
    """Factory for creating storage class instances from name."""

    constraints: Constraints
    """Constraints to apply when putting datasets into the datastore."""

    # MyPy does not like for this to be annotated as any kind of type, because
    # it can't do static checking on type variables that can change at runtime.
    IngestPrepData: ClassVar[Any] = IngestPrepData
    """Helper base class for ingest implementations.
    """

    @classmethod
    @abstractmethod
    def setConfigRoot(cls, root: str, config: Config, full: Config, overwrite: bool = True) -> None:
        """Set filesystem-dependent config options for this datastore.

        The options will be appropriate for a new empty repository with the
        given root.

        Parameters
        ----------
        root : `str`
            Filesystem path to the root of the data repository.
        config : `Config`
            A `Config` to update. Only the subset understood by
            this component will be updated. Will not expand
            defaults.
        full : `Config`
            A complete config with all defaults expanded that can be
            converted to a `DatastoreConfig`. Read-only and will not be
            modified by this method.
            Repository-specific options that should not be obtained
            from defaults when Butler instances are constructed
            should be copied from ``full`` to ``config``.
        overwrite : `bool`, optional
            If `False`, do not modify a value in ``config`` if the value
            already exists. Default is always to overwrite with the provided
            ``root``.

        Notes
        -----
        If a keyword is explicitly defined in the supplied ``config`` it
        will not be overridden by this method if ``overwrite`` is `False`.
        This allows explicit values set in external configs to be retained.
        """
        raise NotImplementedError()

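    # Illustrative sketch (assumption, not taken from this module): a
    # file-based subclass might record the new root under its own section,
    # honouring the ``overwrite`` flag.  The ``("datastore", "root")`` key is
    # hypothetical.
    #
    #     @classmethod
    #     def setConfigRoot(cls, root, config, full, overwrite=True):
    #         if overwrite or ("datastore", "root") not in config:
    #             config["datastore", "root"] = root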

    @staticmethod
    def fromConfig(
        config: Config,
        bridgeManager: DatastoreRegistryBridgeManager,
        butlerRoot: Optional[ResourcePathExpression] = None,
    ) -> "Datastore":
        """Create datastore from type specified in config file.

        Parameters
        ----------
        config : `Config`
            Configuration instance.
        bridgeManager : `DatastoreRegistryBridgeManager`
            Object that manages the interface between `Registry` and
            datastores.
        butlerRoot : `str`, optional
            Butler root directory.
        """
        cls = doImportType(config["datastore", "cls"])
        if not issubclass(cls, Datastore):
            raise TypeError(f"Imported child class {config['datastore', 'cls']} is not a Datastore")
        return cls(config=config, bridgeManager=bridgeManager, butlerRoot=butlerRoot)

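    # Illustrative sketch (assumption, not part of this module): typical use
    # is to hand the full butler configuration plus a bridge manager to
    # ``fromConfig`` and let it import the concrete class named under
    # ``datastore.cls``.  ``butler_config`` and ``bridge_manager`` are
    # hypothetical objects created elsewhere.
    #
    #     datastore = Datastore.fromConfig(
    #         config=butler_config,
    #         bridgeManager=bridge_manager,
    #         butlerRoot="/path/to/repo",
    #     )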

    def __init__(
        self,
        config: Union[Config, str],
        bridgeManager: DatastoreRegistryBridgeManager,
        butlerRoot: Optional[ResourcePathExpression] = None,
    ):
        self.config = DatastoreConfig(config)
        self.name = "ABCDataStore"
        self._transaction: Optional[DatastoreTransaction] = None

        # All Datastores need storage classes and constraints
        self.storageClassFactory = StorageClassFactory()

        # And read the constraints list
        constraintsConfig = self.config.get("constraints")
        self.constraints = Constraints(constraintsConfig, universe=bridgeManager.universe)

    def __str__(self) -> str:
        return self.name

    def __repr__(self) -> str:
        return self.name

    @property
    def names(self) -> Tuple[str, ...]:
        """Names associated with this datastore returned as a tuple.

        Can be different from ``name`` for a chaining datastore.
        """
        # Default implementation returns solely the name itself
        return (self.name,)

    @contextlib.contextmanager
    def transaction(self) -> Iterator[DatastoreTransaction]:
        """Context manager supporting `Datastore` transactions.

        Transactions can be nested, and are to be used in combination with
        `Registry.transaction`.
        """
        self._transaction = DatastoreTransaction(self._transaction)
        try:
            yield self._transaction
        except BaseException:
            self._transaction.rollback()
            raise
        else:
            self._transaction.commit()
        self._transaction = self._transaction.parent

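    # Illustrative sketch (assumption, not part of this module): a subclass's
    # ``put`` implementation might pair each side effect with its undo action
    # inside the current transaction.  ``_delete_artifact`` and
    # ``_write_artifact`` are hypothetical helpers.
    #
    #     with self.transaction() as txn:
    #         with txn.undoWith("write artifact", self._delete_artifact, location):
    #             self._write_artifact(location, inMemoryDataset)
    #
    # If the enclosing block raises, the transaction rolls back and the undo
    # callable removes the partially written artifact.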

    @abstractmethod
    def knows(self, ref: DatasetRef) -> bool:
        """Check if the dataset is known to the datastore.

        Does not check for existence of any artifact.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required dataset.

        Returns
        -------
        exists : `bool`
            `True` if the dataset is known to the datastore.
        """
        raise NotImplementedError()

    def mexists(
        self, refs: Iterable[DatasetRef], artifact_existence: Optional[Dict[ResourcePath, bool]] = None
    ) -> Dict[DatasetRef, bool]:
        """Check the existence of multiple datasets at once.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            The datasets to be checked.
        artifact_existence : `dict` [`lsst.resources.ResourcePath`, `bool`]
            Optional mapping of datastore artifact to existence. Updated by
            this method with details of all artifacts tested. Can be `None`
            if the caller is not interested.

        Returns
        -------
        existence : `dict` of [`DatasetRef`, `bool`]
            Mapping from dataset to boolean indicating existence.
        """
        existence: Dict[DatasetRef, bool] = {}
        # Non-optimized default.
        for ref in refs:
            existence[ref] = self.exists(ref)
        return existence

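    # Illustrative sketch (assumption, not part of this module): callers can
    # filter a collection of refs down to those with existing artifacts, and
    # reuse ``artifact_existence`` as a cache across calls.
    #
    #     artifact_existence: Dict[ResourcePath, bool] = {}
    #     existence = datastore.mexists(refs, artifact_existence=artifact_existence)
    #     present = [ref for ref, exists in existence.items() if exists]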

    @abstractmethod
    def exists(self, datasetRef: DatasetRef) -> bool:
        """Check if the dataset exists in the datastore.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required dataset.

        Returns
        -------
        exists : `bool`
            `True` if the entity exists in the `Datastore`.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def get(self, datasetRef: DatasetRef, parameters: Optional[Mapping[str, Any]] = None) -> Any:
        """Load an `InMemoryDataset` from the store.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required Dataset.
        parameters : `dict`, optional
            `StorageClass`-specific parameters that specify a slice of the
            Dataset to be loaded.

        Returns
        -------
        inMemoryDataset : `object`
            Requested Dataset or slice thereof as an InMemoryDataset.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def put(self, inMemoryDataset: Any, datasetRef: DatasetRef) -> None:
        """Write an `InMemoryDataset` with a given `DatasetRef` to the store.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Dataset to store.
        datasetRef : `DatasetRef`
            Reference to the associated Dataset.
        """
        raise NotImplementedError("Must be implemented by subclass")

    def _overrideTransferMode(self, *datasets: FileDataset, transfer: Optional[str] = None) -> Optional[str]:
        """Allow ingest transfer mode to be defaulted based on datasets.

        Parameters
        ----------
        datasets : `FileDataset`
            Each positional argument is a struct containing information about
            a file to be ingested, including its path (either absolute or
            relative to the datastore root, if applicable), a complete
            `DatasetRef` (with ``dataset_id not None``), and optionally a
            formatter class or its fully-qualified string name. If a formatter
            is not provided, this method should populate that attribute with
            the formatter the datastore would use for `put`. Subclasses are
            also permitted to modify the path attribute (typically to put it
            in what the datastore considers its standard form).
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            See `ingest` for details of transfer modes.

        Returns
        -------
        newTransfer : `str`
            Transfer mode to use. Will be identical to the supplied transfer
            mode unless "auto" is used.
        """
        if transfer != "auto":
            return transfer
        raise RuntimeError(f"{transfer} is not allowed without specialization.")

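    # Illustrative sketch (assumption, not part of this module): a subclass
    # that prefers copying when the caller asks for "auto" could override the
    # hook like this.
    #
    #     def _overrideTransferMode(self, *datasets, transfer=None):
    #         if transfer == "auto":
    #             return "copy"
    #         return transfer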

    def _prepIngest(self, *datasets: FileDataset, transfer: Optional[str] = None) -> IngestPrepData:
        """Process datasets to identify which ones can be ingested.

        Parameters
        ----------
        datasets : `FileDataset`
            Each positional argument is a struct containing information about
            a file to be ingested, including its path (either absolute or
            relative to the datastore root, if applicable), a complete
            `DatasetRef` (with ``dataset_id not None``), and optionally a
            formatter class or its fully-qualified string name. If a formatter
            is not provided, this method should populate that attribute with
            the formatter the datastore would use for `put`. Subclasses are
            also permitted to modify the path attribute (typically to put it
            in what the datastore considers its standard form).
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            See `ingest` for details of transfer modes.

        Returns
        -------
        data : `IngestPrepData`
            An instance of a subclass of `IngestPrepData`, used to pass
            arbitrary data from `_prepIngest` to `_finishIngest`. This should
            include only the datasets this datastore can actually ingest;
            others should be silently ignored (`Datastore.ingest` will inspect
            `IngestPrepData.refs` and raise `DatasetTypeNotSupportedError` if
            necessary).

        Raises
        ------
        NotImplementedError
            Raised if the datastore does not support the given transfer mode
            (including the case where ingest is not supported at all).
        FileNotFoundError
            Raised if one of the given files does not exist.
        FileExistsError
            Raised if transfer is not `None` but the (internal) location the
            file would be moved to is already occupied.

        Notes
        -----
        This method (along with `_finishIngest`) should be implemented by
        subclasses to provide ingest support instead of implementing `ingest`
        directly.

        `_prepIngest` should not modify the data repository or given files in
        any way; all changes should be deferred to `_finishIngest`.

        When possible, exceptions should be raised in `_prepIngest` instead of
        `_finishIngest`. `NotImplementedError` exceptions that indicate that
        the transfer mode is not supported must be raised by `_prepIngest`
        instead of `_finishIngest`.
        """
        raise NotImplementedError(f"Datastore {self} does not support direct file-based ingest.")

    def _finishIngest(
        self, prepData: IngestPrepData, *, transfer: Optional[str] = None, record_validation_info: bool = True
    ) -> None:
        """Complete an ingest operation.

        Parameters
        ----------
        prepData : `IngestPrepData`
            An instance of a subclass of `IngestPrepData`. Guaranteed to be
            the direct result of a call to `_prepIngest` on this datastore.
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            See `ingest` for details of transfer modes.
        record_validation_info : `bool`, optional
            If `True`, the default, the datastore can record validation
            information associated with the file. If `False` the datastore
            will not attempt to track any information such as checksums
            or file sizes. This can be useful if such information is tracked
            in an external system or if the file is to be compressed in place.
            It is up to the datastore whether this parameter is relevant.

        Raises
        ------
        FileNotFoundError
            Raised if one of the given files does not exist.
        FileExistsError
            Raised if transfer is not `None` but the (internal) location the
            file would be moved to is already occupied.

        Notes
        -----
        This method (along with `_prepIngest`) should be implemented by
        subclasses to provide ingest support instead of implementing `ingest`
        directly.
        """
        raise NotImplementedError(f"Datastore {self} does not support direct file-based ingest.")

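    # Illustrative sketch (assumption, not part of this module): the minimal
    # shape of ingest support in a subclass.  ``_can_ingest`` and
    # ``_transfer_file`` are hypothetical helpers.
    #
    #     def _prepIngest(self, *datasets, transfer=None):
    #         if transfer not in (None, "copy", "move"):
    #             raise NotImplementedError(f"Transfer mode {transfer} not supported.")
    #         acceptable = [d for d in datasets if self._can_ingest(d)]
    #         return self.IngestPrepData(ref for d in acceptable for ref in d.refs)
    #
    #     def _finishIngest(self, prepData, *, transfer=None, record_validation_info=True):
    #         for ref in prepData.refs.values():
    #             self._transfer_file(ref, transfer)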

    def ingest(
        self, *datasets: FileDataset, transfer: Optional[str] = None, record_validation_info: bool = True
    ) -> None:
        """Ingest one or more files into the datastore.

        Parameters
        ----------
        datasets : `FileDataset`
            Each positional argument is a struct containing information about
            a file to be ingested, including its path (either absolute or
            relative to the datastore root, if applicable), a complete
            `DatasetRef` (with ``dataset_id not None``), and optionally a
            formatter class or its fully-qualified string name. If a formatter
            is not provided, the one the datastore would use for ``put`` on
            that dataset is assumed.
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            If `None` (default), the file must already be in a location
            appropriate for the datastore (e.g. within its root directory),
            and will not be modified. Other choices include "move", "copy",
            "link", "symlink", "relsymlink", and "hardlink". "link" is a
            special transfer mode that will first try to make a hardlink and
            if that fails a symlink will be used instead. "relsymlink" creates
            a relative symlink rather than use an absolute path.
            Most datastores do not support all transfer modes.
            "auto" is a special option that will let the
            data store choose the most natural option for itself.
        record_validation_info : `bool`, optional
            If `True`, the default, the datastore can record validation
            information associated with the file. If `False` the datastore
            will not attempt to track any information such as checksums
            or file sizes. This can be useful if such information is tracked
            in an external system or if the file is to be compressed in place.
            It is up to the datastore whether this parameter is relevant.

        Raises
        ------
        NotImplementedError
            Raised if the datastore does not support the given transfer mode
            (including the case where ingest is not supported at all).
        DatasetTypeNotSupportedError
            Raised if one or more files to be ingested have a dataset type that
            is not supported by the datastore.
        FileNotFoundError
            Raised if one of the given files does not exist.
        FileExistsError
            Raised if transfer is not `None` but the (internal) location the
            file would be moved to is already occupied.

        Notes
        -----
        Subclasses should implement `_prepIngest` and `_finishIngest` instead
        of implementing `ingest` directly. Datastores that hold and
        delegate to child datastores may want to call those methods as well.

        Subclasses are encouraged to document their supported transfer modes
        in their class documentation.
        """
        # Allow a datastore to select a default transfer mode
        transfer = self._overrideTransferMode(*datasets, transfer=transfer)
        prepData = self._prepIngest(*datasets, transfer=transfer)
        refs = {ref.id: ref for dataset in datasets for ref in dataset.refs}
        if None in refs:
            # Find the file for the error message. There may be multiple
            # bad refs so look for all of them.
            unresolved_paths = {}
            for dataset in datasets:
                unresolved = []
                for ref in dataset.refs:
                    if ref.id is None:
                        unresolved.append(ref)
                if unresolved:
                    unresolved_paths[dataset.path] = unresolved
            raise RuntimeError(
                "Attempt to ingest unresolved DatasetRef from: "
                + ",".join(f"{p}: ({[str(r) for r in ref]})" for p, ref in unresolved_paths.items())
            )
        if refs.keys() != prepData.refs.keys():
            unsupported = refs.keys() - prepData.refs.keys()
            # Group unsupported refs by DatasetType for an informative
            # but still concise error message.
            byDatasetType = defaultdict(list)
            for datasetId in unsupported:
                ref = refs[datasetId]
                byDatasetType[ref.datasetType].append(ref)
            raise DatasetTypeNotSupportedError(
                "DatasetType(s) not supported in ingest: "
                + ", ".join(f"{k.name} ({len(v)} dataset(s))" for k, v in byDatasetType.items())
            )
        self._finishIngest(prepData, transfer=transfer, record_validation_info=record_validation_info)

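    # Illustrative sketch (assumption, not part of this module): how a caller
    # might ingest an existing file using a resolved ref.  ``resolved_ref``
    # is a hypothetical `DatasetRef` with a non-None ``id``.
    #
    #     dataset = FileDataset(path="/data/exposure_001.fits", refs=[resolved_ref])
    #     datastore.ingest(dataset, transfer="copy")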

    def transfer_from(
        self,
        source_datastore: Datastore,
        refs: Iterable[DatasetRef],
        local_refs: Optional[Iterable[DatasetRef]] = None,
        transfer: str = "auto",
        artifact_existence: Optional[Dict[ResourcePath, bool]] = None,
    ) -> None:
        """Transfer dataset artifacts from another datastore to this one.

        Parameters
        ----------
        source_datastore : `Datastore`
            The datastore from which to transfer artifacts. That datastore
            must be compatible with this datastore receiving the artifacts.
        refs : iterable of `DatasetRef`
            The datasets to transfer from the source datastore.
        local_refs : iterable of `DatasetRef`, optional
            The dataset refs associated with the registry associated with
            this datastore. Can be `None` if the source and target datastore
            are using UUIDs.
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            Choices include "move", "copy",
            "link", "symlink", "relsymlink", and "hardlink". "link" is a
            special transfer mode that will first try to make a hardlink and
            if that fails a symlink will be used instead. "relsymlink" creates
            a relative symlink rather than use an absolute path.
            Most datastores do not support all transfer modes.
            "auto" (the default) is a special option that will let the
            data store choose the most natural option for itself.
            If the source location and transfer location are identical the
            transfer mode will be ignored.
        artifact_existence : `dict` [`lsst.resources.ResourcePath`, `bool`]
            Optional mapping of datastore artifact to existence. Updated by
            this method with details of all artifacts tested. Can be `None`
            if the caller is not interested.

        Raises
        ------
        TypeError
            Raised if the two datastores are not compatible.
        """
        if type(self) is not type(source_datastore):
            raise TypeError(
                f"Datastore mismatch between this datastore ({type(self)}) and the "
                f"source datastore ({type(source_datastore)})."
            )

        raise NotImplementedError(f"Datastore {type(self)} must implement a transfer_from method.")

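    # Illustrative sketch (assumption, not part of this module): copying the
    # artifacts for a set of refs from one repository's datastore into
    # another, when both use UUID-based dataset IDs.
    #
    #     target_datastore.transfer_from(source_datastore, refs, transfer="copy")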

    @abstractmethod
    def getURIs(
        self, datasetRef: DatasetRef, predict: bool = False
    ) -> Tuple[Optional[ResourcePath], Dict[str, ResourcePath]]:
        """Return URIs associated with dataset.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required dataset.
        predict : `bool`, optional
            If the datastore does not know about the dataset, should it
            return a predicted URI or not?

        Returns
        -------
        primary : `lsst.resources.ResourcePath`
            The URI to the primary artifact associated with this dataset.
            If the dataset was disassembled within the datastore this
            may be `None`.
        components : `dict`
            URIs to any components associated with the dataset artifact.
            Can be empty if there are no components.
        """
        raise NotImplementedError()

    @abstractmethod
    def getURI(self, datasetRef: DatasetRef, predict: bool = False) -> ResourcePath:
        """URI to the Dataset.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required Dataset.
        predict : `bool`
            If `True` attempt to predict the URI for a dataset if it does
            not exist in datastore.

        Returns
        -------
        uri : `lsst.resources.ResourcePath`
            URI pointing to the Dataset within the datastore. If the
            Dataset does not exist in the datastore, the URI may be a guess.
            If the datastore does not have entities that relate well
            to the concept of a URI the returned URI will be
            descriptive. The returned URI is not guaranteed to be obtainable.

        Raises
        ------
        FileNotFoundError
            A URI has been requested for a dataset that does not exist and
            guessing is not allowed.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def retrieveArtifacts(
        self,
        refs: Iterable[DatasetRef],
        destination: ResourcePath,
        transfer: str = "auto",
        preserve_path: bool = True,
        overwrite: bool = False,
    ) -> List[ResourcePath]:
        """Retrieve the artifacts associated with the supplied refs.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            The datasets for which artifacts are to be retrieved.
            A single ref can result in multiple artifacts. The refs must
            be resolved.
        destination : `lsst.resources.ResourcePath`
            Location to write the artifacts.
        transfer : `str`, optional
            Method to use to transfer the artifacts. Must be one of the options
            supported by `lsst.resources.ResourcePath.transfer_from()`.
            "move" is not allowed.
        preserve_path : `bool`, optional
            If `True` the full path of the artifact within the datastore
            is preserved. If `False` the final file component of the path
            is used.
        overwrite : `bool`, optional
            If `True` allow transfers to overwrite existing files at the
            destination.

        Returns
        -------
        targets : `list` of `lsst.resources.ResourcePath`
            URIs of file artifacts in destination location. Order is not
            preserved.

        Notes
        -----
        For non-file datastores the artifacts written to the destination
        may not match the representation inside the datastore. For example
        a hierarchical data structure in a NoSQL database may well be stored
        as a JSON file.
        """
        raise NotImplementedError()

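    # Illustrative sketch (assumption, not part of this module): copying all
    # artifacts for some refs into a local directory, keeping the
    # in-datastore paths.
    #
    #     destination = ResourcePath("/tmp/export_dir/")
    #     paths = datastore.retrieveArtifacts(refs, destination, transfer="copy")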

    @abstractmethod
    def remove(self, datasetRef: DatasetRef) -> None:
        """Indicate to the Datastore that a Dataset can be removed.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required Dataset.

        Raises
        ------
        FileNotFoundError
            When Dataset does not exist.

        Notes
        -----
        Some Datastores may implement this method as a silent no-op to
        disable Dataset deletion through standard interfaces.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def forget(self, refs: Iterable[DatasetRef]) -> None:
        """Indicate to the Datastore that it should remove all records of the
        given datasets, without actually deleting them.

        Parameters
        ----------
        refs : `Iterable` [ `DatasetRef` ]
            References to the datasets being forgotten.

        Notes
        -----
        Asking a datastore to forget a `DatasetRef` it does not hold should be
        a silent no-op, not an error.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def trash(self, ref: Union[DatasetRef, Iterable[DatasetRef]], ignore_errors: bool = True) -> None:
        """Indicate to the Datastore that a Dataset can be moved to the trash.

        Parameters
        ----------
        ref : `DatasetRef` or iterable thereof
            Reference(s) to the required Dataset.
        ignore_errors : `bool`, optional
            Determine whether errors should be ignored. When multiple
            refs are being trashed there will be no per-ref check.

        Raises
        ------
        FileNotFoundError
            When Dataset does not exist and errors are not ignored. Only
            checked if a single ref is supplied (and not in a list).

        Notes
        -----
        Some Datastores may implement this method as a silent no-op to
        disable Dataset deletion through standard interfaces.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def emptyTrash(self, ignore_errors: bool = True) -> None:
        """Remove all datasets from the trash.

        Parameters
        ----------
        ignore_errors : `bool`, optional
            Determine whether errors should be ignored.

        Notes
        -----
        Some Datastores may implement this method as a silent no-op to
        disable Dataset deletion through standard interfaces.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def transfer(self, inputDatastore: Datastore, datasetRef: DatasetRef) -> None:
        """Transfer a dataset from another datastore to this datastore.

        Parameters
        ----------
        inputDatastore : `Datastore`
            The external `Datastore` from which to retrieve the Dataset.
        datasetRef : `DatasetRef`
            Reference to the required Dataset.
        """
        raise NotImplementedError("Must be implemented by subclass")

    def export(
        self, refs: Iterable[DatasetRef], *, directory: Optional[str] = None, transfer: Optional[str] = None
    ) -> Iterable[FileDataset]:
        """Export datasets for transfer to another data repository.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            Dataset references to be exported.
        directory : `str`, optional
            Path to a directory that should contain files corresponding to
            output datasets. Ignored if ``transfer`` is `None`.
        transfer : `str`, optional
            Mode that should be used to move datasets out of the repository.
            Valid options are the same as those of the ``transfer`` argument
            to ``ingest``, and datastores may similarly signal that a transfer
            mode is not supported by raising `NotImplementedError`.

        Returns
        -------
        datasets : iterable of `FileDataset`
            Structs containing information about the exported datasets, in the
            same order as ``refs``.

        Raises
        ------
        NotImplementedError
            Raised if the given transfer mode is not supported.
        """
        raise NotImplementedError(f"Transfer mode {transfer} not supported.")

    @abstractmethod
    def validateConfiguration(
        self, entities: Iterable[Union[DatasetRef, DatasetType, StorageClass]], logFailures: bool = False
    ) -> None:
        """Validate some of the configuration for this datastore.

        Parameters
        ----------
        entities : iterable of `DatasetRef`, `DatasetType`, or `StorageClass`
            Entities to test against this configuration. Can be differing
            types.
        logFailures : `bool`, optional
            If `True`, output a log message for every validation error
            detected.

        Raises
        ------
        DatastoreValidationError
            Raised if there is a validation problem with a configuration.

        Notes
        -----
        Which parts of the configuration are validated is at the discretion
        of each Datastore implementation.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def validateKey(self, lookupKey: LookupKey, entity: Union[DatasetRef, DatasetType, StorageClass]) -> None:
        """Validate a specific look up key with supplied entity.

        Parameters
        ----------
        lookupKey : `LookupKey`
            Key to use to retrieve information from the datastore
            configuration.
        entity : `DatasetRef`, `DatasetType`, or `StorageClass`
            Entity to compare with configuration retrieved using the
            specified lookup key.

        Raises
        ------
        DatastoreValidationError
            Raised if there is a problem with the combination of entity
            and lookup key.

        Notes
        -----
        Bypasses the normal selection priorities by allowing a key that
        would normally not be selected to be validated.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def getLookupKeys(self) -> Set[LookupKey]:
        """Return all the lookup keys relevant to this datastore.

        Returns
        -------
        keys : `set` of `LookupKey`
            The keys stored internally for looking up information based
            on `DatasetType` name or `StorageClass`.
        """
        raise NotImplementedError("Must be implemented by subclass")

    def needs_expanded_data_ids(
        self,
        transfer: Optional[str],
        entity: Optional[Union[DatasetRef, DatasetType, StorageClass]] = None,
    ) -> bool:
        """Test whether this datastore needs expanded data IDs to ingest.

        Parameters
        ----------
        transfer : `str` or `None`
            Transfer mode for ingest.
        entity, optional
            Object representing what will be ingested. If not provided (or not
            specific enough), `True` may be returned even if expanded data
            IDs aren't necessary.

        Returns
        -------
        needed : `bool`
            If `True`, expanded data IDs may be needed. `False` only if
            expansion definitely isn't necessary.
        """
        return True

    @abstractmethod
    def import_records(
        self,
        data: Mapping[str, DatastoreRecordData],
    ) -> None:
        """Import datastore location and record data from an in-memory data
        structure.

        Parameters
        ----------
        data : `Mapping` [ `str`, `DatastoreRecordData` ]
            Datastore records indexed by datastore name. May contain data for
            other `Datastore` instances (generally because they are chained to
            this one), which should be ignored.

        Notes
        -----
        Implementations should generally not check that any external resources
        (e.g. files) referred to by these records actually exist, for
        performance reasons; we expect higher-level code to guarantee that they
        do.

        Implementations are responsible for calling
        `DatastoreRegistryBridge.insert` on all datasets in ``data.locations``
        where the key is in `names`, as well as loading any opaque table data.
        """
        raise NotImplementedError()

    @abstractmethod
    def export_records(
        self,
        refs: Iterable[DatasetIdRef],
    ) -> Mapping[str, DatastoreRecordData]:
        """Export datastore records and locations to an in-memory data
        structure.

        Parameters
        ----------
        refs : `Iterable` [ `DatasetIdRef` ]
            Datasets to save. This may include datasets not known to this
            datastore, which should be ignored.

        Returns
        -------
        data : `Mapping` [ `str`, `DatastoreRecordData` ]
            Exported datastore records indexed by datastore name.
        """
        raise NotImplementedError()