Coverage for python/lsst/daf/butler/core/datastore.py: 43%

198 statements

coverage.py v6.5.0, created at 2022-12-01 19:55 +0000

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Support for generic data stores."""

from __future__ import annotations

__all__ = ("DatastoreConfig", "Datastore", "DatastoreValidationError")

import contextlib
import logging
from collections import defaultdict
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    ClassVar,
    Dict,
    Iterable,
    Iterator,
    List,
    Mapping,
    Optional,
    Set,
    Tuple,
    Type,
    Union,
)

from dataclasses import dataclass
from abc import ABCMeta, abstractmethod

from lsst.utils import doImport
from .config import ConfigSubset, Config
from .exceptions import ValidationError, DatasetTypeNotSupportedError
from .constraints import Constraints
from .storageClass import StorageClassFactory
from .fileDataset import FileDataset

if TYPE_CHECKING:
    from ..registry.interfaces import DatastoreRegistryBridgeManager
    from .datasets import DatasetRef, DatasetType
    from .configSupport import LookupKey
    from .storageClass import StorageClass
    from ._butlerUri import ButlerURI


class DatastoreConfig(ConfigSubset):
    """Configuration for Datastores."""

    component = "datastore"
    requiredKeys = ("cls",)
    defaultConfigFile = "datastore.yaml"


class DatastoreValidationError(ValidationError):
    """There is a problem with the Datastore configuration."""

    pass


@dataclass(frozen=True)
class Event:
    __slots__ = {"name", "undoFunc", "args", "kwargs"}
    name: str
    undoFunc: Callable
    args: tuple
    kwargs: dict


class IngestPrepData:
    """A helper base class for `Datastore` ingest implementations.

    Datastore implementations will generally need a custom implementation of
    this class.

    Should be accessed as ``Datastore.IngestPrepData`` instead of via direct
    import.

    Parameters
    ----------
    refs : iterable of `DatasetRef`
        References for the datasets that can be ingested by this datastore.
    """

    def __init__(self, refs: Iterable[DatasetRef]):
        self.refs = {ref.id: ref for ref in refs}


class DatastoreTransaction:
    """Keeps a log of `Datastore` activity and allows rollback.

    Parameters
    ----------
    parent : `DatastoreTransaction`, optional
        The parent transaction (if any).
    """

    Event: ClassVar[Type] = Event

    parent: Optional['DatastoreTransaction']
    """The parent transaction. (`DatastoreTransaction`, optional)"""

    def __init__(self, parent: Optional[DatastoreTransaction] = None):
        self.parent = parent
        self._log: List[Event] = []

    def registerUndo(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> None:
        """Register event with undo function.

        Parameters
        ----------
        name : `str`
            Name of the event.
        undoFunc : func
            Function to undo this event.
        args : `tuple`
            Positional arguments to `undoFunc`.
        **kwargs
            Keyword arguments to `undoFunc`.
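
        Examples
        --------
        A minimal sketch, not taken from the original module; ``filename``
        is a placeholder for an artifact written earlier in the same
        transaction, and ``os.remove`` is simply one possible undo action::

            import os

            txn = DatastoreTransaction()
            txn.registerUndo("write", os.remove, filename)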

        """
        self._log.append(self.Event(name, undoFunc, args, kwargs))

    @contextlib.contextmanager
    def undoWith(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> Iterator[None]:
        """Register undo function if nested operation succeeds.

        Calls `registerUndo`.

        This can be used to wrap individual undo-able statements within a
        DatastoreTransaction block. Multiple statements that can fail
        separately should not be part of the same `undoWith` block.

        All arguments are forwarded directly to `registerUndo`.
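
        Examples
        --------
        A hedged sketch of the intended pattern; ``path`` and ``do_write``
        are hypothetical placeholders for an artifact location and the code
        that creates it::

            import os

            with transaction.undoWith("write", os.remove, path):
                do_write(path)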

        """
        try:
            yield None
        except BaseException:
            raise
        else:
            self.registerUndo(name, undoFunc, *args, **kwargs)

    def rollback(self) -> None:
        """Roll back all events in this transaction."""
        log = logging.getLogger(__name__)
        while self._log:
            ev = self._log.pop()
            try:
                log.debug("Rolling back transaction: %s: %s(%s,%s)", ev.name,
                          ev.undoFunc,
                          ",".join(str(a) for a in ev.args),
                          ",".join(f"{k}={v}" for k, v in ev.kwargs.items()))
            except Exception:
                # In case we had a problem in stringification of arguments.
                log.warning("Rolling back transaction: %s", ev.name)
            try:
                ev.undoFunc(*ev.args, **ev.kwargs)
            except BaseException as e:
                # Deliberately swallow any error that occurs while unrolling.
                log.warning("Exception: %s caught while unrolling: %s", e, ev.name)
                pass

    def commit(self) -> None:
        """Commit this transaction."""
        if self.parent is None:
            # Just forget about the events; they have already happened.
            return
        else:
            # We may still want the events from this transaction to be
            # rolled back as part of the parent transaction.
            self.parent._log.extend(self._log)


class Datastore(metaclass=ABCMeta):
    """Datastore interface.

    Parameters
    ----------
    config : `DatastoreConfig` or `str`
        Load configuration either from an existing config instance or by
        referring to a configuration file.
    bridgeManager : `DatastoreRegistryBridgeManager`
        Object that manages the interface between `Registry` and datastores.
    butlerRoot : `str`, optional
        New datastore root to use to override the configuration value.
    """

    defaultConfigFile: ClassVar[Optional[str]] = None
    """Path to configuration defaults. Accessed within the ``config`` resource
    or relative to a search path. Can be None if no defaults specified.
    """

    containerKey: ClassVar[Optional[str]] = None
    """Name of the key containing a list of subconfigurations that also
    need to be merged with defaults and will likely use different Python
    datastore classes (but all using DatastoreConfig). Assumed to be a
    list of configurations that can be represented in a DatastoreConfig
    and containing a "cls" definition. None indicates that no containers
    are expected in this Datastore."""

    isEphemeral: bool = False
    """Indicate whether this Datastore is ephemeral or not. An ephemeral
    datastore is one where the contents of the datastore will not exist
    across process restarts. This value can change per-instance."""

    config: DatastoreConfig
    """Configuration used to create Datastore."""

    name: str
    """Label associated with this Datastore."""

    storageClassFactory: StorageClassFactory
    """Factory for creating storage class instances from name."""

    constraints: Constraints
    """Constraints to apply when putting datasets into the datastore."""

    # MyPy does not like for this to be annotated as any kind of type, because
    # it can't do static checking on type variables that can change at runtime.
    IngestPrepData: ClassVar[Any] = IngestPrepData
    """Helper base class for ingest implementations.
    """

    @classmethod
    @abstractmethod
    def setConfigRoot(cls, root: str, config: Config, full: Config, overwrite: bool = True) -> None:
        """Set filesystem-dependent config options for this datastore.

        The options will be appropriate for a new empty repository with the
        given root.

        Parameters
        ----------
        root : `str`
            Filesystem path to the root of the data repository.
        config : `Config`
            A `Config` to update. Only the subset understood by
            this component will be updated. Will not expand
            defaults.
        full : `Config`
            A complete config with all defaults expanded that can be
            converted to a `DatastoreConfig`. Read-only and will not be
            modified by this method.
            Repository-specific options that should not be obtained
            from defaults when Butler instances are constructed
            should be copied from ``full`` to ``config``.
        overwrite : `bool`, optional
            If `False`, do not modify a value in ``config`` if the value
            already exists. Default is always to overwrite with the provided
            ``root``.

        Notes
        -----
        If a keyword is explicitly defined in the supplied ``config`` it
        will not be overridden by this method if ``overwrite`` is `False`.
        This allows explicit values set in external configs to be retained.
        """
        raise NotImplementedError()

    @staticmethod
    def fromConfig(config: Config, bridgeManager: DatastoreRegistryBridgeManager,
                   butlerRoot: Optional[Union[str, ButlerURI]] = None) -> 'Datastore':
        """Create datastore from type specified in config file.

        Parameters
        ----------
        config : `Config`
            Configuration instance.
        bridgeManager : `DatastoreRegistryBridgeManager`
            Object that manages the interface between `Registry` and
            datastores.
        butlerRoot : `str`, optional
            Butler root directory.
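
        Examples
        --------
        A hedged sketch; ``config`` and ``bridgeManager`` are assumed to have
        been obtained elsewhere and the root path is a placeholder::

            datastore = Datastore.fromConfig(config, bridgeManager,
                                             butlerRoot="/path/to/repo")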

        """
        cls = doImport(config["datastore", "cls"])
        return cls(config=config, bridgeManager=bridgeManager, butlerRoot=butlerRoot)

    def __init__(self, config: Union[Config, str],
                 bridgeManager: DatastoreRegistryBridgeManager, butlerRoot: Optional[str] = None):
        self.config = DatastoreConfig(config)
        self.name = "ABCDataStore"
        self._transaction: Optional[DatastoreTransaction] = None

        # All Datastores need storage classes and constraints.
        self.storageClassFactory = StorageClassFactory()

        # And read the constraints list.
        constraintsConfig = self.config.get("constraints")
        self.constraints = Constraints(constraintsConfig, universe=bridgeManager.universe)

    def __str__(self) -> str:
        return self.name

    def __repr__(self) -> str:
        return self.name

    @property
    def names(self) -> Tuple[str, ...]:
        """Names associated with this datastore returned as a tuple.

        Can be different from ``name`` for a chaining datastore.
        """
        # Default implementation returns solely the name itself.
        return (self.name, )

    @contextlib.contextmanager
    def transaction(self) -> Iterator[DatastoreTransaction]:
        """Context manager supporting `Datastore` transactions.

        Transactions can be nested, and are to be used in combination with
        `Registry.transaction`.
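
        Examples
        --------
        A minimal sketch of typical use; ``obj`` and ``ref`` are placeholders
        for an in-memory dataset and its resolved `DatasetRef`::

            with datastore.transaction():
                datastore.put(obj, ref)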

        """
        self._transaction = DatastoreTransaction(self._transaction)
        try:
            yield self._transaction
        except BaseException:
            self._transaction.rollback()
            raise
        else:
            self._transaction.commit()
        self._transaction = self._transaction.parent

    @abstractmethod
    def knows(self, ref: DatasetRef) -> bool:
        """Check if the dataset is known to the datastore.

        Does not check for existence of any artifact.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required dataset.

        Returns
        -------
        exists : `bool`
            `True` if the dataset is known to the datastore.
        """
        raise NotImplementedError()

    def mexists(self, refs: Iterable[DatasetRef],
                artifact_existence: Optional[Dict[ButlerURI, bool]] = None) -> Dict[DatasetRef, bool]:
        """Check the existence of multiple datasets at once.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            The datasets to be checked.
        artifact_existence : `dict` of [`ButlerURI`, `bool`], optional
            Mapping of datastore artifact to existence. Updated by this
            method with details of all artifacts tested. Can be `None`
            if the caller is not interested.

        Returns
        -------
        existence : `dict` of [`DatasetRef`, `bool`]
            Mapping from dataset to boolean indicating existence.
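
        Examples
        --------
        A hedged usage sketch; ``refs`` is assumed to be an iterable of
        resolved `DatasetRef` instances obtained elsewhere::

            existence = datastore.mexists(refs)
            missing = [ref for ref, exists in existence.items() if not exists]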

        """
        existence: Dict[DatasetRef, bool] = {}
        # Non-optimized default.
        for ref in refs:
            existence[ref] = self.exists(ref)
        return existence

    @abstractmethod
    def exists(self, datasetRef: DatasetRef) -> bool:
        """Check if the dataset exists in the datastore.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required dataset.

        Returns
        -------
        exists : `bool`
            `True` if the entity exists in the `Datastore`.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def get(self, datasetRef: DatasetRef, parameters: Optional[Mapping[str, Any]] = None) -> Any:
        """Load an `InMemoryDataset` from the store.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required Dataset.
        parameters : `dict`, optional
            `StorageClass`-specific parameters that specify a slice of the
            Dataset to be loaded.

        Returns
        -------
        inMemoryDataset : `object`
            Requested Dataset or slice thereof as an InMemoryDataset.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def put(self, inMemoryDataset: Any, datasetRef: DatasetRef) -> None:
        """Write an `InMemoryDataset` with a given `DatasetRef` to the store.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Dataset to store.
        datasetRef : `DatasetRef`
            Reference to the associated Dataset.
        """
        raise NotImplementedError("Must be implemented by subclass")

    def _overrideTransferMode(self, *datasets: FileDataset, transfer: Optional[str] = None) -> Optional[str]:
        """Allow ingest transfer mode to be defaulted based on datasets.

        Parameters
        ----------
        datasets : `FileDataset`
            Each positional argument is a struct containing information about
            a file to be ingested, including its path (either absolute or
            relative to the datastore root, if applicable), a complete
            `DatasetRef` (with ``dataset_id not None``), and optionally a
            formatter class or its fully-qualified string name. If a formatter
            is not provided, this method should populate that attribute with
            the formatter the datastore would use for `put`. Subclasses are
            also permitted to modify the path attribute (typically to put it
            in what the datastore considers its standard form).
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            See `ingest` for details of transfer modes.

        Returns
        -------
        newTransfer : `str`
            Transfer mode to use. Will be identical to the supplied transfer
            mode unless "auto" is used.
        """
        if transfer != "auto":
            return transfer
        raise RuntimeError(f"{transfer} is not allowed without specialization.")

    def _prepIngest(self, *datasets: FileDataset, transfer: Optional[str] = None) -> IngestPrepData:
        """Process datasets to identify which ones can be ingested.

        Parameters
        ----------
        datasets : `FileDataset`
            Each positional argument is a struct containing information about
            a file to be ingested, including its path (either absolute or
            relative to the datastore root, if applicable), a complete
            `DatasetRef` (with ``dataset_id not None``), and optionally a
            formatter class or its fully-qualified string name. If a formatter
            is not provided, this method should populate that attribute with
            the formatter the datastore would use for `put`. Subclasses are
            also permitted to modify the path attribute (typically to put it
            in what the datastore considers its standard form).
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            See `ingest` for details of transfer modes.

        Returns
        -------
        data : `IngestPrepData`
            An instance of a subclass of `IngestPrepData`, used to pass
            arbitrary data from `_prepIngest` to `_finishIngest`. This should
            include only the datasets this datastore can actually ingest;
            others should be silently ignored (`Datastore.ingest` will inspect
            `IngestPrepData.refs` and raise `DatasetTypeNotSupportedError` if
            necessary).

        Raises
        ------
        NotImplementedError
            Raised if the datastore does not support the given transfer mode
            (including the case where ingest is not supported at all).
        FileNotFoundError
            Raised if one of the given files does not exist.
        FileExistsError
            Raised if transfer is not `None` but the (internal) location the
            file would be moved to is already occupied.

        Notes
        -----
        This method (along with `_finishIngest`) should be implemented by
        subclasses to provide ingest support instead of implementing `ingest`
        directly.

        `_prepIngest` should not modify the data repository or given files in
        any way; all changes should be deferred to `_finishIngest`.

        When possible, exceptions should be raised in `_prepIngest` instead of
        `_finishIngest`. `NotImplementedError` exceptions that indicate that
        the transfer mode is not supported must be raised by `_prepIngest`
        instead of `_finishIngest`.
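
        Examples
        --------
        A hedged sketch of how a subclass might split the work between
        `_prepIngest` and `_finishIngest`; ``_can_ingest`` and
        ``_register_artifact`` are illustrative helper names, not part of
        this API::

            class MyDatastore(Datastore):

                def _prepIngest(self, *datasets, transfer=None):
                    accepted = [d for d in datasets if self._can_ingest(d, transfer)]
                    return IngestPrepData(ref for d in accepted for ref in d.refs)

                def _finishIngest(self, prepData, *, transfer=None):
                    for ref in prepData.refs.values():
                        self._register_artifact(ref, transfer=transfer)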

        """
        raise NotImplementedError(
            f"Datastore {self} does not support direct file-based ingest."
        )

    def _finishIngest(self, prepData: IngestPrepData, *, transfer: Optional[str] = None) -> None:
        """Complete an ingest operation.

        Parameters
        ----------
        prepData : `IngestPrepData`
            An instance of a subclass of `IngestPrepData`. Guaranteed to be
            the direct result of a call to `_prepIngest` on this datastore.
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            See `ingest` for details of transfer modes.

        Raises
        ------
        FileNotFoundError
            Raised if one of the given files does not exist.
        FileExistsError
            Raised if transfer is not `None` but the (internal) location the
            file would be moved to is already occupied.

        Notes
        -----
        This method (along with `_prepIngest`) should be implemented by
        subclasses to provide ingest support instead of implementing `ingest`
        directly.
        """
        raise NotImplementedError(
            f"Datastore {self} does not support direct file-based ingest."
        )

    def ingest(self, *datasets: FileDataset, transfer: Optional[str] = None) -> None:
        """Ingest one or more files into the datastore.

        Parameters
        ----------
        datasets : `FileDataset`
            Each positional argument is a struct containing information about
            a file to be ingested, including its path (either absolute or
            relative to the datastore root, if applicable), a complete
            `DatasetRef` (with ``dataset_id not None``), and optionally a
            formatter class or its fully-qualified string name. If a formatter
            is not provided, the one the datastore would use for ``put`` on
            that dataset is assumed.
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            If `None` (default), the file must already be in a location
            appropriate for the datastore (e.g. within its root directory),
            and will not be modified. Other choices include "move", "copy",
            "link", "symlink", "relsymlink", and "hardlink". "link" is a
            special transfer mode that will first try to make a hardlink and
            if that fails a symlink will be used instead. "relsymlink" creates
            a relative symlink rather than use an absolute path.
            Most datastores do not support all transfer modes.
            "auto" is a special option that will let the
            data store choose the most natural option for itself.

        Raises
        ------
        NotImplementedError
            Raised if the datastore does not support the given transfer mode
            (including the case where ingest is not supported at all).
        DatasetTypeNotSupportedError
            Raised if one or more files to be ingested have a dataset type
            that is not supported by the datastore.
        FileNotFoundError
            Raised if one of the given files does not exist.
        FileExistsError
            Raised if transfer is not `None` but the (internal) location the
            file would be moved to is already occupied.

        Notes
        -----
        Subclasses should implement `_prepIngest` and `_finishIngest` instead
        of implementing `ingest` directly. Datastores that hold and
        delegate to child datastores may want to call those methods as well.

        Subclasses are encouraged to document their supported transfer modes
        in their class documentation.
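
        Examples
        --------
        A hedged sketch; the path, the ``ref``, and the choice of "copy" are
        placeholders, and not every datastore supports that transfer mode::

            datastore.ingest(FileDataset(path="data/file.fits", refs=[ref]),
                             transfer="copy")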

        """
        # Allow a datastore to select a default transfer mode.
        transfer = self._overrideTransferMode(*datasets, transfer=transfer)
        prepData = self._prepIngest(*datasets, transfer=transfer)
        refs = {ref.id: ref for dataset in datasets for ref in dataset.refs}
        if None in refs:
            # Find the file for the error message. There may be multiple
            # bad refs so look for all of them.
            unresolved_paths = {}
            for dataset in datasets:
                unresolved = []
                for ref in dataset.refs:
                    if ref.id is None:
                        unresolved.append(ref)
                if unresolved:
                    unresolved_paths[dataset.path] = unresolved
            raise RuntimeError("Attempt to ingest unresolved DatasetRef from: "
                               + ",".join(f"{p}: ({[str(r) for r in ref]})"
                                          for p, ref in unresolved_paths.items()))
        if refs.keys() != prepData.refs.keys():
            unsupported = refs.keys() - prepData.refs.keys()
            # Group unsupported refs by DatasetType for an informative
            # but still concise error message.
            byDatasetType = defaultdict(list)
            for datasetId in unsupported:
                ref = refs[datasetId]
                byDatasetType[ref.datasetType].append(ref)
            raise DatasetTypeNotSupportedError(
                "DatasetType(s) not supported in ingest: "
                + ", ".join(f"{k.name} ({len(v)} dataset(s))" for k, v in byDatasetType.items())
            )
        self._finishIngest(prepData, transfer=transfer)

    def transfer_from(self, source_datastore: Datastore, refs: Iterable[DatasetRef],
                      local_refs: Optional[Iterable[DatasetRef]] = None,
                      transfer: str = "auto",
                      artifact_existence: Optional[Dict[ButlerURI, bool]] = None) -> None:
        """Transfer dataset artifacts from another datastore to this one.

        Parameters
        ----------
        source_datastore : `Datastore`
            The datastore from which to transfer artifacts. That datastore
            must be compatible with this datastore receiving the artifacts.
        refs : iterable of `DatasetRef`
            The datasets to transfer from the source datastore.
        local_refs : iterable of `DatasetRef`, optional
            The dataset refs known to the registry associated with this
            datastore. Can be `None` if the source and target datastores
            are using UUIDs.
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            Choices include "move", "copy",
            "link", "symlink", "relsymlink", and "hardlink". "link" is a
            special transfer mode that will first try to make a hardlink and
            if that fails a symlink will be used instead. "relsymlink" creates
            a relative symlink rather than use an absolute path.
            Most datastores do not support all transfer modes.
            "auto" (the default) is a special option that will let the
            data store choose the most natural option for itself.
            If the source location and transfer location are identical the
            transfer mode will be ignored.
        artifact_existence : `dict` of [`ButlerURI`, `bool`], optional
            Mapping of datastore artifact to existence. Updated by this
            method with details of all artifacts tested. Can be `None`
            if the caller is not interested.

        Raises
        ------
        TypeError
            Raised if the two datastores are not compatible.
        """
        if type(self) is not type(source_datastore):
            raise TypeError(f"Datastore mismatch between this datastore ({type(self)}) and the "
                            f"source datastore ({type(source_datastore)}).")

        raise NotImplementedError(f"Datastore {type(self)} must implement a transfer_from method.")

    @abstractmethod
    def getURIs(self, datasetRef: DatasetRef,
                predict: bool = False) -> Tuple[Optional[ButlerURI], Dict[str, ButlerURI]]:
        """Return URIs associated with dataset.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required dataset.
        predict : `bool`, optional
            If the datastore does not know about the dataset, should it
            return a predicted URI or not?

        Returns
        -------
        primary : `ButlerURI`
            The URI to the primary artifact associated with this dataset.
            If the dataset was disassembled within the datastore this
            may be `None`.
        components : `dict`
            URIs to any components associated with the dataset artifact.
            Can be empty if there are no components.
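
        Examples
        --------
        A hedged sketch of how the two return values are typically unpacked;
        ``ref`` is a placeholder for a resolved `DatasetRef`::

            primary, components = datastore.getURIs(ref)
            if primary is None:
                # Disassembled dataset; use the per-component URIs instead.
                uris = list(components.values())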

        """
        raise NotImplementedError()

    @abstractmethod
    def getURI(self, datasetRef: DatasetRef, predict: bool = False) -> ButlerURI:
        """URI to the Dataset.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required Dataset.
        predict : `bool`, optional
            If `True` attempt to predict the URI for a dataset if it does
            not exist in datastore.

        Returns
        -------
        uri : `ButlerURI`
            URI pointing to the Dataset within the datastore. If the
            Dataset does not exist in the datastore, the URI may be a guess.
            If the datastore does not have entities that relate well
            to the concept of a URI the returned URI will be
            descriptive. The returned URI is not guaranteed to be obtainable.

        Raises
        ------
        FileNotFoundError
            Raised if a URI has been requested for a dataset that does not
            exist and guessing is not allowed.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def retrieveArtifacts(self, refs: Iterable[DatasetRef],
                          destination: ButlerURI, transfer: str = "auto",
                          preserve_path: bool = True,
                          overwrite: bool = False) -> List[ButlerURI]:
        """Retrieve the artifacts associated with the supplied refs.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            The datasets for which artifacts are to be retrieved.
            A single ref can result in multiple artifacts. The refs must
            be resolved.
        destination : `ButlerURI`
            Location to write the artifacts.
        transfer : `str`, optional
            Method to use to transfer the artifacts. Must be one of the
            options supported by `ButlerURI.transfer_from()`. "move" is not
            allowed.
        preserve_path : `bool`, optional
            If `True` the full path of the artifact within the datastore
            is preserved. If `False` the final file component of the path
            is used.
        overwrite : `bool`, optional
            If `True` allow transfers to overwrite existing files at the
            destination.

        Returns
        -------
        targets : `list` of `ButlerURI`
            URIs of file artifacts in destination location. Order is not
            preserved.

        Notes
        -----
        For non-file datastores the artifacts written to the destination
        may not match the representation inside the datastore. For example
        a hierarchical data structure in a NoSQL database may well be stored
        as a JSON file.
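
        Examples
        --------
        A hedged sketch; ``refs`` and the destination directory are
        placeholders, and "copy" may not be supported by every datastore::

            targets = datastore.retrieveArtifacts(refs,
                                                  ButlerURI("artifacts/"),
                                                  transfer="copy")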

        """
        raise NotImplementedError()

    @abstractmethod
    def remove(self, datasetRef: DatasetRef) -> None:
        """Indicate to the Datastore that a Dataset can be removed.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required Dataset.

        Raises
        ------
        FileNotFoundError
            Raised when the Dataset does not exist.

        Notes
        -----
        Some Datastores may implement this method as a silent no-op to
        disable Dataset deletion through standard interfaces.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def forget(self, refs: Iterable[DatasetRef]) -> None:
        """Indicate to the Datastore that it should remove all records of the
        given datasets, without actually deleting them.

        Parameters
        ----------
        refs : `Iterable` [ `DatasetRef` ]
            References to the datasets being forgotten.

        Notes
        -----
        Asking a datastore to forget a `DatasetRef` it does not hold should be
        a silent no-op, not an error.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def trash(self, ref: Union[DatasetRef, Iterable[DatasetRef]], ignore_errors: bool = True) -> None:
        """Indicate to the Datastore that a Dataset can be moved to the trash.

        Parameters
        ----------
        ref : `DatasetRef` or iterable thereof
            Reference(s) to the required Dataset.
        ignore_errors : `bool`, optional
            Determine whether errors should be ignored. When multiple
            refs are being trashed there will be no per-ref check.

        Raises
        ------
        FileNotFoundError
            Raised when the Dataset does not exist and errors are not being
            ignored. Only checked if a single ref is supplied (and not in a
            list).

        Notes
        -----
        Some Datastores may implement this method as a silent no-op to
        disable Dataset deletion through standard interfaces.
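
        Examples
        --------
        A hedged sketch of the usual two-step removal; ``ref`` is a
        placeholder for a resolved `DatasetRef`::

            datastore.trash(ref)
            datastore.emptyTrash()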

        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def emptyTrash(self, ignore_errors: bool = True) -> None:
        """Remove all datasets from the trash.

        Parameters
        ----------
        ignore_errors : `bool`, optional
            Determine whether errors should be ignored.

        Notes
        -----
        Some Datastores may implement this method as a silent no-op to
        disable Dataset deletion through standard interfaces.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def transfer(self, inputDatastore: Datastore, datasetRef: DatasetRef) -> None:
        """Transfer a dataset from another datastore to this datastore.

        Parameters
        ----------
        inputDatastore : `Datastore`
            The external `Datastore` from which to retrieve the Dataset.
        datasetRef : `DatasetRef`
            Reference to the required Dataset.
        """
        raise NotImplementedError("Must be implemented by subclass")

    def export(self, refs: Iterable[DatasetRef], *,
               directory: Optional[str] = None, transfer: Optional[str] = None) -> Iterable[FileDataset]:
        """Export datasets for transfer to another data repository.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            Dataset references to be exported.
        directory : `str`, optional
            Path to a directory that should contain files corresponding to
            output datasets. Ignored if ``transfer`` is `None`.
        transfer : `str`, optional
            Mode that should be used to move datasets out of the repository.
            Valid options are the same as those of the ``transfer`` argument
            to ``ingest``, and datastores may similarly signal that a transfer
            mode is not supported by raising `NotImplementedError`.

        Returns
        -------
        datasets : iterable of `FileDataset`
            Structs containing information about the exported datasets, in the
            same order as ``refs``.

        Raises
        ------
        NotImplementedError
            Raised if the given transfer mode is not supported.
        """
        raise NotImplementedError(f"Transfer mode {transfer} not supported.")

    @abstractmethod
    def validateConfiguration(self, entities: Iterable[Union[DatasetRef, DatasetType, StorageClass]],
                              logFailures: bool = False) -> None:
        """Validate some of the configuration for this datastore.

        Parameters
        ----------
        entities : iterable of `DatasetRef`, `DatasetType`, or `StorageClass`
            Entities to test against this configuration. Can be differing
            types.
        logFailures : `bool`, optional
            If `True`, output a log message for every validation error
            detected.

        Raises
        ------
        DatastoreValidationError
            Raised if there is a validation problem with a configuration.

        Notes
        -----
        Which parts of the configuration are validated is at the discretion
        of each Datastore implementation.
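
        Examples
        --------
        A hedged sketch; ``datasetType`` is a placeholder for an entity of
        interest::

            try:
                datastore.validateConfiguration([datasetType], logFailures=True)
            except DatastoreValidationError as e:
                print(f"Configuration problem: {e}")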

        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def validateKey(self,
                    lookupKey: LookupKey, entity: Union[DatasetRef, DatasetType, StorageClass]) -> None:
        """Validate a specific look up key with supplied entity.

        Parameters
        ----------
        lookupKey : `LookupKey`
            Key to use to retrieve information from the datastore
            configuration.
        entity : `DatasetRef`, `DatasetType`, or `StorageClass`
            Entity to compare with configuration retrieved using the
            specified lookup key.

        Raises
        ------
        DatastoreValidationError
            Raised if there is a problem with the combination of entity
            and lookup key.

        Notes
        -----
        Bypasses the normal selection priorities by allowing a key that
        would normally not be selected to be validated.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def getLookupKeys(self) -> Set[LookupKey]:
        """Return all the lookup keys relevant to this datastore.

        Returns
        -------
        keys : `set` of `LookupKey`
            The keys stored internally for looking up information based
            on `DatasetType` name or `StorageClass`.
        """
        raise NotImplementedError("Must be implemented by subclass")

    def needs_expanded_data_ids(
        self,
        transfer: Optional[str],
        entity: Optional[Union[DatasetRef, DatasetType, StorageClass]] = None,
    ) -> bool:
        """Test whether this datastore needs expanded data IDs to ingest.

        Parameters
        ----------
        transfer : `str` or `None`
            Transfer mode for ingest.
        entity : `DatasetRef`, `DatasetType`, or `StorageClass`, optional
            Object representing what will be ingested. If not provided (or
            not specific enough), `True` may be returned even if expanded
            data IDs aren't necessary.

        Returns
        -------
        needed : `bool`
            If `True`, expanded data IDs may be needed. `False` only if
            expansion definitely isn't necessary.
        """
        return True