# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Support for generic data stores."""

from __future__ import annotations

__all__ = ("DatastoreConfig", "Datastore", "DatastoreValidationError")

import contextlib
import logging
from collections import defaultdict
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    ClassVar,
    Dict,
    Iterable,
    Iterator,
    List,
    Mapping,
    Optional,
    Set,
    Tuple,
    Type,
    Union,
)

from dataclasses import dataclass
from abc import ABCMeta, abstractmethod

from lsst.utils import doImport
from .config import ConfigSubset, Config
from .exceptions import ValidationError, DatasetTypeNotSupportedError
from .constraints import Constraints
from .storageClass import StorageClassFactory
from .fileDataset import FileDataset

if TYPE_CHECKING:
    from ..registry.interfaces import DatastoreRegistryBridgeManager
    from .datasets import DatasetRef, DatasetType
    from .configSupport import LookupKey
    from .storageClass import StorageClass
    from ._butlerUri import ButlerURI


class DatastoreConfig(ConfigSubset):
    """Configuration for Datastores."""

    component = "datastore"
    requiredKeys = ("cls",)
    defaultConfigFile = "datastore.yaml"


class DatastoreValidationError(ValidationError):
    """There is a problem with the Datastore configuration."""

    pass


@dataclass(frozen=True)
class Event:
    __slots__ = ("name", "undoFunc", "args", "kwargs")
    name: str
    undoFunc: Callable
    args: tuple
    kwargs: dict


class IngestPrepData:
    """A helper base class for `Datastore` ingest implementations.

    Datastore implementations will generally need a custom implementation of
    this class.

    Should be accessed as ``Datastore.IngestPrepData`` instead of via direct
    import.

    Parameters
    ----------
    refs : iterable of `DatasetRef`
        References for the datasets that can be ingested by this datastore.
    """

    def __init__(self, refs: Iterable[DatasetRef]):
        self.refs = {ref.id: ref for ref in refs}


class DatastoreTransaction:
    """Keeps a log of `Datastore` activity and allows rollback.

    Parameters
    ----------
    parent : `DatastoreTransaction`, optional
        The parent transaction (if any).
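
    Examples
    --------
    A minimal sketch of the intended usage, assuming a hypothetical
    ``removeFile`` cleanup function; the transaction records an undo
    action for each completed operation and replays the actions in
    reverse order on ``rollback``::

        txn = DatastoreTransaction()
        txn.registerUndo("write", removeFile, "a/b/c.fits")
        txn.rollback()  # calls removeFile("a/b/c.fits")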

    """

    Event: ClassVar[Type] = Event

    parent: Optional[DatastoreTransaction]
    """The parent transaction. (`DatastoreTransaction`, optional)"""

    def __init__(self, parent: Optional[DatastoreTransaction] = None):
        self.parent = parent
        self._log: List[Event] = []

    def registerUndo(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> None:
        """Register event with undo function.

        Parameters
        ----------
        name : `str`
            Name of the event.
        undoFunc : `Callable`
            Function to undo this event.
        args : `tuple`
            Positional arguments to ``undoFunc``.
        kwargs : `dict`
            Keyword arguments to ``undoFunc``.
        """
        self._log.append(self.Event(name, undoFunc, args, kwargs))

    @contextlib.contextmanager
    def undoWith(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> Iterator[None]:
        """Register undo function if nested operation succeeds.

        Calls `registerUndo`.

        This can be used to wrap individual undo-able statements within a
        `DatastoreTransaction` block. Multiple statements that can fail
        separately should not be part of the same `undoWith` block.

        All arguments are forwarded directly to `registerUndo`.
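
        Examples
        --------
        A minimal sketch, assuming hypothetical ``writeFile`` and
        ``removeFile`` helpers; the undo action is only recorded if the
        wrapped statement succeeds::

            txn = DatastoreTransaction()
            with txn.undoWith("write", removeFile, path):
                writeFile(path)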

        """
        try:
            yield None
        except BaseException:
            # Do not register the undo function if the nested block failed.
            raise
        else:
            self.registerUndo(name, undoFunc, *args, **kwargs)

    def rollback(self) -> None:
        """Roll back all events in this transaction."""
        log = logging.getLogger(__name__)
        while self._log:
            ev = self._log.pop()
            try:
                log.debug("Rolling back transaction: %s: %s(%s,%s)", ev.name,
                          ev.undoFunc,
                          ",".join(str(a) for a in ev.args),
                          ",".join(f"{k}={v}" for k, v in ev.kwargs.items()))
            except Exception:
                # In case we had a problem in stringification of arguments.
                log.warning("Rolling back transaction: %s", ev.name)
            try:
                ev.undoFunc(*ev.args, **ev.kwargs)
            except BaseException as e:
                # Deliberately swallow any error that may occur in unrolling.
                log.warning("Exception: %s caught while unrolling: %s", e, ev.name)

    def commit(self) -> None:
        """Commit this transaction."""
        if self.parent is None:
            # Just forget about the events, they have already happened.
            return
        else:
            # We may still want to roll back the events from this
            # transaction as part of the parent transaction.
            self.parent._log.extend(self._log)


class Datastore(metaclass=ABCMeta):
    """Datastore interface.

    Parameters
    ----------
    config : `DatastoreConfig` or `str`
        Load configuration either from an existing config instance or by
        referring to a configuration file.
    bridgeManager : `DatastoreRegistryBridgeManager`
        Object that manages the interface between `Registry` and datastores.
    butlerRoot : `str`, optional
        New datastore root to use to override the configuration value.
    """

    defaultConfigFile: ClassVar[Optional[str]] = None
    """Path to configuration defaults. Accessed within the ``config`` resource
    or relative to a search path. Can be None if no defaults specified.
    """

    containerKey: ClassVar[Optional[str]] = None
    """Name of the key containing a list of subconfigurations that also
    need to be merged with defaults and will likely use different Python
    datastore classes (but all using DatastoreConfig). Assumed to be a
    list of configurations that can be represented in a DatastoreConfig
    and containing a "cls" definition. None indicates that no containers
    are expected in this Datastore."""

    isEphemeral: bool = False
    """Indicate whether this Datastore is ephemeral or not. An ephemeral
    datastore is one where the contents of the datastore will not exist
    across process restarts. This value can change per-instance."""

    config: DatastoreConfig
    """Configuration used to create Datastore."""

    name: str
    """Label associated with this Datastore."""

    storageClassFactory: StorageClassFactory
    """Factory for creating storage class instances from name."""

    constraints: Constraints
    """Constraints to apply when putting datasets into the datastore."""

    # MyPy does not like for this to be annotated as any kind of type, because
    # it can't do static checking on type variables that can change at runtime.
    IngestPrepData: ClassVar[Any] = IngestPrepData
    """Helper base class for ingest implementations.
    """

    @classmethod
    @abstractmethod
    def setConfigRoot(cls, root: str, config: Config, full: Config, overwrite: bool = True) -> None:
        """Set filesystem-dependent config options for this datastore.

        The options will be appropriate for a new empty repository with the
        given root.

        Parameters
        ----------
        root : `str`
            Filesystem path to the root of the data repository.
        config : `Config`
            A `Config` to update. Only the subset understood by
            this component will be updated. Will not expand
            defaults.
        full : `Config`
            A complete config with all defaults expanded that can be
            converted to a `DatastoreConfig`. Read-only and will not be
            modified by this method.
            Repository-specific options that should not be obtained
            from defaults when Butler instances are constructed
            should be copied from ``full`` to ``config``.
        overwrite : `bool`, optional
            If `False`, do not modify a value in ``config`` if the value
            already exists. Default is always to overwrite with the provided
            ``root``.

        Notes
        -----
        If a keyword is explicitly defined in the supplied ``config`` it
        will not be overridden by this method if ``overwrite`` is `False`.
        This allows explicit values set in external configs to be retained.
        """
        raise NotImplementedError()

    @staticmethod
    def fromConfig(config: Config, bridgeManager: DatastoreRegistryBridgeManager,
                   butlerRoot: Optional[Union[str, ButlerURI]] = None) -> Datastore:
        """Create datastore from type specified in config file.

        Parameters
        ----------
        config : `Config`
            Configuration instance.
        bridgeManager : `DatastoreRegistryBridgeManager`
            Object that manages the interface between `Registry` and
            datastores.
        butlerRoot : `str`, optional
            Butler root directory.
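
        Examples
        --------
        A minimal sketch, assuming a repository configuration file that
        defines ``datastore.cls`` and a previously constructed
        ``bridgeManager``::

            config = Config("butler.yaml")
            datastore = Datastore.fromConfig(config, bridgeManager)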

        """
        cls = doImport(config["datastore", "cls"])
        return cls(config=config, bridgeManager=bridgeManager, butlerRoot=butlerRoot)

    def __init__(self, config: Union[Config, str],
                 bridgeManager: DatastoreRegistryBridgeManager, butlerRoot: Optional[str] = None):
        self.config = DatastoreConfig(config)
        self.name = "ABCDataStore"
        self._transaction: Optional[DatastoreTransaction] = None

        # All Datastores need storage classes and constraints.
        self.storageClassFactory = StorageClassFactory()

        # And read the constraints list.
        constraintsConfig = self.config.get("constraints")
        self.constraints = Constraints(constraintsConfig, universe=bridgeManager.universe)

    def __str__(self) -> str:
        return self.name

    def __repr__(self) -> str:
        return self.name

    @property
    def names(self) -> Tuple[str, ...]:
        """Names associated with this datastore returned as a tuple.

        Can be different from ``name`` for a chaining datastore.
        """
        # Default implementation returns solely the name itself.
        return (self.name, )

    @contextlib.contextmanager
    def transaction(self) -> Iterator[DatastoreTransaction]:
        """Context manager supporting `Datastore` transactions.

        Transactions can be nested, and are to be used in combination with
        `Registry.transaction`.
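
        Examples
        --------
        A minimal sketch, assuming a concrete ``datastore`` instance and a
        resolved ``ref``; the ``put`` is rolled back if the block raises::

            with datastore.transaction():
                datastore.put(inMemoryDataset, ref)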

        """
        self._transaction = DatastoreTransaction(self._transaction)
        try:
            yield self._transaction
        except BaseException:
            self._transaction.rollback()
            raise
        else:
            self._transaction.commit()
        self._transaction = self._transaction.parent

    @abstractmethod
    def exists(self, datasetRef: DatasetRef) -> bool:
        """Check if the dataset exists in the datastore.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required dataset.

        Returns
        -------
        exists : `bool`
            `True` if the entity exists in the `Datastore`.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def get(self, datasetRef: DatasetRef, parameters: Optional[Mapping[str, Any]] = None) -> Any:
        """Load an `InMemoryDataset` from the store.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required Dataset.
        parameters : `dict`, optional
            `StorageClass`-specific parameters that specify a slice of the
            Dataset to be loaded.

        Returns
        -------
        inMemoryDataset : `object`
            Requested Dataset or slice thereof as an `InMemoryDataset`.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def put(self, inMemoryDataset: Any, datasetRef: DatasetRef) -> None:
        """Write an `InMemoryDataset` with a given `DatasetRef` to the store.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Dataset to store.
        datasetRef : `DatasetRef`
            Reference to the associated Dataset.
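
        Examples
        --------
        A minimal sketch of a put/get round trip, assuming a concrete
        ``datastore`` and a resolved ``ref`` whose storage class matches
        ``table``::

            datastore.put(table, ref)
            assert datastore.exists(ref)
            table2 = datastore.get(ref)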

        """
        raise NotImplementedError("Must be implemented by subclass")

    def _overrideTransferMode(self, *datasets: FileDataset, transfer: Optional[str] = None) -> Optional[str]:
        """Allow ingest transfer mode to be defaulted based on datasets.

        Parameters
        ----------
        datasets : `FileDataset`
            Each positional argument is a struct containing information about
            a file to be ingested, including its path (either absolute or
            relative to the datastore root, if applicable), a complete
            `DatasetRef` (with ``dataset_id not None``), and optionally a
            formatter class or its fully-qualified string name. If a formatter
            is not provided, this method should populate that attribute with
            the formatter the datastore would use for `put`. Subclasses are
            also permitted to modify the path attribute (typically to put it
            in what the datastore considers its standard form).
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            See `ingest` for details of transfer modes.

        Returns
        -------
        newTransfer : `str`
            Transfer mode to use. Will be identical to the supplied transfer
            mode unless "auto" is used.
        """
        if transfer != "auto":
            return transfer
        raise RuntimeError(f"{transfer} is not allowed without specialization.")

    def _prepIngest(self, *datasets: FileDataset, transfer: Optional[str] = None) -> IngestPrepData:
        """Process datasets to identify which ones can be ingested.

        Parameters
        ----------
        datasets : `FileDataset`
            Each positional argument is a struct containing information about
            a file to be ingested, including its path (either absolute or
            relative to the datastore root, if applicable), a complete
            `DatasetRef` (with ``dataset_id not None``), and optionally a
            formatter class or its fully-qualified string name. If a formatter
            is not provided, this method should populate that attribute with
            the formatter the datastore would use for `put`. Subclasses are
            also permitted to modify the path attribute (typically to put it
            in what the datastore considers its standard form).
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            See `ingest` for details of transfer modes.

        Returns
        -------
        data : `IngestPrepData`
            An instance of a subclass of `IngestPrepData`, used to pass
            arbitrary data from `_prepIngest` to `_finishIngest`. This should
            include only the datasets this datastore can actually ingest;
            others should be silently ignored (`Datastore.ingest` will inspect
            `IngestPrepData.refs` and raise `DatasetTypeNotSupportedError` if
            necessary).

        Raises
        ------
        NotImplementedError
            Raised if the datastore does not support the given transfer mode
            (including the case where ingest is not supported at all).
        FileNotFoundError
            Raised if one of the given files does not exist.
        FileExistsError
            Raised if transfer is not `None` but the (internal) location the
            file would be moved to is already occupied.

        Notes
        -----
        This method (along with `_finishIngest`) should be implemented by
        subclasses to provide ingest support instead of implementing `ingest`
        directly.

        `_prepIngest` should not modify the data repository or given files in
        any way; all changes should be deferred to `_finishIngest`.

        When possible, exceptions should be raised in `_prepIngest` instead of
        `_finishIngest`. `NotImplementedError` exceptions that indicate that
        the transfer mode is not supported must be raised by `_prepIngest`
        instead of `_finishIngest`.
        """
        raise NotImplementedError(
            "Datastore does not support direct file-based ingest."
        )

    def _finishIngest(self, prepData: IngestPrepData, *, transfer: Optional[str] = None) -> None:
        """Complete an ingest operation.

        Parameters
        ----------
        prepData : `IngestPrepData`
            An instance of a subclass of `IngestPrepData`. Guaranteed to be
            the direct result of a call to `_prepIngest` on this datastore.
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            See `ingest` for details of transfer modes.

        Raises
        ------
        FileNotFoundError
            Raised if one of the given files does not exist.
        FileExistsError
            Raised if transfer is not `None` but the (internal) location the
            file would be moved to is already occupied.

        Notes
        -----
        This method (along with `_prepIngest`) should be implemented by
        subclasses to provide ingest support instead of implementing `ingest`
        directly.
        """
        raise NotImplementedError(
            "Datastore does not support direct file-based ingest."
        )

    def ingest(self, *datasets: FileDataset, transfer: Optional[str] = None) -> None:
        """Ingest one or more files into the datastore.

        Parameters
        ----------
        datasets : `FileDataset`
            Each positional argument is a struct containing information about
            a file to be ingested, including its path (either absolute or
            relative to the datastore root, if applicable), a complete
            `DatasetRef` (with ``dataset_id not None``), and optionally a
            formatter class or its fully-qualified string name. If a formatter
            is not provided, the one the datastore would use for ``put`` on
            that dataset is assumed.
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            If `None` (default), the file must already be in a location
            appropriate for the datastore (e.g. within its root directory),
            and will not be modified. Other choices include "move", "copy",
            "link", "symlink", "relsymlink", and "hardlink". "link" is a
            special transfer mode that will first try to make a hardlink and,
            if that fails, a symlink will be used instead. "relsymlink"
            creates a relative symlink rather than using an absolute path.
            "auto" is a special option that lets the datastore choose the
            most natural option for itself. Most datastores do not support
            all transfer modes.

        Raises
        ------
        NotImplementedError
            Raised if the datastore does not support the given transfer mode
            (including the case where ingest is not supported at all).
        DatasetTypeNotSupportedError
            Raised if one or more files to be ingested have a dataset type
            that is not supported by the datastore.
        FileNotFoundError
            Raised if one of the given files does not exist.
        FileExistsError
            Raised if transfer is not `None` but the (internal) location the
            file would be moved to is already occupied.

        Notes
        -----
        Subclasses should implement `_prepIngest` and `_finishIngest` instead
        of implementing `ingest` directly. Datastores that hold and
        delegate to child datastores may want to call those methods as well.

        Subclasses are encouraged to document their supported transfer modes
        in their class documentation.
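
        Examples
        --------
        A minimal sketch, assuming a resolved ``ref`` and an existing file
        at a hypothetical path::

            datastore.ingest(FileDataset(path="files/data.fits", refs=[ref]),
                             transfer="copy")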

        """
        # Allow a datastore to select a default transfer mode.
        transfer = self._overrideTransferMode(*datasets, transfer=transfer)
        prepData = self._prepIngest(*datasets, transfer=transfer)
        refs = {ref.id: ref for dataset in datasets for ref in dataset.refs}
        if None in refs:
            # Find the files for the error message. There may be multiple
            # bad refs so look for all of them.
            unresolved_paths = {}
            for dataset in datasets:
                unresolved = []
                for ref in dataset.refs:
                    if ref.id is None:
                        unresolved.append(ref)
                if unresolved:
                    unresolved_paths[dataset.path] = unresolved
            raise RuntimeError("Attempt to ingest unresolved DatasetRef from: "
                               + ", ".join(f"{path}: ({[str(r) for r in unrefs]})"
                                           for path, unrefs in unresolved_paths.items()))
        if refs.keys() != prepData.refs.keys():
            unsupported = refs.keys() - prepData.refs.keys()
            # Group unsupported refs by DatasetType for an informative
            # but still concise error message.
            byDatasetType = defaultdict(list)
            for datasetId in unsupported:
                ref = refs[datasetId]
                byDatasetType[ref.datasetType].append(ref)
            raise DatasetTypeNotSupportedError(
                "DatasetType(s) not supported in ingest: "
                + ", ".join(f"{k.name} ({len(v)} dataset(s))" for k, v in byDatasetType.items())
            )
        self._finishIngest(prepData, transfer=transfer)

    @abstractmethod
    def getURIs(self, datasetRef: DatasetRef,
                predict: bool = False) -> Tuple[Optional[ButlerURI], Dict[str, ButlerURI]]:
        """Return URIs associated with dataset.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required dataset.
        predict : `bool`, optional
            If the datastore does not know about the dataset, should it
            return a predicted URI or not?

        Returns
        -------
        primary : `ButlerURI`
            The URI to the primary artifact associated with this dataset.
            If the dataset was disassembled within the datastore this
            may be `None`.
        components : `dict`
            URIs to any components associated with the dataset artifact.
            Can be empty if there are no components.
        """
        raise NotImplementedError()

    @abstractmethod
    def getURI(self, datasetRef: DatasetRef, predict: bool = False) -> ButlerURI:
        """URI to the Dataset.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required Dataset.
        predict : `bool`
            If `True` attempt to predict the URI for a dataset if it does
            not exist in the datastore.

        Returns
        -------
        uri : `ButlerURI`
            URI pointing to the Dataset within the datastore. If the
            Dataset does not exist in the datastore, the URI may be a guess.
            If the datastore does not have entities that relate well
            to the concept of a URI the returned URI will be
            descriptive. The returned URI is not guaranteed to be obtainable.

        Raises
        ------
        FileNotFoundError
            Raised if a URI has been requested for a dataset that does not
            exist and guessing is not allowed.
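
        Examples
        --------
        A minimal sketch, assuming the dataset already exists in a concrete
        ``datastore``::

            uri = datastore.getURI(ref)
            print(uri)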

        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def retrieveArtifacts(self, refs: Iterable[DatasetRef],
                          destination: ButlerURI, transfer: str = "auto",
                          preserve_path: bool = True,
                          overwrite: bool = False) -> List[ButlerURI]:
        """Retrieve the artifacts associated with the supplied refs.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            The datasets for which artifacts are to be retrieved.
            A single ref can result in multiple artifacts. The refs must
            be resolved.
        destination : `ButlerURI`
            Location to write the artifacts.
        transfer : `str`, optional
            Method to use to transfer the artifacts. Must be one of the
            options supported by `ButlerURI.transfer_from()`. "move" is not
            allowed.
        preserve_path : `bool`, optional
            If `True` the full path of the artifact within the datastore
            is preserved. If `False` the final file component of the path
            is used.
        overwrite : `bool`, optional
            If `True` allow transfers to overwrite existing files at the
            destination.

        Returns
        -------
        targets : `list` of `ButlerURI`
            URIs of file artifacts in destination location. Order is not
            preserved.

        Notes
        -----
        For non-file datastores the artifacts written to the destination
        may not match the representation inside the datastore. For example
        a hierarchical data structure in a NoSQL database may well be stored
        as a JSON file.
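
        Examples
        --------
        A minimal sketch, assuming a concrete ``datastore`` and a local
        output directory (the trailing slash marks it as a directory)::

            destination = ButlerURI("retrieved/")
            copied = datastore.retrieveArtifacts([ref], destination,
                                                 transfer="copy")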

        """
        raise NotImplementedError()

    @abstractmethod
    def remove(self, datasetRef: DatasetRef) -> None:
        """Indicate to the Datastore that a Dataset can be removed.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required Dataset.

        Raises
        ------
        FileNotFoundError
            Raised when the Dataset does not exist.

        Notes
        -----
        Some Datastores may implement this method as a silent no-op to
        disable Dataset deletion through standard interfaces.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def forget(self, refs: Iterable[DatasetRef]) -> None:
        """Indicate to the Datastore that it should remove all records of the
        given datasets, without actually deleting them.

        Parameters
        ----------
        refs : `Iterable` [ `DatasetRef` ]
            References to the datasets being forgotten.

        Notes
        -----
        Asking a datastore to forget a `DatasetRef` it does not hold should be
        a silent no-op, not an error.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def trash(self, ref: Union[DatasetRef, Iterable[DatasetRef]], ignore_errors: bool = True) -> None:
        """Indicate to the Datastore that a Dataset can be moved to the trash.

        Parameters
        ----------
        ref : `DatasetRef` or iterable thereof
            Reference(s) to the required Dataset.
        ignore_errors : `bool`, optional
            Determine whether errors should be ignored. When multiple
            refs are being trashed there will be no per-ref check.

        Raises
        ------
        FileNotFoundError
            Raised when the Dataset does not exist and errors are not
            ignored. Only checked if a single ref is supplied (and not in a
            list).

        Notes
        -----
        Some Datastores may implement this method as a silent no-op to
        disable Dataset deletion through standard interfaces.
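
        Examples
        --------
        A minimal sketch of two-stage deletion, assuming a concrete
        ``datastore``; artifacts are only deleted when the trash is
        emptied::

            datastore.trash(ref)
            datastore.emptyTrash()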

        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def emptyTrash(self, ignore_errors: bool = True) -> None:
        """Remove all datasets from the trash.

        Parameters
        ----------
        ignore_errors : `bool`, optional
            Determine whether errors should be ignored.

        Notes
        -----
        Some Datastores may implement this method as a silent no-op to
        disable Dataset deletion through standard interfaces.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def transfer(self, inputDatastore: Datastore, datasetRef: DatasetRef) -> None:
        """Transfer a dataset from another datastore to this datastore.

        Parameters
        ----------
        inputDatastore : `Datastore`
            The external `Datastore` from which to retrieve the Dataset.
        datasetRef : `DatasetRef`
            Reference to the required Dataset.
        """
        raise NotImplementedError("Must be implemented by subclass")

    def export(self, refs: Iterable[DatasetRef], *,
               directory: Optional[str] = None, transfer: Optional[str] = None) -> Iterable[FileDataset]:
        """Export datasets for transfer to another data repository.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            Dataset references to be exported.
        directory : `str`, optional
            Path to a directory that should contain files corresponding to
            output datasets. Ignored if ``transfer`` is `None`.
        transfer : `str`, optional
            Mode that should be used to move datasets out of the repository.
            Valid options are the same as those of the ``transfer`` argument
            to ``ingest``, and datastores may similarly signal that a transfer
            mode is not supported by raising `NotImplementedError`.

        Returns
        -------
        datasets : iterable of `FileDataset`
            Structs containing information about the exported datasets, in the
            same order as ``refs``.

        Raises
        ------
        NotImplementedError
            Raised if the given transfer mode is not supported.
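
        Examples
        --------
        A minimal sketch, assuming a datastore subclass that implements
        export and a hypothetical output directory::

            for fileDataset in datastore.export(refs, directory="export",
                                                transfer="copy"):
                print(fileDataset.path)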

        """
        raise NotImplementedError(f"Transfer mode {transfer} not supported.")

    @abstractmethod
    def validateConfiguration(self, entities: Iterable[Union[DatasetRef, DatasetType, StorageClass]],
                              logFailures: bool = False) -> None:
        """Validate some of the configuration for this datastore.

        Parameters
        ----------
        entities : iterable of `DatasetRef`, `DatasetType`, or `StorageClass`
            Entities to test against this configuration. Can be differing
            types.
        logFailures : `bool`, optional
            If `True`, output a log message for every validation error
            detected.

        Raises
        ------
        DatastoreValidationError
            Raised if there is a validation problem with a configuration.

        Notes
        -----
        Which parts of the configuration are validated is at the discretion
        of each Datastore implementation.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def validateKey(self,
                    lookupKey: LookupKey, entity: Union[DatasetRef, DatasetType, StorageClass]) -> None:
        """Validate a specific lookup key with the supplied entity.

        Parameters
        ----------
        lookupKey : `LookupKey`
            Key to use to retrieve information from the datastore
            configuration.
        entity : `DatasetRef`, `DatasetType`, or `StorageClass`
            Entity to compare with the configuration retrieved using the
            specified lookup key.

        Raises
        ------
        DatastoreValidationError
            Raised if there is a problem with the combination of entity
            and lookup key.

        Notes
        -----
        Bypasses the normal selection priorities by allowing a key that
        would normally not be selected to be validated.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def getLookupKeys(self) -> Set[LookupKey]:
        """Return all the lookup keys relevant to this datastore.

        Returns
        -------
        keys : `set` of `LookupKey`
            The keys stored internally for looking up information based
            on `DatasetType` name or `StorageClass`.
        """
        raise NotImplementedError("Must be implemented by subclass")

    def needs_expanded_data_ids(
        self,
        transfer: Optional[str],
        entity: Optional[Union[DatasetRef, DatasetType, StorageClass]] = None,
    ) -> bool:
        """Test whether this datastore needs expanded data IDs to ingest.

        Parameters
        ----------
        transfer : `str` or `None`
            Transfer mode for ingest.
        entity : `DatasetRef`, `DatasetType`, or `StorageClass`, optional
            Object representing what will be ingested. If not provided (or not
            specific enough), `True` may be returned even if expanded data
            IDs aren't necessary.

        Returns
        -------
        needed : `bool`
            If `True`, expanded data IDs may be needed. `False` only if
            expansion definitely isn't necessary.
        """
        return True