# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Support for generic data stores."""

from __future__ import annotations

__all__ = ("DatastoreConfig", "Datastore", "DatastoreValidationError")

import contextlib
import logging
from collections import defaultdict
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    ClassVar,
    Dict,
    Iterable,
    Iterator,
    List,
    Mapping,
    Optional,
    Set,
    Tuple,
    Type,
    Union,
)

from dataclasses import dataclass
from abc import ABCMeta, abstractmethod

from lsst.utils import doImportType
from .config import ConfigSubset, Config
from .exceptions import ValidationError, DatasetTypeNotSupportedError
from .constraints import Constraints
from .storageClass import StorageClassFactory
from .fileDataset import FileDataset

if TYPE_CHECKING:
    from ..registry.interfaces import DatastoreRegistryBridgeManager
    from .datasets import DatasetRef, DatasetType
    from .configSupport import LookupKey
    from .storageClass import StorageClass
    from ._butlerUri import ButlerURI


class DatastoreConfig(ConfigSubset):
    """Configuration for Datastores."""

    component = "datastore"
    requiredKeys = ("cls",)
    defaultConfigFile = "datastore.yaml"


class DatastoreValidationError(ValidationError):
    """There is a problem with the Datastore configuration."""

    pass


@dataclass(frozen=True)
class Event:
    __slots__ = {"name", "undoFunc", "args", "kwargs"}
    name: str
    undoFunc: Callable
    args: tuple
    kwargs: dict


class IngestPrepData:
    """A helper base class for `Datastore` ingest implementations.

    Datastore implementations will generally need a custom implementation of
    this class.

    Should be accessed as ``Datastore.IngestPrepData`` instead of via direct
    import.

    Parameters
    ----------
    refs : iterable of `DatasetRef`
        References for the datasets that can be ingested by this datastore.
    """

    def __init__(self, refs: Iterable[DatasetRef]):
        self.refs = {ref.id: ref for ref in refs}


class DatastoreTransaction:
    """Keep a log of `Datastore` activity and allow rollback.

    Parameters
    ----------
    parent : `DatastoreTransaction`, optional
        The parent transaction (if any).
    """

    Event: ClassVar[Type] = Event

    parent: Optional['DatastoreTransaction']
    """The parent transaction. (`DatastoreTransaction`, optional)"""

    def __init__(self, parent: Optional[DatastoreTransaction] = None):
        self.parent = parent
        self._log: List[Event] = []

    def registerUndo(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> None:
        """Register event with undo function.

        Parameters
        ----------
        name : `str`
            Name of the event.
        undoFunc : func
            Function to undo this event.
        args : `tuple`
            Positional arguments to `undoFunc`.
        **kwargs
            Keyword arguments to `undoFunc`.
        """
        self._log.append(self.Event(name, undoFunc, args, kwargs))

    @contextlib.contextmanager
    def undoWith(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> Iterator[None]:
        """Register undo function if nested operation succeeds.

        Calls `registerUndo`.

        This can be used to wrap individual undo-able statements within a
        DatastoreTransaction block. Multiple statements that can fail
        separately should not be part of the same `undoWith` block.

        All arguments are forwarded directly to `registerUndo`.
        """
        try:
            yield None
        except BaseException:
            raise
        else:
            self.registerUndo(name, undoFunc, *args, **kwargs)
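
    # A minimal usage sketch (hedged; ``move_file`` and the paths below are
    # hypothetical, not part of this module): wrap a single undo-able step in
    # ``undoWith`` so the reverse operation is recorded only if the step
    # succeeds, and is replayed if the transaction later rolls back.
    #
    #     txn = DatastoreTransaction()
    #     with txn.undoWith("move", move_file, "store/a.fits", "incoming/a.fits"):
    #         move_file("incoming/a.fits", "store/a.fits")
    #     txn.rollback()  # calls move_file("store/a.fits", "incoming/a.fits")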

    def rollback(self) -> None:
        """Roll back all events in this transaction."""
        log = logging.getLogger(__name__)
        while self._log:
            ev = self._log.pop()
            try:
                log.debug("Rolling back transaction: %s: %s(%s,%s)", ev.name,
                          ev.undoFunc,
                          ",".join(str(a) for a in ev.args),
                          ",".join(f"{k}={v}" for k, v in ev.kwargs.items()))
            except Exception:
                # In case we had a problem in stringification of arguments
                log.warning("Rolling back transaction: %s", ev.name)
            try:
                ev.undoFunc(*ev.args, **ev.kwargs)
            except BaseException as e:
                # Deliberately swallow any error that may occur in unrolling
                log.warning("Exception: %s caught while unrolling: %s", e, ev.name)
                pass

    def commit(self) -> None:
        """Commit this transaction."""
        if self.parent is None:
            # Just forget about the events, they have already happened.
            return
        else:
            # The parent may still need to roll back the events from this
            # transaction, so pass them up to it.
            self.parent._log.extend(self._log)


class Datastore(metaclass=ABCMeta):
    """Datastore interface.

    Parameters
    ----------
    config : `DatastoreConfig` or `str`
        Load configuration either from an existing config instance or by
        referring to a configuration file.
    bridgeManager : `DatastoreRegistryBridgeManager`
        Object that manages the interface between `Registry` and datastores.
    butlerRoot : `str`, optional
        New datastore root to use to override the configuration value.
    """

    defaultConfigFile: ClassVar[Optional[str]] = None
    """Path to configuration defaults. Accessed within the ``config`` resource
    or relative to a search path. Can be None if no defaults specified.
    """

    containerKey: ClassVar[Optional[str]] = None
    """Name of the key containing a list of subconfigurations that also
    need to be merged with defaults and will likely use different Python
    datastore classes (but all using DatastoreConfig). Assumed to be a
    list of configurations that can be represented in a DatastoreConfig
    and containing a "cls" definition. None indicates that no containers
    are expected in this Datastore."""

    isEphemeral: bool = False
    """Indicate whether this Datastore is ephemeral or not. An ephemeral
    datastore is one whose contents will not exist across process restarts.
    This value can change per-instance."""

    config: DatastoreConfig
    """Configuration used to create Datastore."""

    name: str
    """Label associated with this Datastore."""

    storageClassFactory: StorageClassFactory
    """Factory for creating storage class instances from name."""

    constraints: Constraints
    """Constraints to apply when putting datasets into the datastore."""

    # MyPy does not like for this to be annotated as any kind of type, because
    # it can't do static checking on type variables that can change at runtime.
    IngestPrepData: ClassVar[Any] = IngestPrepData
    """Helper base class for ingest implementations.
    """

    @classmethod
    @abstractmethod
    def setConfigRoot(cls, root: str, config: Config, full: Config, overwrite: bool = True) -> None:
        """Set filesystem-dependent config options for this datastore.

        The options will be appropriate for a new empty repository with the
        given root.

        Parameters
        ----------
        root : `str`
            Filesystem path to the root of the data repository.
        config : `Config`
            A `Config` to update. Only the subset understood by
            this component will be updated. Will not expand
            defaults.
        full : `Config`
            A complete config with all defaults expanded that can be
            converted to a `DatastoreConfig`. Read-only and will not be
            modified by this method.
            Repository-specific options that should not be obtained
            from defaults when Butler instances are constructed
            should be copied from ``full`` to ``config``.
        overwrite : `bool`, optional
            If `False`, do not modify a value in ``config`` if the value
            already exists. Default is always to overwrite with the provided
            ``root``.

        Notes
        -----
        If a keyword is explicitly defined in the supplied ``config`` it
        will not be overridden by this method if ``overwrite`` is `False`.
        This allows explicit values set in external configs to be retained.
        """
        raise NotImplementedError()

    @staticmethod
    def fromConfig(config: Config, bridgeManager: DatastoreRegistryBridgeManager,
                   butlerRoot: Optional[Union[str, ButlerURI]] = None) -> 'Datastore':
        """Create datastore from type specified in config file.

        Parameters
        ----------
        config : `Config`
            Configuration instance.
        bridgeManager : `DatastoreRegistryBridgeManager`
            Object that manages the interface between `Registry` and
            datastores.
        butlerRoot : `str`, optional
            Butler root directory.
        """
        cls = doImportType(config["datastore", "cls"])
        if not issubclass(cls, Datastore):
            raise TypeError(f"Imported child class {config['datastore', 'cls']} is not a Datastore")
        return cls(config=config, bridgeManager=bridgeManager, butlerRoot=butlerRoot)
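
    # A minimal sketch of constructing a concrete datastore from config
    # (hedged; the "butler.yaml" path and the ``bridge_manager`` variable are
    # assumptions supplied by the caller, not defined in this module):
    #
    #     config = Config("butler.yaml")  # must contain a "datastore.cls" entry
    #     datastore = Datastore.fromConfig(config, bridgeManager=bridge_manager)
    #
    # ``fromConfig`` imports the class named by ``config["datastore", "cls"]``
    # and delegates construction to it.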

    def __init__(self, config: Union[Config, str],
                 bridgeManager: DatastoreRegistryBridgeManager,
                 butlerRoot: Optional[Union[str, ButlerURI]] = None):
        self.config = DatastoreConfig(config)
        self.name = "ABCDataStore"
        self._transaction: Optional[DatastoreTransaction] = None

        # All Datastores need storage classes and constraints
        self.storageClassFactory = StorageClassFactory()

        # And read the constraints list
        constraintsConfig = self.config.get("constraints")
        self.constraints = Constraints(constraintsConfig, universe=bridgeManager.universe)

    def __str__(self) -> str:
        return self.name

    def __repr__(self) -> str:
        return self.name

    @property
    def names(self) -> Tuple[str, ...]:
        """Names associated with this datastore returned as a tuple.

        Can be different to ``name`` for a chaining datastore.
        """
        # Default implementation returns solely the name itself
        return (self.name, )

    @contextlib.contextmanager
    def transaction(self) -> Iterator[DatastoreTransaction]:
        """Context manager supporting `Datastore` transactions.

        Transactions can be nested, and are to be used in combination with
        `Registry.transaction`.
        """
        self._transaction = DatastoreTransaction(self._transaction)
        try:
            yield self._transaction
        except BaseException:
            self._transaction.rollback()
            raise
        else:
            self._transaction.commit()
        self._transaction = self._transaction.parent
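
    # A minimal sketch of the intended calling pattern (hedged; ``datastore``,
    # ``registry``, ``in_memory_dataset`` and ``ref`` are placeholders for
    # objects the caller already has): nesting the datastore transaction
    # inside the registry transaction means a failed ``put`` is rolled back
    # in both places.
    #
    #     with registry.transaction():
    #         with datastore.transaction():
    #             datastore.put(in_memory_dataset, ref)
    #             # any exception here triggers DatastoreTransaction.rollback()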

    @abstractmethod
    def knows(self, ref: DatasetRef) -> bool:
        """Check if the dataset is known to the datastore.

        Does not check for existence of any artifact.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required dataset.

        Returns
        -------
        exists : `bool`
            `True` if the dataset is known to the datastore.
        """
        raise NotImplementedError()

    def mexists(self, refs: Iterable[DatasetRef],
                artifact_existence: Optional[Dict[ButlerURI, bool]] = None) -> Dict[DatasetRef, bool]:
        """Check the existence of multiple datasets at once.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            The datasets to be checked.
        artifact_existence : `dict` of [`ButlerURI`, `bool`], optional
            Mapping of datastore artifact to existence. Updated by this
            method with details of all artifacts tested. Can be `None`
            if the caller is not interested.

        Returns
        -------
        existence : `dict` of [`DatasetRef`, `bool`]
            Mapping from dataset to boolean indicating existence.
        """
        existence: Dict[DatasetRef, bool] = {}
        # Non-optimized default.
        for ref in refs:
            existence[ref] = self.exists(ref)
        return existence
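
    # A short usage sketch (hedged; ``datastore`` and ``refs`` come from the
    # caller): the default implementation simply loops over ``exists``, but
    # subclasses may batch the underlying artifact checks.
    #
    #     missing = [ref for ref, found in datastore.mexists(refs).items() if not found]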

    @abstractmethod
    def exists(self, datasetRef: DatasetRef) -> bool:
        """Check if the dataset exists in the datastore.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required dataset.

        Returns
        -------
        exists : `bool`
            `True` if the entity exists in the `Datastore`.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def get(self, datasetRef: DatasetRef, parameters: Optional[Mapping[str, Any]] = None) -> Any:
        """Load an `InMemoryDataset` from the store.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required Dataset.
        parameters : `dict`
            `StorageClass`-specific parameters that specify a slice of the
            Dataset to be loaded.

        Returns
        -------
        inMemoryDataset : `object`
            Requested Dataset or slice thereof as an InMemoryDataset.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def put(self, inMemoryDataset: Any, datasetRef: DatasetRef) -> None:
        """Write an `InMemoryDataset` with a given `DatasetRef` to the store.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Dataset to store.
        datasetRef : `DatasetRef`
            Reference to the associated Dataset.
        """
        raise NotImplementedError("Must be implemented by subclass")

    def _overrideTransferMode(self, *datasets: FileDataset, transfer: Optional[str] = None) -> Optional[str]:
        """Allow ingest transfer mode to be defaulted based on datasets.

        Parameters
        ----------
        datasets : `FileDataset`
            Each positional argument is a struct containing information about
            a file to be ingested, including its path (either absolute or
            relative to the datastore root, if applicable), a complete
            `DatasetRef` (with ``dataset_id not None``), and optionally a
            formatter class or its fully-qualified string name. If a formatter
            is not provided, this method should populate that attribute with
            the formatter the datastore would use for `put`. Subclasses are
            also permitted to modify the path attribute (typically to put it
            in what the datastore considers its standard form).
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            See `ingest` for details of transfer modes.

        Returns
        -------
        newTransfer : `str`
            Transfer mode to use. Will be identical to the supplied transfer
            mode unless "auto" is used.
        """
        if transfer != "auto":
            return transfer
        raise RuntimeError(f"{transfer} is not allowed without specialization.")
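
    # A hedged sketch of how a concrete datastore might specialize this hook
    # (the class name and the chosen default mode are illustrative only, not
    # part of this module):
    #
    #     class HypotheticalFileDatastore(Datastore):
    #         def _overrideTransferMode(self, *datasets, transfer=None):
    #             # Map the generic "auto" request onto this store's preference.
    #             return "copy" if transfer == "auto" else transfer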

    def _prepIngest(self, *datasets: FileDataset, transfer: Optional[str] = None) -> IngestPrepData:
        """Process datasets to identify which ones can be ingested.

        Parameters
        ----------
        datasets : `FileDataset`
            Each positional argument is a struct containing information about
            a file to be ingested, including its path (either absolute or
            relative to the datastore root, if applicable), a complete
            `DatasetRef` (with ``dataset_id not None``), and optionally a
            formatter class or its fully-qualified string name. If a formatter
            is not provided, this method should populate that attribute with
            the formatter the datastore would use for `put`. Subclasses are
            also permitted to modify the path attribute (typically to put it
            in what the datastore considers its standard form).
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            See `ingest` for details of transfer modes.

        Returns
        -------
        data : `IngestPrepData`
            An instance of a subclass of `IngestPrepData`, used to pass
            arbitrary data from `_prepIngest` to `_finishIngest`. This should
            include only the datasets this datastore can actually ingest;
            others should be silently ignored (`Datastore.ingest` will inspect
            `IngestPrepData.refs` and raise `DatasetTypeNotSupportedError` if
            necessary).

        Raises
        ------
        NotImplementedError
            Raised if the datastore does not support the given transfer mode
            (including the case where ingest is not supported at all).
        FileNotFoundError
            Raised if one of the given files does not exist.
        FileExistsError
            Raised if transfer is not `None` but the (internal) location the
            file would be moved to is already occupied.

        Notes
        -----
        This method (along with `_finishIngest`) should be implemented by
        subclasses to provide ingest support instead of implementing `ingest`
        directly.

        `_prepIngest` should not modify the data repository or given files in
        any way; all changes should be deferred to `_finishIngest`.

        When possible, exceptions should be raised in `_prepIngest` instead of
        `_finishIngest`. `NotImplementedError` exceptions that indicate that
        the transfer mode is not supported must be raised by `_prepIngest`
        instead of `_finishIngest`.
        """
        raise NotImplementedError(
            f"Datastore {self} does not support direct file-based ingest."
        )

    def _finishIngest(self, prepData: IngestPrepData, *, transfer: Optional[str] = None) -> None:
        """Complete an ingest operation.

        Parameters
        ----------
        prepData : `IngestPrepData`
            An instance of a subclass of `IngestPrepData`. Guaranteed to be
            the direct result of a call to `_prepIngest` on this datastore.
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            See `ingest` for details of transfer modes.

        Raises
        ------
        FileNotFoundError
            Raised if one of the given files does not exist.
        FileExistsError
            Raised if transfer is not `None` but the (internal) location the
            file would be moved to is already occupied.

        Notes
        -----
        This method (along with `_prepIngest`) should be implemented by
        subclasses to provide ingest support instead of implementing `ingest`
        directly.
        """
        raise NotImplementedError(
            f"Datastore {self} does not support direct file-based ingest."
        )
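
    # A hedged skeleton of how a subclass typically provides ingest support
    # (``HypotheticalFileDatastore``, ``_can_ingest`` and ``_ingest_one`` are
    # illustrative names only): ``_prepIngest`` inspects and filters without
    # side effects, ``_finishIngest`` performs the actual changes.
    #
    #     class HypotheticalFileDatastore(Datastore):
    #         def _prepIngest(self, *datasets, transfer=None):
    #             # Decide (without touching anything) which refs can be ingested.
    #             supported = [ref for dataset in datasets for ref in dataset.refs
    #                          if self._can_ingest(dataset, transfer)]  # hypothetical helper
    #             return self.IngestPrepData(supported)
    #
    #         def _finishIngest(self, prepData, *, transfer=None):
    #             # Only now modify the repository (copy/move/link the files).
    #             for ref in prepData.refs.values():
    #                 self._ingest_one(ref, transfer=transfer)  # hypothetical helper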

    def ingest(self, *datasets: FileDataset, transfer: Optional[str] = None) -> None:
        """Ingest one or more files into the datastore.

        Parameters
        ----------
        datasets : `FileDataset`
            Each positional argument is a struct containing information about
            a file to be ingested, including its path (either absolute or
            relative to the datastore root, if applicable), a complete
            `DatasetRef` (with ``dataset_id not None``), and optionally a
            formatter class or its fully-qualified string name. If a formatter
            is not provided, the one the datastore would use for ``put`` on
            that dataset is assumed.
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            If `None` (default), the file must already be in a location
            appropriate for the datastore (e.g. within its root directory),
            and will not be modified. Other choices include "move", "copy",
            "link", "symlink", "relsymlink", and "hardlink". "link" is a
            special transfer mode that will first try to make a hardlink and
            if that fails a symlink will be used instead. "relsymlink" creates
            a relative symlink rather than use an absolute path.
            Most datastores do not support all transfer modes.
            "auto" is a special option that will let the
            data store choose the most natural option for itself.

        Raises
        ------
        NotImplementedError
            Raised if the datastore does not support the given transfer mode
            (including the case where ingest is not supported at all).
        DatasetTypeNotSupportedError
            Raised if one or more files to be ingested have a dataset type
            that is not supported by the datastore.
        FileNotFoundError
            Raised if one of the given files does not exist.
        FileExistsError
            Raised if transfer is not `None` but the (internal) location the
            file would be moved to is already occupied.

        Notes
        -----
        Subclasses should implement `_prepIngest` and `_finishIngest` instead
        of implementing `ingest` directly. Datastores that hold and
        delegate to child datastores may want to call those methods as well.

        Subclasses are encouraged to document their supported transfer modes
        in their class documentation.
        """
        # Allow a datastore to select a default transfer mode
        transfer = self._overrideTransferMode(*datasets, transfer=transfer)
        prepData = self._prepIngest(*datasets, transfer=transfer)
        refs = {ref.id: ref for dataset in datasets for ref in dataset.refs}
        if None in refs:
            # Find the file for the error message. There may be multiple
            # bad refs so look for all of them.
            unresolved_paths = {}
            for dataset in datasets:
                unresolved = []
                for ref in dataset.refs:
                    if ref.id is None:
                        unresolved.append(ref)
                if unresolved:
                    unresolved_paths[dataset.path] = unresolved
            raise RuntimeError("Attempt to ingest unresolved DatasetRef from: "
                               + ",".join(f"{p}: ({[str(r) for r in unres]})"
                                          for p, unres in unresolved_paths.items()))
        if refs.keys() != prepData.refs.keys():
            unsupported = refs.keys() - prepData.refs.keys()
            # Group unsupported refs by DatasetType for an informative
            # but still concise error message.
            byDatasetType = defaultdict(list)
            for datasetId in unsupported:
                ref = refs[datasetId]
                byDatasetType[ref.datasetType].append(ref)
            raise DatasetTypeNotSupportedError(
                "DatasetType(s) not supported in ingest: "
                + ", ".join(f"{k.name} ({len(v)} dataset(s))" for k, v in byDatasetType.items())
            )
        self._finishIngest(prepData, transfer=transfer)
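
    # A hedged usage sketch (the path and ``resolved_ref`` are placeholders
    # supplied by the caller, not defined here):
    #
    #     datastore.ingest(
    #         FileDataset(path="raw/exposure_001.fits", refs=[resolved_ref]),
    #         transfer="copy",
    #     )
    #
    # Every ref must already be resolved (``ref.id`` not `None`), otherwise
    # ``ingest`` raises `RuntimeError` before touching any files.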

    def transfer_from(self, source_datastore: Datastore, refs: Iterable[DatasetRef],
                      local_refs: Optional[Iterable[DatasetRef]] = None,
                      transfer: str = "auto",
                      artifact_existence: Optional[Dict[ButlerURI, bool]] = None) -> None:
        """Transfer dataset artifacts from another datastore to this one.

        Parameters
        ----------
        source_datastore : `Datastore`
            The datastore from which to transfer artifacts. That datastore
            must be compatible with this datastore receiving the artifacts.
        refs : iterable of `DatasetRef`
            The datasets to transfer from the source datastore.
        local_refs : iterable of `DatasetRef`, optional
            The dataset refs known to the registry associated with this
            datastore. Can be `None` if the source and target datastores
            are using UUIDs.
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            Choices include "move", "copy",
            "link", "symlink", "relsymlink", and "hardlink". "link" is a
            special transfer mode that will first try to make a hardlink and
            if that fails a symlink will be used instead. "relsymlink" creates
            a relative symlink rather than use an absolute path.
            Most datastores do not support all transfer modes.
            "auto" (the default) is a special option that will let the
            data store choose the most natural option for itself.
            If the source location and transfer location are identical the
            transfer mode will be ignored.
        artifact_existence : `dict` of [`ButlerURI`, `bool`], optional
            Mapping of datastore artifact to existence. Updated by this
            method with details of all artifacts tested. Can be `None`
            if the caller is not interested.

        Raises
        ------
        TypeError
            Raised if the two datastores are not compatible.
        """
        if type(self) is not type(source_datastore):
            raise TypeError(f"Datastore mismatch between this datastore ({type(self)}) and the "
                            f"source datastore ({type(source_datastore)}).")

        raise NotImplementedError(f"Datastore {type(self)} must implement a transfer_from method.")

    @abstractmethod
    def getURIs(self, datasetRef: DatasetRef,
                predict: bool = False) -> Tuple[Optional[ButlerURI], Dict[str, ButlerURI]]:
        """Return URIs associated with dataset.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required dataset.
        predict : `bool`, optional
            If the datastore does not know about the dataset, should it
            return a predicted URI or not?

        Returns
        -------
        primary : `ButlerURI`
            The URI to the primary artifact associated with this dataset.
            If the dataset was disassembled within the datastore this
            may be `None`.
        components : `dict`
            URIs to any components associated with the dataset artifact.
            Can be empty if there are no components.
        """
        raise NotImplementedError()

    @abstractmethod
    def getURI(self, datasetRef: DatasetRef, predict: bool = False) -> ButlerURI:
        """URI to the Dataset.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required Dataset.
        predict : `bool`
            If `True` attempt to predict the URI for a dataset if it does
            not exist in datastore.

        Returns
        -------
        uri : `ButlerURI`
            URI pointing to the Dataset within the datastore. If the
            Dataset does not exist in the datastore, the URI may be a guess.
            If the datastore does not have entities that relate well
            to the concept of a URI the returned URI will be
            descriptive. The returned URI is not guaranteed to be obtainable.

        Raises
        ------
        FileNotFoundError
            Raised if a URI has been requested for a dataset that does not
            exist and guessing is not allowed.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def retrieveArtifacts(self, refs: Iterable[DatasetRef],
                          destination: ButlerURI, transfer: str = "auto",
                          preserve_path: bool = True,
                          overwrite: bool = False) -> List[ButlerURI]:
        """Retrieve the artifacts associated with the supplied refs.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            The datasets for which artifacts are to be retrieved.
            A single ref can result in multiple artifacts. The refs must
            be resolved.
        destination : `ButlerURI`
            Location to write the artifacts.
        transfer : `str`, optional
            Method to use to transfer the artifacts. Must be one of the
            options supported by `ButlerURI.transfer_from()`. "move" is not
            allowed.
        preserve_path : `bool`, optional
            If `True` the full path of the artifact within the datastore
            is preserved. If `False` the final file component of the path
            is used.
        overwrite : `bool`, optional
            If `True` allow transfers to overwrite existing files at the
            destination.

        Returns
        -------
        targets : `list` of `ButlerURI`
            URIs of file artifacts in destination location. Order is not
            preserved.

        Notes
        -----
        For non-file datastores the artifacts written to the destination
        may not match the representation inside the datastore. For example
        a hierarchical data structure in a NoSQL database may well be stored
        as a JSON file.
        """
        raise NotImplementedError()
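
    # A hedged usage sketch (``datastore``, ``refs`` and the destination
    # directory are supplied by the caller; `ButlerURI` is imported from this
    # package):
    #
    #     destination = ButlerURI("transfer_out/")
    #     written = datastore.retrieveArtifacts(refs, destination, transfer="copy")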

    @abstractmethod
    def remove(self, datasetRef: DatasetRef) -> None:
        """Indicate to the Datastore that a Dataset can be removed.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required Dataset.

        Raises
        ------
        FileNotFoundError
            When Dataset does not exist.

        Notes
        -----
        Some Datastores may implement this method as a silent no-op to
        disable Dataset deletion through standard interfaces.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def forget(self, refs: Iterable[DatasetRef]) -> None:
        """Indicate to the Datastore that it should remove all records of the
        given datasets, without actually deleting them.

        Parameters
        ----------
        refs : `Iterable` [ `DatasetRef` ]
            References to the datasets being forgotten.

        Notes
        -----
        Asking a datastore to forget a `DatasetRef` it does not hold should be
        a silent no-op, not an error.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def trash(self, ref: Union[DatasetRef, Iterable[DatasetRef]], ignore_errors: bool = True) -> None:
        """Indicate to the Datastore that a Dataset can be moved to the trash.

        Parameters
        ----------
        ref : `DatasetRef` or iterable thereof
            Reference(s) to the required Dataset.
        ignore_errors : `bool`, optional
            Determine whether errors should be ignored. When multiple
            refs are being trashed there will be no per-ref check.

        Raises
        ------
        FileNotFoundError
            When Dataset does not exist and errors are not ignored. Only
            checked if a single ref is supplied (and not in a list).

        Notes
        -----
        Some Datastores may implement this method as a silent no-op to
        disable Dataset deletion through standard interfaces.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def emptyTrash(self, ignore_errors: bool = True) -> None:
        """Remove all datasets from the trash.

        Parameters
        ----------
        ignore_errors : `bool`, optional
            Determine whether errors should be ignored.

        Notes
        -----
        Some Datastores may implement this method as a silent no-op to
        disable Dataset deletion through standard interfaces.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def transfer(self, inputDatastore: Datastore, datasetRef: DatasetRef) -> None:
        """Transfer a dataset from another datastore to this datastore.

        Parameters
        ----------
        inputDatastore : `Datastore`
            The external `Datastore` from which to retrieve the Dataset.
        datasetRef : `DatasetRef`
            Reference to the required Dataset.
        """
        raise NotImplementedError("Must be implemented by subclass")

    def export(self, refs: Iterable[DatasetRef], *,
               directory: Optional[str] = None, transfer: Optional[str] = None) -> Iterable[FileDataset]:
        """Export datasets for transfer to another data repository.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            Dataset references to be exported.
        directory : `str`, optional
            Path to a directory that should contain files corresponding to
            output datasets. Ignored if ``transfer`` is `None`.
        transfer : `str`, optional
            Mode that should be used to move datasets out of the repository.
            Valid options are the same as those of the ``transfer`` argument
            to ``ingest``, and datastores may similarly signal that a transfer
            mode is not supported by raising `NotImplementedError`.

        Returns
        -------
        datasets : iterable of `FileDataset`
            Structs containing information about the exported datasets, in the
            same order as ``refs``.

        Raises
        ------
        NotImplementedError
            Raised if the given transfer mode is not supported.
        """
        raise NotImplementedError(f"Transfer mode {transfer} not supported.")

    @abstractmethod
    def validateConfiguration(self, entities: Iterable[Union[DatasetRef, DatasetType, StorageClass]],
                              logFailures: bool = False) -> None:
        """Validate some of the configuration for this datastore.

        Parameters
        ----------
        entities : iterable of `DatasetRef`, `DatasetType`, or `StorageClass`
            Entities to test against this configuration. Can be differing
            types.
        logFailures : `bool`, optional
            If `True`, output a log message for every validation error
            detected.

        Raises
        ------
        DatastoreValidationError
            Raised if there is a validation problem with a configuration.

        Notes
        -----
        Which parts of the configuration are validated is at the discretion
        of each Datastore implementation.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def validateKey(self,
                    lookupKey: LookupKey, entity: Union[DatasetRef, DatasetType, StorageClass]) -> None:
        """Validate a specific lookup key with supplied entity.

        Parameters
        ----------
        lookupKey : `LookupKey`
            Key to use to retrieve information from the datastore
            configuration.
        entity : `DatasetRef`, `DatasetType`, or `StorageClass`
            Entity to compare with configuration retrieved using the
            specified lookup key.

        Raises
        ------
        DatastoreValidationError
            Raised if there is a problem with the combination of entity
            and lookup key.

        Notes
        -----
        Bypasses the normal selection priorities by allowing a key that
        would normally not be selected to be validated.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def getLookupKeys(self) -> Set[LookupKey]:
        """Return all the lookup keys relevant to this datastore.

        Returns
        -------
        keys : `set` of `LookupKey`
            The keys stored internally for looking up information based
            on `DatasetType` name or `StorageClass`.
        """
        raise NotImplementedError("Must be implemented by subclass")

    def needs_expanded_data_ids(
        self,
        transfer: Optional[str],
        entity: Optional[Union[DatasetRef, DatasetType, StorageClass]] = None,
    ) -> bool:
        """Test whether this datastore needs expanded data IDs to ingest.

        Parameters
        ----------
        transfer : `str` or `None`
            Transfer mode for ingest.
        entity : `DatasetRef`, `DatasetType`, or `StorageClass`, optional
            Object representing what will be ingested. If not provided (or not
            specific enough), `True` may be returned even if expanded data
            IDs aren't necessary.

        Returns
        -------
        needed : `bool`
            If `True`, expanded data IDs may be needed. `False` only if
            expansion definitely isn't necessary.
        """
        return True
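
    # A hedged sketch of how a subclass might narrow the conservative default
    # (the class name and the rule below are illustrative only): a store that
    # never generates file paths for files already in place could answer
    # `False` when no transfer is requested.
    #
    #     class HypotheticalFileDatastore(Datastore):
    #         def needs_expanded_data_ids(self, transfer, entity=None):
    #             # With transfer=None the file stays where it is, so no path
    #             # needs to be built from an expanded data ID.
    #             return transfer is not None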