Coverage for python/lsst/daf/butler/core/datastore.py: 45%

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

22"""Support for generic data stores.""" 

23 

24from __future__ import annotations 

25 

26__all__ = ("DatastoreConfig", "Datastore", "DatastoreValidationError") 

27 

28import contextlib 

29import logging 

30from abc import ABCMeta, abstractmethod 

31from collections import defaultdict 

32from dataclasses import dataclass 

33from typing import ( 

34 TYPE_CHECKING, 

35 Any, 

36 Callable, 

37 ClassVar, 

38 Dict, 

39 Iterable, 

40 Iterator, 

41 List, 

42 Mapping, 

43 Optional, 

44 Set, 

45 Tuple, 

46 Type, 

47 Union, 

48) 

49 

50from lsst.utils import doImportType 

51 

52from .config import Config, ConfigSubset 

53from .constraints import Constraints 

54from .exceptions import DatasetTypeNotSupportedError, ValidationError 

55from .fileDataset import FileDataset 

56from .storageClass import StorageClassFactory 

57 

if TYPE_CHECKING:
    from ..registry.interfaces import DatastoreRegistryBridgeManager
    from ._butlerUri import ButlerURI
    from .configSupport import LookupKey
    from .datasets import DatasetRef, DatasetType
    from .storageClass import StorageClass


class DatastoreConfig(ConfigSubset):
    """Configuration for Datastores."""

    component = "datastore"
    requiredKeys = ("cls",)
    defaultConfigFile = "datastore.yaml"


class DatastoreValidationError(ValidationError):
    """There is a problem with the Datastore configuration."""

    pass


@dataclass(frozen=True)
class Event:
    __slots__ = ("name", "undoFunc", "args", "kwargs")
    name: str
    undoFunc: Callable
    args: tuple
    kwargs: dict


class IngestPrepData:
    """A helper base class for `Datastore` ingest implementations.

    Datastore implementations will generally need a custom implementation of
    this class.

    Should be accessed as ``Datastore.IngestPrepData`` instead of via direct
    import.

    Parameters
    ----------
    refs : iterable of `DatasetRef`
        References for the datasets that can be ingested by this datastore.
    """

    def __init__(self, refs: Iterable[DatasetRef]):
        self.refs = {ref.id: ref for ref in refs}
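
# Datastore implementations usually subclass IngestPrepData to carry extra
# bookkeeping from `_prepIngest` to `_finishIngest`. A minimal sketch; the
# names below are hypothetical, not part of this module:
#
#     class _FilePrepData(IngestPrepData):
#         def __init__(self, refs, paths):
#             super().__init__(refs)
#             # Source path chosen for each dataset during preparation.
#             self.paths = dict(paths)
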

class DatastoreTransaction:
    """Keep a log of `Datastore` activity and allow rollback.

    Parameters
    ----------
    parent : `DatastoreTransaction`, optional
        The parent transaction (if any).
    """

    Event: ClassVar[Type] = Event

    parent: Optional["DatastoreTransaction"]
    """The parent transaction. (`DatastoreTransaction`, optional)"""

    def __init__(self, parent: Optional[DatastoreTransaction] = None):
        self.parent = parent
        self._log: List[Event] = []

    def registerUndo(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> None:
        """Register event with undo function.

        Parameters
        ----------
        name : `str`
            Name of the event.
        undoFunc : callable
            Function to undo this event.
        *args
            Positional arguments to ``undoFunc``.
        **kwargs
            Keyword arguments to ``undoFunc``.
        """
        self._log.append(self.Event(name, undoFunc, args, kwargs))
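
    # For example, an implementation that has just written a file could
    # arrange for rollback to delete it again (a sketch; assumes ``os`` is
    # imported and ``path`` is a local variable):
    #
    #     self._transaction.registerUndo("write", os.remove, path)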

    @contextlib.contextmanager
    def undoWith(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> Iterator[None]:
        """Register undo function if nested operation succeeds.

        Calls `registerUndo`.

        This can be used to wrap individual undo-able statements within a
        `DatastoreTransaction` block. Multiple statements that can fail
        separately should not be part of the same `undoWith` block.

        All arguments are forwarded directly to `registerUndo`.
        """
        try:
            yield None
        except BaseException:
            # The nested operation failed, so there is nothing to undo;
            # propagate the exception without registering the undo function.
            raise
        else:
            self.registerUndo(name, undoFunc, *args, **kwargs)
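
    # Typical use wraps a single fallible operation so that its undo action
    # is registered only on success (a sketch; ``artifact`` and its methods
    # are hypothetical):
    #
    #     with self._transaction.undoWith("write", artifact.remove):
    #         artifact.write(data)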

    def rollback(self) -> None:
        """Roll back all events in this transaction."""
        log = logging.getLogger(__name__)
        while self._log:
            ev = self._log.pop()
            try:
                log.debug(
                    "Rolling back transaction: %s: %s(%s,%s)",
                    ev.name,
                    ev.undoFunc,
                    ",".join(str(a) for a in ev.args),
                    ",".join(f"{k}={v}" for k, v in ev.kwargs.items()),
                )
            except Exception:
                # In case we had a problem in stringification of arguments.
                log.warning("Rolling back transaction: %s", ev.name)
            try:
                ev.undoFunc(*ev.args, **ev.kwargs)
            except BaseException as e:
                # Deliberately swallow error that may occur in unrolling.
                log.warning("Exception: %s caught while unrolling: %s", e, ev.name)

    def commit(self) -> None:
        """Commit this transaction."""
        if self.parent is None:
            # Just forget about the events; they have already happened.
            return
        else:
            # We may still want to roll back events from this transaction
            # as part of the parent, so hand our log to it.
            self.parent._log.extend(self._log)
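
# End-to-end sketch of the transaction log (illustrative only; assumes ``os``
# is imported and the path is made up):
#
#     txn = DatastoreTransaction()
#     txn.registerUndo("create", os.remove, "/tmp/example.dat")
#     txn.rollback()  # pops the event and calls os.remove("/tmp/example.dat")
#
# A nested transaction that commits instead hands its events to its parent,
# so rolling back the parent later still undoes them.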


class Datastore(metaclass=ABCMeta):
    """Datastore interface.

    Parameters
    ----------
    config : `DatastoreConfig` or `str`
        Load configuration either from an existing config instance or by
        referring to a configuration file.
    bridgeManager : `DatastoreRegistryBridgeManager`
        Object that manages the interface between `Registry` and datastores.
    butlerRoot : `str`, optional
        New datastore root to use to override the configuration value.
    """

    defaultConfigFile: ClassVar[Optional[str]] = None
    """Path to configuration defaults. Accessed within the ``config`` resource
    or relative to a search path. Can be `None` if no defaults are specified.
    """

    containerKey: ClassVar[Optional[str]] = None
    """Name of the key containing a list of subconfigurations that also
    need to be merged with defaults and will likely use different Python
    datastore classes (but all using `DatastoreConfig`). Assumed to be a
    list of configurations that can be represented in a `DatastoreConfig`,
    each containing a "cls" definition. `None` indicates that no containers
    are expected in this Datastore."""

    isEphemeral: bool = False
    """Indicate whether this Datastore is ephemeral or not. An ephemeral
    datastore is one where the contents of the datastore will not exist
    across process restarts. This value can change per-instance."""

    config: DatastoreConfig
    """Configuration used to create Datastore."""

    name: str
    """Label associated with this Datastore."""

    storageClassFactory: StorageClassFactory
    """Factory for creating storage class instances from name."""

    constraints: Constraints
    """Constraints to apply when putting datasets into the datastore."""

    # MyPy does not like this to be annotated as any kind of type, because
    # it can't do static checking on type variables that can change at
    # runtime.
    IngestPrepData: ClassVar[Any] = IngestPrepData
    """Helper base class for ingest implementations."""

    @classmethod
    @abstractmethod
    def setConfigRoot(cls, root: str, config: Config, full: Config, overwrite: bool = True) -> None:
        """Set filesystem-dependent config options for this datastore.

        The options will be appropriate for a new empty repository with the
        given root.

        Parameters
        ----------
        root : `str`
            Filesystem path to the root of the data repository.
        config : `Config`
            A `Config` to update. Only the subset understood by
            this component will be updated. Will not expand
            defaults.
        full : `Config`
            A complete config with all defaults expanded that can be
            converted to a `DatastoreConfig`. Read-only and will not be
            modified by this method.
            Repository-specific options that should not be obtained
            from defaults when Butler instances are constructed
            should be copied from ``full`` to ``config``.
        overwrite : `bool`, optional
            If `False`, do not modify a value in ``config`` if the value
            already exists. Default is always to overwrite with the provided
            ``root``.

        Notes
        -----
        If a keyword is explicitly defined in the supplied ``config`` it
        will not be overridden by this method if ``overwrite`` is `False`.
        This allows explicit values set in external configs to be retained.
        """
        raise NotImplementedError()

    @staticmethod
    def fromConfig(
        config: Config,
        bridgeManager: DatastoreRegistryBridgeManager,
        butlerRoot: Optional[Union[str, ButlerURI]] = None,
    ) -> "Datastore":
        """Create datastore from type specified in config file.

        Parameters
        ----------
        config : `Config`
            Configuration instance.
        bridgeManager : `DatastoreRegistryBridgeManager`
            Object that manages the interface between `Registry` and
            datastores.
        butlerRoot : `str`, optional
            Butler root directory.
        """
        cls = doImportType(config["datastore", "cls"])
        if not issubclass(cls, Datastore):
            raise TypeError(f"Imported child class {config['datastore', 'cls']} is not a Datastore")
        return cls(config=config, bridgeManager=bridgeManager, butlerRoot=butlerRoot)
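
    # Construction normally goes through this factory. A sketch, assuming a
    # hypothetical concrete class path:
    #
    #     config = Config({"datastore": {"cls": "mypkg.MyDatastore"}})
    #     datastore = Datastore.fromConfig(config, bridgeManager)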

    def __init__(
        self,
        config: Union[Config, str],
        bridgeManager: DatastoreRegistryBridgeManager,
        butlerRoot: Optional[Union[str, ButlerURI]] = None,
    ):
        self.config = DatastoreConfig(config)
        self.name = "ABCDataStore"
        self._transaction: Optional[DatastoreTransaction] = None

        # All Datastores need storage classes and constraints.
        self.storageClassFactory = StorageClassFactory()

        # And read the constraints list.
        constraintsConfig = self.config.get("constraints")
        self.constraints = Constraints(constraintsConfig, universe=bridgeManager.universe)

    def __str__(self) -> str:
        return self.name

    def __repr__(self) -> str:
        return self.name

    @property
    def names(self) -> Tuple[str, ...]:
        """Names associated with this datastore returned as a tuple.

        Can be different from ``name`` for a chaining datastore.
        """
        # Default implementation returns solely the name itself.
        return (self.name,)

    @contextlib.contextmanager
    def transaction(self) -> Iterator[DatastoreTransaction]:
        """Context manager supporting `Datastore` transactions.

        Transactions can be nested, and are to be used in combination with
        `Registry.transaction`.
        """
        self._transaction = DatastoreTransaction(self._transaction)
        try:
            yield self._transaction
        except BaseException:
            self._transaction.rollback()
            raise
        else:
            self._transaction.commit()
        self._transaction = self._transaction.parent
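
    # Sketch of intended use (``datastore``, ``obj``, and ``ref`` are
    # hypothetical):
    #
    #     with datastore.transaction():
    #         datastore.put(obj, ref)
    #         ...  # any exception here rolls the put back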

    @abstractmethod
    def knows(self, ref: DatasetRef) -> bool:
        """Check if the dataset is known to the datastore.

        Does not check for existence of any artifact.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required dataset.

        Returns
        -------
        exists : `bool`
            `True` if the dataset is known to the datastore.
        """
        raise NotImplementedError()

    def mexists(
        self, refs: Iterable[DatasetRef], artifact_existence: Optional[Dict[ButlerURI, bool]] = None
    ) -> Dict[DatasetRef, bool]:
        """Check the existence of multiple datasets at once.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            The datasets to be checked.
        artifact_existence : `dict` of [`ButlerURI`, `bool`], optional
            Mapping of datastore artifact to existence. Updated by this
            method with details of all artifacts tested. Can be `None`
            if the caller is not interested.

        Returns
        -------
        existence : `dict` of [`DatasetRef`, `bool`]
            Mapping from dataset to boolean indicating existence.
        """
        existence: Dict[DatasetRef, bool] = {}
        # Non-optimized default: check each ref individually.
        for ref in refs:
            existence[ref] = self.exists(ref)
        return existence
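
    # Callers can share one artifact-existence cache across several calls
    # (a sketch):
    #
    #     cache: Dict[ButlerURI, bool] = {}
    #     existence = datastore.mexists(refs, artifact_existence=cache)
    #     missing = [ref for ref, ok in existence.items() if not ok]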

    @abstractmethod
    def exists(self, datasetRef: DatasetRef) -> bool:
        """Check if the dataset exists in the datastore.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required dataset.

        Returns
        -------
        exists : `bool`
            `True` if the entity exists in the `Datastore`.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def get(self, datasetRef: DatasetRef, parameters: Optional[Mapping[str, Any]] = None) -> Any:
        """Load an `InMemoryDataset` from the store.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required Dataset.
        parameters : `dict`, optional
            `StorageClass`-specific parameters that specify a slice of the
            Dataset to be loaded.

        Returns
        -------
        inMemoryDataset : `object`
            Requested Dataset or slice thereof as an InMemoryDataset.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def put(self, inMemoryDataset: Any, datasetRef: DatasetRef) -> None:
        """Write an `InMemoryDataset` with a given `DatasetRef` to the store.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Dataset to store.
        datasetRef : `DatasetRef`
            Reference to the associated Dataset.
        """
        raise NotImplementedError("Must be implemented by subclass")

    def _overrideTransferMode(self, *datasets: FileDataset, transfer: Optional[str] = None) -> Optional[str]:
        """Allow ingest transfer mode to be defaulted based on datasets.

        Parameters
        ----------
        *datasets : `FileDataset`
            Each positional argument is a struct containing information about
            a file to be ingested, including its path (either absolute or
            relative to the datastore root, if applicable), a complete
            `DatasetRef` (with ``dataset_id not None``), and optionally a
            formatter class or its fully-qualified string name. If a formatter
            is not provided, this method should populate that attribute with
            the formatter the datastore would use for `put`. Subclasses are
            also permitted to modify the path attribute (typically to put it
            in what the datastore considers its standard form).
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            See `ingest` for details of transfer modes.

        Returns
        -------
        newTransfer : `str`
            Transfer mode to use. Will be identical to the supplied transfer
            mode unless "auto" is used.
        """
        if transfer != "auto":
            return transfer
        raise RuntimeError(f"{transfer} is not allowed without specialization.")
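
    # A subclass with a natural default can resolve "auto" itself (an
    # illustrative sketch, not the behavior of any particular datastore):
    #
    #     def _overrideTransferMode(self, *datasets, transfer=None):
    #         if transfer == "auto":
    #             return "link"
    #         return transfer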

    def _prepIngest(self, *datasets: FileDataset, transfer: Optional[str] = None) -> IngestPrepData:
        """Process datasets to identify which ones can be ingested.

        Parameters
        ----------
        *datasets : `FileDataset`
            Each positional argument is a struct containing information about
            a file to be ingested, including its path (either absolute or
            relative to the datastore root, if applicable), a complete
            `DatasetRef` (with ``dataset_id not None``), and optionally a
            formatter class or its fully-qualified string name. If a formatter
            is not provided, this method should populate that attribute with
            the formatter the datastore would use for `put`. Subclasses are
            also permitted to modify the path attribute (typically to put it
            in what the datastore considers its standard form).
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            See `ingest` for details of transfer modes.

        Returns
        -------
        data : `IngestPrepData`
            An instance of a subclass of `IngestPrepData`, used to pass
            arbitrary data from `_prepIngest` to `_finishIngest`. This should
            include only the datasets this datastore can actually ingest;
            others should be silently ignored (`Datastore.ingest` will inspect
            `IngestPrepData.refs` and raise `DatasetTypeNotSupportedError` if
            necessary).

        Raises
        ------
        NotImplementedError
            Raised if the datastore does not support the given transfer mode
            (including the case where ingest is not supported at all).
        FileNotFoundError
            Raised if one of the given files does not exist.
        FileExistsError
            Raised if transfer is not `None` but the (internal) location the
            file would be moved to is already occupied.

        Notes
        -----
        This method (along with `_finishIngest`) should be implemented by
        subclasses to provide ingest support instead of implementing `ingest`
        directly.

        `_prepIngest` should not modify the data repository or given files in
        any way; all changes should be deferred to `_finishIngest`.

        When possible, exceptions should be raised in `_prepIngest` instead of
        `_finishIngest`. `NotImplementedError` exceptions that indicate that
        the transfer mode is not supported must be raised by `_prepIngest`
        instead of `_finishIngest`.
        """
        raise NotImplementedError(f"Datastore {self} does not support direct file-based ingest.")

    def _finishIngest(self, prepData: IngestPrepData, *, transfer: Optional[str] = None) -> None:
        """Complete an ingest operation.

        Parameters
        ----------
        prepData : `IngestPrepData`
            An instance of a subclass of `IngestPrepData`. Guaranteed to be
            the direct result of a call to `_prepIngest` on this datastore.
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            See `ingest` for details of transfer modes.

        Raises
        ------
        FileNotFoundError
            Raised if one of the given files does not exist.
        FileExistsError
            Raised if transfer is not `None` but the (internal) location the
            file would be moved to is already occupied.

        Notes
        -----
        This method (along with `_prepIngest`) should be implemented by
        subclasses to provide ingest support instead of implementing `ingest`
        directly.
        """
        raise NotImplementedError(f"Datastore {self} does not support direct file-based ingest.")

    def ingest(self, *datasets: FileDataset, transfer: Optional[str] = None) -> None:
        """Ingest one or more files into the datastore.

        Parameters
        ----------
        *datasets : `FileDataset`
            Each positional argument is a struct containing information about
            a file to be ingested, including its path (either absolute or
            relative to the datastore root, if applicable), a complete
            `DatasetRef` (with ``dataset_id not None``), and optionally a
            formatter class or its fully-qualified string name. If a formatter
            is not provided, the one the datastore would use for ``put`` on
            that dataset is assumed.
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            If `None` (default), the file must already be in a location
            appropriate for the datastore (e.g. within its root directory),
            and will not be modified. Other choices include "move", "copy",
            "link", "symlink", "relsymlink", and "hardlink". "link" is a
            special transfer mode that will first try to make a hardlink and
            if that fails a symlink will be used instead. "relsymlink" creates
            a relative symlink rather than use an absolute path.
            Most datastores do not support all transfer modes.
            "auto" is a special option that will let the
            data store choose the most natural option for itself.

        Raises
        ------
        NotImplementedError
            Raised if the datastore does not support the given transfer mode
            (including the case where ingest is not supported at all).
        DatasetTypeNotSupportedError
            Raised if one or more files to be ingested have a dataset type
            that is not supported by the datastore.
        FileNotFoundError
            Raised if one of the given files does not exist.
        FileExistsError
            Raised if transfer is not `None` but the (internal) location the
            file would be moved to is already occupied.

        Notes
        -----
        Subclasses should implement `_prepIngest` and `_finishIngest` instead
        of implementing `ingest` directly. Datastores that hold and
        delegate to child datastores may want to call those methods as well.

        Subclasses are encouraged to document their supported transfer modes
        in their class documentation.
        """
        # Allow a datastore to select a default transfer mode.
        transfer = self._overrideTransferMode(*datasets, transfer=transfer)
        prepData = self._prepIngest(*datasets, transfer=transfer)
        refs = {ref.id: ref for dataset in datasets for ref in dataset.refs}
        if None in refs:
            # Find the files for the error message. There may be multiple
            # bad refs, so look for all of them.
            unresolved_paths = {}
            for dataset in datasets:
                unresolved = [ref for ref in dataset.refs if ref.id is None]
                if unresolved:
                    unresolved_paths[dataset.path] = unresolved
            raise RuntimeError(
                "Attempt to ingest unresolved DatasetRef from: "
                + ",".join(
                    f"{path}: ({[str(r) for r in unrefs]})" for path, unrefs in unresolved_paths.items()
                )
            )
        if refs.keys() != prepData.refs.keys():
            unsupported = refs.keys() - prepData.refs.keys()
            # Group unsupported refs by DatasetType for an informative
            # but still concise error message.
            byDatasetType = defaultdict(list)
            for datasetId in unsupported:
                ref = refs[datasetId]
                byDatasetType[ref.datasetType].append(ref)
            raise DatasetTypeNotSupportedError(
                "DatasetType(s) not supported in ingest: "
                + ", ".join(f"{k.name} ({len(v)} dataset(s))" for k, v in byDatasetType.items())
            )
        self._finishIngest(prepData, transfer=transfer)
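
    # Example call (a sketch; the path and ``ref`` are hypothetical):
    #
    #     datastore.ingest(
    #         FileDataset(path="data/raw_1.fits", refs=[ref]),
    #         transfer="copy",
    #     )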

    def transfer_from(
        self,
        source_datastore: Datastore,
        refs: Iterable[DatasetRef],
        local_refs: Optional[Iterable[DatasetRef]] = None,
        transfer: str = "auto",
        artifact_existence: Optional[Dict[ButlerURI, bool]] = None,
    ) -> None:
        """Transfer dataset artifacts from another datastore to this one.

        Parameters
        ----------
        source_datastore : `Datastore`
            The datastore from which to transfer artifacts. That datastore
            must be compatible with this datastore receiving the artifacts.
        refs : iterable of `DatasetRef`
            The datasets to transfer from the source datastore.
        local_refs : iterable of `DatasetRef`, optional
            The dataset refs known to the registry associated with this
            datastore. Can be `None` if the source and target datastores
            are using UUIDs.
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            Choices include "move", "copy",
            "link", "symlink", "relsymlink", and "hardlink". "link" is a
            special transfer mode that will first try to make a hardlink and
            if that fails a symlink will be used instead. "relsymlink" creates
            a relative symlink rather than use an absolute path.
            Most datastores do not support all transfer modes.
            "auto" (the default) is a special option that will let the
            data store choose the most natural option for itself.
            If the source location and transfer location are identical the
            transfer mode will be ignored.
        artifact_existence : `dict` of [`ButlerURI`, `bool`], optional
            Mapping of datastore artifact to existence. Updated by this
            method with details of all artifacts tested. Can be `None`
            if the caller is not interested.

        Raises
        ------
        TypeError
            Raised if the two datastores are not compatible.
        """
        if type(self) is not type(source_datastore):
            raise TypeError(
                f"Datastore mismatch between this datastore ({type(self)}) and the "
                f"source datastore ({type(source_datastore)})."
            )

        raise NotImplementedError(f"Datastore {type(self)} must implement a transfer_from method.")

    @abstractmethod
    def getURIs(
        self, datasetRef: DatasetRef, predict: bool = False
    ) -> Tuple[Optional[ButlerURI], Dict[str, ButlerURI]]:
        """Return URIs associated with dataset.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required dataset.
        predict : `bool`, optional
            If the datastore does not know about the dataset, should it
            return a predicted URI or not?

        Returns
        -------
        primary : `ButlerURI`
            The URI to the primary artifact associated with this dataset.
            If the dataset was disassembled within the datastore this
            may be `None`.
        components : `dict`
            URIs to any components associated with the dataset artifact.
            Can be empty if there are no components.
        """
        raise NotImplementedError()
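
    # Typical consumption of the return value (a sketch):
    #
    #     primary, components = datastore.getURIs(ref)
    #     uris = list(components.values())
    #     if primary is not None:
    #         uris.append(primary)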

    @abstractmethod
    def getURI(self, datasetRef: DatasetRef, predict: bool = False) -> ButlerURI:
        """Return the URI to the Dataset.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required Dataset.
        predict : `bool`
            If `True` attempt to predict the URI for a dataset if it does
            not exist in the datastore.

        Returns
        -------
        uri : `ButlerURI`
            URI pointing to the Dataset within the datastore. If the
            Dataset does not exist in the datastore, the URI may be a guess.
            If the datastore does not have entities that relate well
            to the concept of a URI the returned URI will be
            descriptive. The returned URI is not guaranteed to be obtainable.

        Raises
        ------
        FileNotFoundError
            Raised if a URI has been requested for a dataset that does not
            exist and guessing is not allowed.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def retrieveArtifacts(
        self,
        refs: Iterable[DatasetRef],
        destination: ButlerURI,
        transfer: str = "auto",
        preserve_path: bool = True,
        overwrite: bool = False,
    ) -> List[ButlerURI]:
        """Retrieve the artifacts associated with the supplied refs.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            The datasets for which artifacts are to be retrieved.
            A single ref can result in multiple artifacts. The refs must
            be resolved.
        destination : `ButlerURI`
            Location to write the artifacts.
        transfer : `str`, optional
            Method to use to transfer the artifacts. Must be one of the
            options supported by `ButlerURI.transfer_from()`. "move" is not
            allowed.
        preserve_path : `bool`, optional
            If `True` the full path of the artifact within the datastore
            is preserved. If `False` the final file component of the path
            is used.
        overwrite : `bool`, optional
            If `True` allow transfers to overwrite existing files at the
            destination.

        Returns
        -------
        targets : `list` of `ButlerURI`
            URIs of file artifacts in the destination location. Order is not
            preserved.

        Notes
        -----
        For non-file datastores the artifacts written to the destination
        may not match the representation inside the datastore. For example
        a hierarchical data structure in a NoSQL database may well be stored
        as a JSON file.
        """
        raise NotImplementedError()
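
    # Example retrieval into a local directory (a sketch; the ButlerURI
    # construction shown is an assumption):
    #
    #     dest = ButlerURI("retrieved/", forceDirectory=True)
    #     targets = datastore.retrieveArtifacts(refs, dest, preserve_path=False)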

    @abstractmethod
    def remove(self, datasetRef: DatasetRef) -> None:
        """Indicate to the Datastore that a Dataset can be removed.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required Dataset.

        Raises
        ------
        FileNotFoundError
            Raised when the Dataset does not exist.

        Notes
        -----
        Some Datastores may implement this method as a silent no-op to
        disable Dataset deletion through standard interfaces.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def forget(self, refs: Iterable[DatasetRef]) -> None:
        """Indicate to the Datastore that it should remove all records of the
        given datasets, without actually deleting them.

        Parameters
        ----------
        refs : `Iterable` [ `DatasetRef` ]
            References to the datasets being forgotten.

        Notes
        -----
        Asking a datastore to forget a `DatasetRef` it does not hold should be
        a silent no-op, not an error.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def trash(self, ref: Union[DatasetRef, Iterable[DatasetRef]], ignore_errors: bool = True) -> None:
        """Indicate to the Datastore that a Dataset can be moved to the trash.

        Parameters
        ----------
        ref : `DatasetRef` or iterable thereof
            Reference(s) to the required Dataset.
        ignore_errors : `bool`, optional
            Determine whether errors should be ignored. When multiple
            refs are being trashed there will be no per-ref check.

        Raises
        ------
        FileNotFoundError
            Raised when the Dataset does not exist and errors are not
            ignored. Only checked if a single ref is supplied (and not in a
            list).

        Notes
        -----
        Some Datastores may implement this method as a silent no-op to
        disable Dataset deletion through standard interfaces.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def emptyTrash(self, ignore_errors: bool = True) -> None:
        """Remove all datasets from the trash.

        Parameters
        ----------
        ignore_errors : `bool`, optional
            Determine whether errors should be ignored.

        Notes
        -----
        Some Datastores may implement this method as a silent no-op to
        disable Dataset deletion through standard interfaces.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def transfer(self, inputDatastore: Datastore, datasetRef: DatasetRef) -> None:
        """Transfer a dataset from another datastore to this datastore.

        Parameters
        ----------
        inputDatastore : `Datastore`
            The external `Datastore` from which to retrieve the Dataset.
        datasetRef : `DatasetRef`
            Reference to the required Dataset.
        """
        raise NotImplementedError("Must be implemented by subclass")

    def export(
        self, refs: Iterable[DatasetRef], *, directory: Optional[str] = None, transfer: Optional[str] = None
    ) -> Iterable[FileDataset]:
        """Export datasets for transfer to another data repository.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            Dataset references to be exported.
        directory : `str`, optional
            Path to a directory that should contain files corresponding to
            output datasets. Ignored if ``transfer`` is `None`.
        transfer : `str`, optional
            Mode that should be used to move datasets out of the repository.
            Valid options are the same as those of the ``transfer`` argument
            to ``ingest``, and datastores may similarly signal that a transfer
            mode is not supported by raising `NotImplementedError`.

        Returns
        -------
        datasets : iterable of `FileDataset`
            Structs containing information about the exported datasets, in the
            same order as ``refs``.

        Raises
        ------
        NotImplementedError
            Raised if the given transfer mode is not supported.
        """
        raise NotImplementedError(f"Transfer mode {transfer} not supported.")

    @abstractmethod
    def validateConfiguration(
        self, entities: Iterable[Union[DatasetRef, DatasetType, StorageClass]], logFailures: bool = False
    ) -> None:
        """Validate some of the configuration for this datastore.

        Parameters
        ----------
        entities : iterable of `DatasetRef`, `DatasetType`, or `StorageClass`
            Entities to test against this configuration. Can be differing
            types.
        logFailures : `bool`, optional
            If `True`, output a log message for every validation error
            detected.

        Raises
        ------
        DatastoreValidationError
            Raised if there is a validation problem with a configuration.

        Notes
        -----
        Which parts of the configuration are validated is at the discretion
        of each Datastore implementation.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def validateKey(self, lookupKey: LookupKey, entity: Union[DatasetRef, DatasetType, StorageClass]) -> None:
        """Validate a specific lookup key with supplied entity.

        Parameters
        ----------
        lookupKey : `LookupKey`
            Key to use to retrieve information from the datastore
            configuration.
        entity : `DatasetRef`, `DatasetType`, or `StorageClass`
            Entity to compare with configuration retrieved using the
            specified lookup key.

        Raises
        ------
        DatastoreValidationError
            Raised if there is a problem with the combination of entity
            and lookup key.

        Notes
        -----
        Bypasses the normal selection priorities by allowing a key that
        would normally not be selected to be validated.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def getLookupKeys(self) -> Set[LookupKey]:
        """Return all the lookup keys relevant to this datastore.

        Returns
        -------
        keys : `set` of `LookupKey`
            The keys stored internally for looking up information based
            on `DatasetType` name or `StorageClass`.
        """
        raise NotImplementedError("Must be implemented by subclass")

    def needs_expanded_data_ids(
        self,
        transfer: Optional[str],
        entity: Optional[Union[DatasetRef, DatasetType, StorageClass]] = None,
    ) -> bool:
        """Test whether this datastore needs expanded data IDs to ingest.

        Parameters
        ----------
        transfer : `str` or `None`
            Transfer mode for ingest.
        entity : `DatasetRef`, `DatasetType`, or `StorageClass`, optional
            Object representing what will be ingested. If not provided (or
            not specific enough), `True` may be returned even if expanded
            data IDs aren't necessary.

        Returns
        -------
        needed : `bool`
            If `True`, expanded data IDs may be needed. `False` only if
            expansion definitely isn't necessary.
        """
        return True