# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Support for generic data stores."""

from __future__ import annotations

__all__ = ("DatastoreConfig", "Datastore", "DatastoreValidationError")

import contextlib
import logging
from collections import defaultdict
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    ClassVar,
    Dict,
    Iterable,
    Iterator,
    List,
    Mapping,
    Optional,
    Set,
    Tuple,
    Type,
    Union,
)

from dataclasses import dataclass
from abc import ABCMeta, abstractmethod

from lsst.utils import doImport
from .config import ConfigSubset, Config
from .exceptions import ValidationError, DatasetTypeNotSupportedError
from .constraints import Constraints
from .storageClass import StorageClassFactory
from .fileDataset import FileDataset

if TYPE_CHECKING:
    from ..registry.interfaces import DatastoreRegistryBridgeManager
    from .datasets import DatasetRef, DatasetType
    from .configSupport import LookupKey
    from .storageClass import StorageClass
    from ._butlerUri import ButlerURI


class DatastoreConfig(ConfigSubset):
    """Configuration for Datastores."""

    component = "datastore"
    requiredKeys = ("cls",)
    defaultConfigFile = "datastore.yaml"
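
# Example of a minimal datastore configuration (illustrative, not a shipped
# default). The required "cls" key names the Datastore class to construct:
#
#     datastore:
#       cls: lsst.daf.butler.datastores.fileDatastore.FileDatastore
#       root: <butlerRoot>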


class DatastoreValidationError(ValidationError):
    """There is a problem with the Datastore configuration."""

    pass


@dataclass(frozen=True)
class Event:
    """A single undoable action recorded by a `DatastoreTransaction`."""

    __slots__ = {"name", "undoFunc", "args", "kwargs"}
    name: str
    undoFunc: Callable
    args: tuple
    kwargs: dict


class IngestPrepData:
    """A helper base class for `Datastore` ingest implementations.

    Datastore implementations will generally need a custom implementation of
    this class.

    Should be accessed as ``Datastore.IngestPrepData`` instead of via direct
    import.

    Parameters
    ----------
    refs : iterable of `DatasetRef`
        References for the datasets that can be ingested by this datastore.
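
    Examples
    --------
    A minimal sketch of a subclass (hypothetical, for illustration only)
    that carries extra per-dataset state from `_prepIngest` to
    `_finishIngest`::

        class MyIngestPrepData(Datastore.IngestPrepData):
            def __init__(self, refs, paths):
                super().__init__(refs)
                self.paths = paths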

    """

    def __init__(self, refs: Iterable[DatasetRef]):
        self.refs = {ref.id: ref for ref in refs}


class DatastoreTransaction:
    """Keeps a log of `Datastore` activity and allows rollback.

    Parameters
    ----------
    parent : `DatastoreTransaction`, optional
        The parent transaction (if any).
    """

    Event: ClassVar[Type] = Event

    parent: Optional['DatastoreTransaction']
    """The parent transaction. (`DatastoreTransaction`, optional)"""

    def __init__(self, parent: Optional[DatastoreTransaction] = None):
        self.parent = parent
        self._log: List[Event] = []

    def registerUndo(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> None:
        """Register event with undo function.

        Parameters
        ----------
        name : `str`
            Name of the event.
        undoFunc : func
            Function to undo this event.
        args : `tuple`
            Positional arguments to `undoFunc`.
        **kwargs
            Keyword arguments to `undoFunc`.
        """
        self._log.append(self.Event(name, undoFunc, args, kwargs))

    @contextlib.contextmanager
    def undoWith(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> Iterator[None]:
        """Register undo function if nested operation succeeds.

        Calls `registerUndo`.

        This can be used to wrap individual undo-able statements within a
        DatastoreTransaction block. Multiple statements that can fail
        separately should not be part of the same `undoWith` block.

        All arguments are forwarded directly to `registerUndo`.
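
        Examples
        --------
        A sketch of typical usage; ``write_file`` and ``remove_file`` are
        hypothetical callables, not part of this module::

            with datastore.transaction() as txn:
                with txn.undoWith("write", remove_file, path):
                    write_file(path)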

        """
        try:
            yield None
        except BaseException:
            # Do not register the undo function if the nested block raised.
            raise
        else:
            self.registerUndo(name, undoFunc, *args, **kwargs)

    def rollback(self) -> None:
        """Roll back all events in this transaction."""
        log = logging.getLogger(__name__)
        while self._log:
            ev = self._log.pop()
            try:
                log.debug("Rolling back transaction: %s: %s(%s,%s)", ev.name,
                          ev.undoFunc,
                          ",".join(str(a) for a in ev.args),
                          ",".join(f"{k}={v}" for k, v in ev.kwargs.items()))
            except Exception:
                # In case we had a problem in stringification of arguments
                log.warning("Rolling back transaction: %s", ev.name)
            try:
                ev.undoFunc(*ev.args, **ev.kwargs)
            except BaseException as e:
                # Deliberately swallow error that may occur in unrolling
                log.warning("Exception: %s caught while unrolling: %s", e, ev.name)
                pass

    def commit(self) -> None:
        """Commit this transaction."""
        if self.parent is None:
            # Just forget about the events, they have already happened.
            return
        else:
            # We may still want to undo events from this transaction as part
            # of the parent.
            self.parent._log.extend(self._log)


class Datastore(metaclass=ABCMeta):
    """Datastore interface.

    Parameters
    ----------
    config : `DatastoreConfig` or `str`
        Load configuration either from an existing config instance or by
        referring to a configuration file.
    bridgeManager : `DatastoreRegistryBridgeManager`
        Object that manages the interface between `Registry` and datastores.
    butlerRoot : `str`, optional
        New datastore root to use to override the configuration value.
    """

    defaultConfigFile: ClassVar[Optional[str]] = None
    """Path to configuration defaults. Accessed within the ``config`` resource
    or relative to a search path. Can be None if no defaults specified.
    """

    containerKey: ClassVar[Optional[str]] = None
    """Name of the key containing a list of subconfigurations that also
    need to be merged with defaults and will likely use different Python
    datastore classes (but all using DatastoreConfig). Assumed to be a
    list of configurations that can be represented in a DatastoreConfig
    and containing a "cls" definition. None indicates that no containers
    are expected in this Datastore."""

    isEphemeral: bool = False
    """Indicate whether this Datastore is ephemeral or not. An ephemeral
    datastore is one whose contents will not exist across process restarts.
    This value can change per-instance."""

    config: DatastoreConfig
    """Configuration used to create Datastore."""

    name: str
    """Label associated with this Datastore."""

    storageClassFactory: StorageClassFactory
    """Factory for creating storage class instances from name."""

    constraints: Constraints
    """Constraints to apply when putting datasets into the datastore."""

    # MyPy does not like this to be annotated as any kind of type, because
    # it can't do static checking on type variables that can change at
    # runtime.
    IngestPrepData: ClassVar[Any] = IngestPrepData
    """Helper base class for ingest implementations."""

    @classmethod
    @abstractmethod
    def setConfigRoot(cls, root: str, config: Config, full: Config, overwrite: bool = True) -> None:
        """Set filesystem-dependent config options for this datastore.

        The options will be appropriate for a new empty repository with the
        given root.

        Parameters
        ----------
        root : `str`
            Filesystem path to the root of the data repository.
        config : `Config`
            A `Config` to update. Only the subset understood by
            this component will be updated. Will not expand
            defaults.
        full : `Config`
            A complete config with all defaults expanded that can be
            converted to a `DatastoreConfig`. Read-only and will not be
            modified by this method.
            Repository-specific options that should not be obtained
            from defaults when Butler instances are constructed
            should be copied from ``full`` to ``config``.
        overwrite : `bool`, optional
            If `False`, do not modify a value in ``config`` if the value
            already exists. Default is always to overwrite with the provided
            ``root``.

        Notes
        -----
        If a keyword is explicitly defined in the supplied ``config`` it
        will not be overridden by this method if ``overwrite`` is `False`.
        This allows explicit values set in external configs to be retained.
        """
        raise NotImplementedError()

    @staticmethod
    def fromConfig(config: Config, bridgeManager: DatastoreRegistryBridgeManager,
                   butlerRoot: Optional[Union[str, ButlerURI]] = None) -> 'Datastore':
        """Create datastore from type specified in config file.

        Parameters
        ----------
        config : `Config`
            Configuration instance.
        bridgeManager : `DatastoreRegistryBridgeManager`
            Object that manages the interface between `Registry` and
            datastores.
        butlerRoot : `str`, optional
            Butler root directory.
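
        Examples
        --------
        A sketch of typical use; the ``cls`` value shown is hypothetical and
        ``bridgeManager`` must be constructed separately::

            config = Config({"datastore": {"cls": "mypackage.MyDatastore"}})
            datastore = Datastore.fromConfig(config, bridgeManager)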

        """
        cls = doImport(config["datastore", "cls"])
        return cls(config=config, bridgeManager=bridgeManager, butlerRoot=butlerRoot)

    def __init__(self, config: Union[Config, str],
                 bridgeManager: DatastoreRegistryBridgeManager, butlerRoot: Optional[str] = None):
        self.config = DatastoreConfig(config)
        self.name = "ABCDataStore"
        self._transaction: Optional[DatastoreTransaction] = None

        # All Datastores need storage classes and constraints
        self.storageClassFactory = StorageClassFactory()

        # And read the constraints list
        constraintsConfig = self.config.get("constraints")
        self.constraints = Constraints(constraintsConfig, universe=bridgeManager.universe)

    def __str__(self) -> str:
        return self.name

    def __repr__(self) -> str:
        return self.name

    @property
    def names(self) -> Tuple[str, ...]:
        """Names associated with this datastore, returned as a tuple.

        Can differ from ``name`` for a chaining datastore.
        """
        # Default implementation returns solely the name itself
        return (self.name, )

    @contextlib.contextmanager
    def transaction(self) -> Iterator[DatastoreTransaction]:
        """Context manager supporting `Datastore` transactions.

        Transactions can be nested, and are to be used in combination with
        `Registry.transaction`.
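
        Examples
        --------
        On error, every undo action registered inside the block is run in
        reverse order. A sketch, where ``undo_put`` is a hypothetical
        callable::

            with datastore.transaction() as txn:
                txn.registerUndo("put", undo_put, ref)
                ...  # operations that may raise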

        """
        self._transaction = DatastoreTransaction(self._transaction)
        try:
            yield self._transaction
        except BaseException:
            self._transaction.rollback()
            raise
        else:
            self._transaction.commit()
        self._transaction = self._transaction.parent

    @abstractmethod
    def knows(self, ref: DatasetRef) -> bool:
        """Check if the dataset is known to the datastore.

        Does not check for existence of any artifact.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required dataset.

        Returns
        -------
        exists : `bool`
            `True` if the dataset is known to the datastore.
        """
        raise NotImplementedError()

    @abstractmethod
    def exists(self, datasetRef: DatasetRef) -> bool:
        """Check if the dataset exists in the datastore.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required dataset.

        Returns
        -------
        exists : `bool`
            `True` if the entity exists in the `Datastore`.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def get(self, datasetRef: DatasetRef, parameters: Optional[Mapping[str, Any]] = None) -> Any:
        """Load an `InMemoryDataset` from the store.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required Dataset.
        parameters : `dict`, optional
            `StorageClass`-specific parameters that specify a slice of the
            Dataset to be loaded.

        Returns
        -------
        inMemoryDataset : `object`
            Requested Dataset or slice thereof as an InMemoryDataset.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def put(self, inMemoryDataset: Any, datasetRef: DatasetRef) -> None:
        """Write an `InMemoryDataset` with a given `DatasetRef` to the store.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Dataset to store.
        datasetRef : `DatasetRef`
            Reference to the associated Dataset.
        """
        raise NotImplementedError("Must be implemented by subclass")

    def _overrideTransferMode(self, *datasets: FileDataset, transfer: Optional[str] = None) -> Optional[str]:
        """Allow ingest transfer mode to be defaulted based on datasets.

        Parameters
        ----------
        datasets : `FileDataset`
            Each positional argument is a struct containing information about
            a file to be ingested, including its path (either absolute or
            relative to the datastore root, if applicable), a complete
            `DatasetRef` (with ``dataset_id not None``), and optionally a
            formatter class or its fully-qualified string name. If a formatter
            is not provided, this method should populate that attribute with
            the formatter the datastore would use for `put`. Subclasses are
            also permitted to modify the path attribute (typically to put it
            in what the datastore considers its standard form).
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            See `ingest` for details of transfer modes.

        Returns
        -------
        newTransfer : `str`
            Transfer mode to use. Will be identical to the supplied transfer
            mode unless "auto" is used.
        """
        if transfer != "auto":
            return transfer
        raise RuntimeError(f"{transfer} is not allowed without specialization.")

    def _prepIngest(self, *datasets: FileDataset, transfer: Optional[str] = None) -> IngestPrepData:
        """Process datasets to identify which ones can be ingested.

        Parameters
        ----------
        datasets : `FileDataset`
            Each positional argument is a struct containing information about
            a file to be ingested, including its path (either absolute or
            relative to the datastore root, if applicable), a complete
            `DatasetRef` (with ``dataset_id not None``), and optionally a
            formatter class or its fully-qualified string name. If a formatter
            is not provided, this method should populate that attribute with
            the formatter the datastore would use for `put`. Subclasses are
            also permitted to modify the path attribute (typically to put it
            in what the datastore considers its standard form).
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            See `ingest` for details of transfer modes.

        Returns
        -------
        data : `IngestPrepData`
            An instance of a subclass of `IngestPrepData`, used to pass
            arbitrary data from `_prepIngest` to `_finishIngest`. This should
            include only the datasets this datastore can actually ingest;
            others should be silently ignored (`Datastore.ingest` will inspect
            `IngestPrepData.refs` and raise `DatasetTypeNotSupportedError` if
            necessary).

        Raises
        ------
        NotImplementedError
            Raised if the datastore does not support the given transfer mode
            (including the case where ingest is not supported at all).
        FileNotFoundError
            Raised if one of the given files does not exist.
        FileExistsError
            Raised if transfer is not `None` but the (internal) location the
            file would be moved to is already occupied.

        Notes
        -----
        This method (along with `_finishIngest`) should be implemented by
        subclasses to provide ingest support instead of implementing `ingest`
        directly.

        `_prepIngest` should not modify the data repository or given files in
        any way; all changes should be deferred to `_finishIngest`.

        When possible, exceptions should be raised in `_prepIngest` instead of
        `_finishIngest`. `NotImplementedError` exceptions that indicate that
        the transfer mode is not supported must be raised by `_prepIngest`
        instead of `_finishIngest`.
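
        Examples
        --------
        A skeletal override (illustrative only; a real implementation would
        check the transfer mode and filter out unsupported datasets)::

            def _prepIngest(self, *datasets, transfer=None):
                refs = [ref for d in datasets for ref in d.refs]
                return self.IngestPrepData(refs)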

        """
        raise NotImplementedError(
            f"Datastore {self} does not support direct file-based ingest."
        )

    def _finishIngest(self, prepData: IngestPrepData, *, transfer: Optional[str] = None) -> None:
        """Complete an ingest operation.

        Parameters
        ----------
        prepData : `IngestPrepData`
            An instance of a subclass of `IngestPrepData`. Guaranteed to be
            the direct result of a call to `_prepIngest` on this datastore.
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            See `ingest` for details of transfer modes.

        Raises
        ------
        FileNotFoundError
            Raised if one of the given files does not exist.
        FileExistsError
            Raised if transfer is not `None` but the (internal) location the
            file would be moved to is already occupied.

        Notes
        -----
        This method (along with `_prepIngest`) should be implemented by
        subclasses to provide ingest support instead of implementing `ingest`
        directly.
        """
        raise NotImplementedError(
            f"Datastore {self} does not support direct file-based ingest."
        )

    def ingest(self, *datasets: FileDataset, transfer: Optional[str] = None) -> None:
        """Ingest one or more files into the datastore.

        Parameters
        ----------
        datasets : `FileDataset`
            Each positional argument is a struct containing information about
            a file to be ingested, including its path (either absolute or
            relative to the datastore root, if applicable), a complete
            `DatasetRef` (with ``dataset_id not None``), and optionally a
            formatter class or its fully-qualified string name. If a formatter
            is not provided, the one the datastore would use for ``put`` on
            that dataset is assumed.
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            If `None` (default), the file must already be in a location
            appropriate for the datastore (e.g. within its root directory),
            and will not be modified. Other choices include "move", "copy",
            "link", "symlink", "relsymlink", and "hardlink". "link" is a
            special transfer mode that will first try to make a hardlink and
            if that fails a symlink will be used instead. "relsymlink" creates
            a relative symlink rather than use an absolute path.
            Most datastores do not support all transfer modes.
            "auto" is a special option that will let the
            data store choose the most natural option for itself.

        Raises
        ------
        NotImplementedError
            Raised if the datastore does not support the given transfer mode
            (including the case where ingest is not supported at all).
        DatasetTypeNotSupportedError
            Raised if one or more files to be ingested have a dataset type that
            is not supported by the datastore.
        FileNotFoundError
            Raised if one of the given files does not exist.
        FileExistsError
            Raised if transfer is not `None` but the (internal) location the
            file would be moved to is already occupied.

        Notes
        -----
        Subclasses should implement `_prepIngest` and `_finishIngest` instead
        of implementing `ingest` directly. Datastores that hold and
        delegate to child datastores may want to call those methods as well.

        Subclasses are encouraged to document their supported transfer modes
        in their class documentation.
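
        Examples
        --------
        A sketch of ingesting an existing file by copying it; the path is
        illustrative and ``ref`` must be a resolved `DatasetRef`::

            datastore.ingest(FileDataset(path="/data/file.fits", refs=ref),
                             transfer="copy")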

        """
        # Allow a datastore to select a default transfer mode
        transfer = self._overrideTransferMode(*datasets, transfer=transfer)
        prepData = self._prepIngest(*datasets, transfer=transfer)
        refs = {ref.id: ref for dataset in datasets for ref in dataset.refs}
        if None in refs:
            # Find the file for the error message. There may be multiple
            # bad refs so look for all of them.
            unresolved_paths = {}
            for dataset in datasets:
                unresolved = []
                for ref in dataset.refs:
                    if ref.id is None:
                        unresolved.append(ref)
                if unresolved:
                    unresolved_paths[dataset.path] = unresolved
            raise RuntimeError("Attempt to ingest unresolved DatasetRef from: "
                               + ",".join(f"{p}: ({[str(r) for r in unres]})"
                                          for p, unres in unresolved_paths.items()))
        if refs.keys() != prepData.refs.keys():
            unsupported = refs.keys() - prepData.refs.keys()
            # Group unsupported refs by DatasetType for an informative
            # but still concise error message.
            byDatasetType = defaultdict(list)
            for datasetId in unsupported:
                ref = refs[datasetId]
                byDatasetType[ref.datasetType].append(ref)
            raise DatasetTypeNotSupportedError(
                "DatasetType(s) not supported in ingest: "
                + ", ".join(f"{k.name} ({len(v)} dataset(s))" for k, v in byDatasetType.items())
            )
        self._finishIngest(prepData, transfer=transfer)

    def transfer_from(self, source_datastore: Datastore, refs: Iterable[DatasetRef],
                      local_refs: Optional[Iterable[DatasetRef]] = None,
                      transfer: str = "auto") -> None:
        """Transfer dataset artifacts from another datastore to this one.

        Parameters
        ----------
        source_datastore : `Datastore`
            The datastore from which to transfer artifacts. That datastore
            must be compatible with this datastore receiving the artifacts.
        refs : iterable of `DatasetRef`
            The datasets to transfer from the source datastore.
        local_refs : iterable of `DatasetRef`, optional
            The dataset refs as known to the registry associated with this
            datastore. Can be `None` if the source and target datastores
            are using UUIDs.
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            Choices include "move", "copy",
            "link", "symlink", "relsymlink", and "hardlink". "link" is a
            special transfer mode that will first try to make a hardlink and
            if that fails a symlink will be used instead. "relsymlink" creates
            a relative symlink rather than use an absolute path.
            Most datastores do not support all transfer modes.
            "auto" (the default) is a special option that will let the
            data store choose the most natural option for itself.
            If the source location and transfer location are identical the
            transfer mode will be ignored.

        Raises
        ------
        TypeError
            Raised if the two datastores are not compatible.
        """
        if type(self) is not type(source_datastore):
            raise TypeError(f"Datastore mismatch between this datastore ({type(self)}) and the "
                            f"source datastore ({type(source_datastore)}).")

        raise NotImplementedError(f"Datastore {type(self)} must implement a transfer_from method.")

    @abstractmethod
    def getURIs(self, datasetRef: DatasetRef,
                predict: bool = False) -> Tuple[Optional[ButlerURI], Dict[str, ButlerURI]]:
        """Return URIs associated with dataset.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required dataset.
        predict : `bool`, optional
            If the datastore does not know about the dataset, should it
            return a predicted URI or not?

        Returns
        -------
        primary : `ButlerURI`
            The URI to the primary artifact associated with this dataset.
            If the dataset was disassembled within the datastore this
            may be `None`.
        components : `dict`
            URIs to any components associated with the dataset artifact.
            Can be empty if there are no components.
        """
        raise NotImplementedError()

    @abstractmethod
    def getURI(self, datasetRef: DatasetRef, predict: bool = False) -> ButlerURI:
        """URI to the Dataset.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required Dataset.
        predict : `bool`
            If `True` attempt to predict the URI for a dataset if it does
            not exist in datastore.

        Returns
        -------
        uri : `ButlerURI`
            URI pointing to the Dataset within the datastore. If the
            Dataset does not exist in the datastore, the URI may be a guess.
            If the datastore does not have entities that relate well
            to the concept of a URI the returned URI will be
            descriptive. The returned URI is not guaranteed to be obtainable.

        Raises
        ------
        FileNotFoundError
            Raised if a URI has been requested for a dataset that does not
            exist and guessing is not allowed.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def retrieveArtifacts(self, refs: Iterable[DatasetRef],
                          destination: ButlerURI, transfer: str = "auto",
                          preserve_path: bool = True,
                          overwrite: bool = False) -> List[ButlerURI]:
        """Retrieve the artifacts associated with the supplied refs.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            The datasets for which artifacts are to be retrieved.
            A single ref can result in multiple artifacts. The refs must
            be resolved.
        destination : `ButlerURI`
            Location to write the artifacts.
        transfer : `str`, optional
            Method to use to transfer the artifacts. Must be one of the options
            supported by `ButlerURI.transfer_from()`. "move" is not allowed.
        preserve_path : `bool`, optional
            If `True` the full path of the artifact within the datastore
            is preserved. If `False` the final file component of the path
            is used.
        overwrite : `bool`, optional
            If `True` allow transfers to overwrite existing files at the
            destination.

        Returns
        -------
        targets : `list` of `ButlerURI`
            URIs of file artifacts in destination location. Order is not
            preserved.

        Notes
        -----
        For non-file datastores the artifacts written to the destination
        may not match the representation inside the datastore. For example
        a hierarchical data structure in a NoSQL database may well be stored
        as a JSON file.
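
        Examples
        --------
        A sketch of copying artifacts to a local directory; the destination
        path is illustrative::

            datastore.retrieveArtifacts(refs, ButlerURI("/tmp/export/"),
                                        transfer="copy")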

        """
        raise NotImplementedError()

    @abstractmethod
    def remove(self, datasetRef: DatasetRef) -> None:
        """Indicate to the Datastore that a Dataset can be removed.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required Dataset.

        Raises
        ------
        FileNotFoundError
            Raised when the Dataset does not exist.

        Notes
        -----
        Some Datastores may implement this method as a silent no-op to
        disable Dataset deletion through standard interfaces.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def forget(self, refs: Iterable[DatasetRef]) -> None:
        """Indicate to the Datastore that it should remove all records of the
        given datasets, without actually deleting them.

        Parameters
        ----------
        refs : `Iterable` [ `DatasetRef` ]
            References to the datasets being forgotten.

        Notes
        -----
        Asking a datastore to forget a `DatasetRef` it does not hold should be
        a silent no-op, not an error.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def trash(self, ref: Union[DatasetRef, Iterable[DatasetRef]], ignore_errors: bool = True) -> None:
        """Indicate to the Datastore that a Dataset can be moved to the trash.

        Parameters
        ----------
        ref : `DatasetRef` or iterable thereof
            Reference(s) to the required Dataset.
        ignore_errors : `bool`, optional
            Determine whether errors should be ignored. When multiple
            refs are being trashed there will be no per-ref check.

        Raises
        ------
        FileNotFoundError
            Raised when the Dataset does not exist and errors are not
            ignored. Only checked if a single ref is supplied (and not in a
            list).

        Notes
        -----
        Some Datastores may implement this method as a silent no-op to
        disable Dataset deletion through standard interfaces.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def emptyTrash(self, ignore_errors: bool = True) -> None:
        """Remove all datasets from the trash.

        Parameters
        ----------
        ignore_errors : `bool`, optional
            Determine whether errors should be ignored.

        Notes
        -----
        Some Datastores may implement this method as a silent no-op to
        disable Dataset deletion through standard interfaces.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def transfer(self, inputDatastore: Datastore, datasetRef: DatasetRef) -> None:
        """Transfer a dataset from another datastore to this datastore.

        Parameters
        ----------
        inputDatastore : `Datastore`
            The external `Datastore` from which to retrieve the Dataset.
        datasetRef : `DatasetRef`
            Reference to the required Dataset.
        """
        raise NotImplementedError("Must be implemented by subclass")

    def export(self, refs: Iterable[DatasetRef], *,
               directory: Optional[str] = None, transfer: Optional[str] = None) -> Iterable[FileDataset]:
        """Export datasets for transfer to another data repository.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            Dataset references to be exported.
        directory : `str`, optional
            Path to a directory that should contain files corresponding to
            output datasets. Ignored if ``transfer`` is `None`.
        transfer : `str`, optional
            Mode that should be used to move datasets out of the repository.
            Valid options are the same as those of the ``transfer`` argument
            to ``ingest``, and datastores may similarly signal that a transfer
            mode is not supported by raising `NotImplementedError`.

        Returns
        -------
        datasets : iterable of `FileDataset`
            Structs containing information about the exported datasets, in the
            same order as ``refs``.

        Raises
        ------
        NotImplementedError
            Raised if the given transfer mode is not supported.
        """
        raise NotImplementedError(f"Transfer mode {transfer} not supported.")

    @abstractmethod
    def validateConfiguration(self, entities: Iterable[Union[DatasetRef, DatasetType, StorageClass]],
                              logFailures: bool = False) -> None:
        """Validate some of the configuration for this datastore.

        Parameters
        ----------
        entities : iterable of `DatasetRef`, `DatasetType`, or `StorageClass`
            Entities to test against this configuration. Can be differing
            types.
        logFailures : `bool`, optional
            If `True`, output a log message for every validation error
            detected.

        Raises
        ------
        DatastoreValidationError
            Raised if there is a validation problem with a configuration.

        Notes
        -----
        Which parts of the configuration are validated is at the discretion
        of each Datastore implementation.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def validateKey(self,
                    lookupKey: LookupKey, entity: Union[DatasetRef, DatasetType, StorageClass]) -> None:
        """Validate a specific lookup key with the supplied entity.

        Parameters
        ----------
        lookupKey : `LookupKey`
            Key to use to retrieve information from the datastore
            configuration.
        entity : `DatasetRef`, `DatasetType`, or `StorageClass`
            Entity to compare with configuration retrieved using the
            specified lookup key.

        Raises
        ------
        DatastoreValidationError
            Raised if there is a problem with the combination of entity
            and lookup key.

        Notes
        -----
        Bypasses the normal selection priorities by allowing a key that
        would normally not be selected to be validated.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def getLookupKeys(self) -> Set[LookupKey]:
        """Return all the lookup keys relevant to this datastore.

        Returns
        -------
        keys : `set` of `LookupKey`
            The keys stored internally for looking up information based
            on `DatasetType` name or `StorageClass`.
        """
        raise NotImplementedError("Must be implemented by subclass")

    def needs_expanded_data_ids(
        self,
        transfer: Optional[str],
        entity: Optional[Union[DatasetRef, DatasetType, StorageClass]] = None,
    ) -> bool:
        """Test whether this datastore needs expanded data IDs to ingest.

        Parameters
        ----------
        transfer : `str` or `None`
            Transfer mode for ingest.
        entity : `DatasetRef`, `DatasetType`, or `StorageClass`, optional
            Object representing what will be ingested. If not provided (or not
            specific enough), `True` may be returned even if expanded data
            IDs aren't necessary.

        Returns
        -------
        needed : `bool`
            If `True`, expanded data IDs may be needed. `False` only if
            expansion definitely isn't necessary.
        """
        return True