Coverage for python/lsst/daf/butler/core/datastore.py: 45%


200 statements  

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22"""Support for generic data stores.""" 

23 

24from __future__ import annotations 

25 

26__all__ = ("DatastoreConfig", "Datastore", "DatastoreValidationError") 

27 

28import contextlib 

29import logging 

30from abc import ABCMeta, abstractmethod 

31from collections import defaultdict 

32from dataclasses import dataclass 

33from typing import ( 

34 TYPE_CHECKING, 

35 Any, 

36 Callable, 

37 ClassVar, 

38 Dict, 

39 Iterable, 

40 Iterator, 

41 List, 

42 Mapping, 

43 Optional, 

44 Set, 

45 Tuple, 

46 Type, 

47 Union, 

48) 

49 

50from lsst.utils import doImportType 

51 

52from .config import Config, ConfigSubset 

53from .constraints import Constraints 

54from .exceptions import DatasetTypeNotSupportedError, ValidationError 

55from .fileDataset import FileDataset 

56from .storageClass import StorageClassFactory 

57 

58if TYPE_CHECKING:  [coverage: 58 ↛ 59, line 58 didn't jump to line 59 because the condition on line 58 was never true]

59 from lsst.resources import ResourcePath, ResourcePathExpression 

60 

61 from ..registry.interfaces import DatastoreRegistryBridgeManager 

62 from .configSupport import LookupKey 

63 from .datasets import DatasetRef, DatasetType 

64 from .storageClass import StorageClass 

65 

66 

67class DatastoreConfig(ConfigSubset): 

68 """Configuration for Datastores.""" 

69 

70 component = "datastore" 

71 requiredKeys = ("cls",) 

72 defaultConfigFile = "datastore.yaml" 

73 

74 

75class DatastoreValidationError(ValidationError): 

76 """There is a problem with the Datastore configuration.""" 

77 

78 pass 

79 

80 

81@dataclass(frozen=True) 

82class Event: 

83 __slots__ = {"name", "undoFunc", "args", "kwargs"} 

84 name: str 

85 undoFunc: Callable 

86 args: tuple 

87 kwargs: dict 

88 

89 

90class IngestPrepData: 

91 """A helper base class for `Datastore` ingest implementations. 

92 

93 Datastore implementations will generally need a custom implementation of 

94 this class. 

95 

96 Should be accessed as ``Datastore.IngestPrepData`` instead of via direct 

97 import. 

98 

99 Parameters 

100 ---------- 

101 refs : iterable of `DatasetRef` 

102 References for the datasets that can be ingested by this datastore. 

103 """ 

104 

105 def __init__(self, refs: Iterable[DatasetRef]): 

106 self.refs = {ref.id: ref for ref in refs} 

107 

108 

109class DatastoreTransaction: 

110 """Keeps a log of `Datastore` activity and allow rollback. 

111 

112 Parameters 

113 ---------- 

114 parent : `DatastoreTransaction`, optional 

115 The parent transaction (if any) 

116 """ 

117 

118 Event: ClassVar[Type] = Event 

119 

120 parent: Optional["DatastoreTransaction"] 

121 """The parent transaction. (`DatastoreTransaction`, optional)""" 

122 

123 def __init__(self, parent: Optional[DatastoreTransaction] = None): 

124 self.parent = parent 

125 self._log: List[Event] = [] 

126 

127 def registerUndo(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> None: 

128 """Register event with undo function. 

129 

130 Parameters 

131 ---------- 

132 name : `str` 

133 Name of the event. 

134 undoFunc : func 

135 Function to undo this event. 

136 args : `tuple` 

137 Positional arguments to `undoFunc`. 

138 **kwargs 

139 Keyword arguments to `undoFunc`. 

140 """ 

141 self._log.append(self.Event(name, undoFunc, args, kwargs)) 

142 

143 @contextlib.contextmanager 

144 def undoWith(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> Iterator[None]: 

145 """Register undo function if nested operation succeeds. 

146 

147 Calls `registerUndo`. 

148 

149 This can be used to wrap individual undo-able statements within a 

150 DatastoreTransaction block. Multiple statements that can fail 

151 separately should not be part of the same `undoWith` block. 

152 

153 All arguments are forwarded directly to `registerUndo`. 

154 """ 

155 try: 

156 yield None 

157 except BaseException: 

158 raise 

159 else: 

160 self.registerUndo(name, undoFunc, *args, **kwargs) 

161 
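A minimal, runnable sketch of the `undoWith` pattern follows (the file path and the choice of `os.remove` as the undo action are hypothetical). Because the undo function is registered only after the wrapped statement succeeds, a later `rollback` never tries to undo work that was never done.

import os

from lsst.daf.butler.core.datastore import DatastoreTransaction

transaction = DatastoreTransaction()

path = "/tmp/example-artifact.bin"  # hypothetical artifact location
with transaction.undoWith("write artifact", os.remove, path):
    # If this write raises, os.remove is never registered, so a later
    # rollback() will not try to delete a file that was never created.
    with open(path, "wb") as fh:
        fh.write(b"payload")

# Something later in the same transaction fails, so undo everything logged.
transaction.rollback()
assert not os.path.exists(path)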

162 def rollback(self) -> None: 

163 """Roll back all events in this transaction.""" 

164 log = logging.getLogger(__name__) 

165 while self._log: 

166 ev = self._log.pop() 

167 try: 

168 log.debug( 

169 "Rolling back transaction: %s: %s(%s,%s)", 

170 ev.name, 

171 ev.undoFunc, 

172 ",".join(str(a) for a in ev.args), 

173 ",".join(f"{k}={v}" for k, v in ev.kwargs.items()), 

174 ) 

175 except Exception: 

176 # In case we had a problem in stringification of arguments 

177 log.warning("Rolling back transaction: %s", ev.name) 

178 try: 

179 ev.undoFunc(*ev.args, **ev.kwargs) 

180 except BaseException as e: 

181 # Deliberately swallow error that may occur in unrolling 

182 log.warning("Exception: %s caught while unrolling: %s", e, ev.name) 

183 pass 

184 

185 def commit(self) -> None: 

186 """Commit this transaction.""" 

187 if self.parent is None: 

188 # Just forget about the events, they have already happened. 

189 return 

190 else: 

191 # We may still want the events from this transaction as part of 

192 # the parent. 

193 self.parent._log.extend(self._log) 

194 
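The nesting behaviour of `commit` and `rollback` can be seen with plain callables; this sketch uses the module path from the coverage header and list appends as stand-in undo actions.

from lsst.daf.butler.core.datastore import DatastoreTransaction

undone = []

parent = DatastoreTransaction()
child = DatastoreTransaction(parent)
child.registerUndo("child event", undone.append, "child")
child.commit()  # the child's log is handed to the parent; nothing is undone yet

parent.registerUndo("parent event", undone.append, "parent")
parent.rollback()  # undo callables run in reverse registration order
print(undone)  # ['parent', 'child']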

195 

196class Datastore(metaclass=ABCMeta): 

197 """Datastore interface. 

198 

199 Parameters 

200 ---------- 

201 config : `DatastoreConfig` or `str` 

202 Load configuration either from an existing config instance or by 

203 referring to a configuration file. 

204 bridgeManager : `DatastoreRegistryBridgeManager` 

205 Object that manages the interface between `Registry` and datastores. 

206 butlerRoot : `str`, optional 

207 New datastore root to use to override the configuration value. 

208 """ 

209 

210 defaultConfigFile: ClassVar[Optional[str]] = None 

211 """Path to configuration defaults. Accessed within the ``config`` resource 

212 or relative to a search path. Can be None if no defaults specified. 

213 """ 

214 

215 containerKey: ClassVar[Optional[str]] = None 

216 """Name of the key containing a list of subconfigurations that also 

217 need to be merged with defaults and will likely use different Python 

218 datastore classes (but all using DatastoreConfig). Assumed to be a 

219 list of configurations that can be represented in a DatastoreConfig 

220 and containing a "cls" definition. None indicates that no containers 

221 are expected in this Datastore.""" 

222 

223 isEphemeral: bool = False 

224 """Indicate whether this Datastore is ephemeral or not. An ephemeral 

225 datastore is one where the contents of the datastore will not exist 

226 across process restarts. This value can change per-instance.""" 

227 

228 config: DatastoreConfig 

229 """Configuration used to create Datastore.""" 

230 

231 name: str 

232 """Label associated with this Datastore.""" 

233 

234 storageClassFactory: StorageClassFactory 

235 """Factory for creating storage class instances from name.""" 

236 

237 constraints: Constraints 

238 """Constraints to apply when putting datasets into the datastore.""" 

239 

240 # MyPy does not like for this to be annotated as any kind of type, because 

241 # it can't do static checking on type variables that can change at runtime. 

242 IngestPrepData: ClassVar[Any] = IngestPrepData 

243 """Helper base class for ingest implementations. 

244 """ 

245 

246 @classmethod 

247 @abstractmethod 

248 def setConfigRoot(cls, root: str, config: Config, full: Config, overwrite: bool = True) -> None: 

249 """Set filesystem-dependent config options for this datastore. 

250 

251 The options will be appropriate for a new empty repository with the 

252 given root. 

253 

254 Parameters 

255 ---------- 

256 root : `str` 

257 Filesystem path to the root of the data repository. 

258 config : `Config` 

259 A `Config` to update. Only the subset understood by 

260 this component will be updated. Will not expand 

261 defaults. 

262 full : `Config` 

263 A complete config with all defaults expanded that can be 

264 converted to a `DatastoreConfig`. Read-only and will not be 

265 modified by this method. 

266 Repository-specific options that should not be obtained 

267 from defaults when Butler instances are constructed 

268 should be copied from ``full`` to ``config``. 

269 overwrite : `bool`, optional 

270 If `False`, do not modify a value in ``config`` if the value 

271 already exists. Default is always to overwrite with the provided 

272 ``root``. 

273 

274 Notes 

275 ----- 

276 If a keyword is explicitly defined in the supplied ``config`` it 

277 will not be overridden by this method if ``overwrite`` is `False`. 

278 This allows explicit values set in external configs to be retained. 

279 """ 

280 raise NotImplementedError() 

281 

282 @staticmethod 

283 def fromConfig( 

284 config: Config, 

285 bridgeManager: DatastoreRegistryBridgeManager, 

286 butlerRoot: Optional[ResourcePathExpression] = None, 

287 ) -> "Datastore": 

288 """Create datastore from type specified in config file. 

289 

290 Parameters 

291 ---------- 

292 config : `Config` 

293 Configuration instance. 

294 bridgeManager : `DatastoreRegistryBridgeManager` 

295 Object that manages the interface between `Registry` and 

296 datastores. 

297 butlerRoot : `str`, optional 

298 Butler root directory. 

299 """ 

300 cls = doImportType(config["datastore", "cls"]) 

301 if not issubclass(cls, Datastore): 

302 raise TypeError(f"Imported child class {config['datastore', 'cls']} is not a Datastore") 

303 return cls(config=config, bridgeManager=bridgeManager, butlerRoot=butlerRoot) 

304 
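As a sketch of how `fromConfig` is typically driven, the snippet below loads a repository configuration and hands it to the static factory; the repository path is a placeholder and the bridge manager is assumed to come from an existing `Registry`.

from lsst.daf.butler.core.config import Config
from lsst.daf.butler.core.datastore import Datastore

config = Config("/path/to/repo/butler.yaml")  # must contain a "datastore.cls" entry
bridge_manager = ...  # placeholder: a DatastoreRegistryBridgeManager owned by the Registry

datastore = Datastore.fromConfig(
    config,
    bridgeManager=bridge_manager,
    butlerRoot="/path/to/repo",
)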

305 def __init__( 

306 self, 

307 config: Union[Config, str], 

308 bridgeManager: DatastoreRegistryBridgeManager, 

309 butlerRoot: Optional[ResourcePathExpression] = None, 

310 ): 

311 self.config = DatastoreConfig(config) 

312 self.name = "ABCDataStore" 

313 self._transaction: Optional[DatastoreTransaction] = None 

314 

315 # All Datastores need storage classes and constraints 

316 self.storageClassFactory = StorageClassFactory() 

317 

318 # And read the constraints list 

319 constraintsConfig = self.config.get("constraints") 

320 self.constraints = Constraints(constraintsConfig, universe=bridgeManager.universe) 

321 

322 def __str__(self) -> str: 

323 return self.name 

324 

325 def __repr__(self) -> str: 

326 return self.name 

327 

328 @property 

329 def names(self) -> Tuple[str, ...]: 

330 """Names associated with this datastore returned as a list. 

331 

332 Can be different to ``name`` for a chaining datastore. 

333 """ 

334 # Default implementation returns solely the name itself 

335 return (self.name,) 

336 

337 @contextlib.contextmanager 

338 def transaction(self) -> Iterator[DatastoreTransaction]: 

339 """Context manager supporting `Datastore` transactions. 

340 

341 Transactions can be nested, and are to be used in combination with 

342 `Registry.transaction`. 

343 """ 

344 self._transaction = DatastoreTransaction(self._transaction) 

345 try: 

346 yield self._transaction 

347 except BaseException: 

348 self._transaction.rollback() 

349 raise 

350 else: 

351 self._transaction.commit() 

352 self._transaction = self._transaction.parent 

353 
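The docstring's pairing with `Registry.transaction` suggests a calling pattern along these lines; this is a sketch only, and `registry`, `datastore`, `dataset`, and `ref` are placeholders for objects from an existing repository.

from typing import Any

from lsst.daf.butler.core.datasets import DatasetRef
from lsst.daf.butler.core.datastore import Datastore


def store_with_rollback(registry: Any, datastore: Datastore, dataset: Any, ref: DatasetRef) -> None:
    """Write one dataset, undoing datastore changes if anything fails."""
    with registry.transaction():
        with datastore.transaction():
            datastore.put(dataset, ref)
            # Any exception raised here triggers DatastoreTransaction.rollback()
            # for the datastore events before the registry transaction unwinds.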

354 @abstractmethod 

355 def knows(self, ref: DatasetRef) -> bool: 

356 """Check if the dataset is known to the datastore. 

357 

358 Does not check for existence of any artifact. 

359 

360 Parameters 

361 ---------- 

362 ref : `DatasetRef` 

363 Reference to the required dataset. 

364 

365 Returns 

366 ------- 

367 exists : `bool` 

368 `True` if the dataset is known to the datastore. 

369 """ 

370 raise NotImplementedError() 

371 

372 def mexists( 

373 self, refs: Iterable[DatasetRef], artifact_existence: Optional[Dict[ResourcePath, bool]] = None 

374 ) -> Dict[DatasetRef, bool]: 

375 """Check the existence of multiple datasets at once. 

376 

377 Parameters 

378 ---------- 

379 refs : iterable of `DatasetRef` 

380 The datasets to be checked. 

381 artifact_existence : `dict` [`lsst.resources.ResourcePath`, `bool`] 

382 Optional mapping of datastore artifact to existence. Updated by 

383 this method with details of all artifacts tested. Can be `None` 

384 if the caller is not interested. 

385 

386 Returns 

387 ------- 

388 existence : `dict` of [`DatasetRef`, `bool`] 

389 Mapping from dataset to boolean indicating existence. 

390 """ 

391 existence: Dict[DatasetRef, bool] = {} 

392 # Non-optimized default. 

393 for ref in refs: 

394 existence[ref] = self.exists(ref) 

395 return existence 

396 
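A small usage sketch of the bulk existence check; `datastore` and `refs` are placeholders for a concrete instance and resolved refs.

from typing import Dict, Iterable, List

from lsst.daf.butler.core.datasets import DatasetRef
from lsst.daf.butler.core.datastore import Datastore


def find_missing(datastore: Datastore, refs: Iterable[DatasetRef]) -> List[DatasetRef]:
    """Return the refs whose artifacts the datastore cannot find."""
    existence: Dict[DatasetRef, bool] = datastore.mexists(refs)
    return [ref for ref, exists in existence.items() if not exists]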

397 @abstractmethod 

398 def exists(self, datasetRef: DatasetRef) -> bool: 

399 """Check if the dataset exists in the datastore. 

400 

401 Parameters 

402 ---------- 

403 datasetRef : `DatasetRef` 

404 Reference to the required dataset. 

405 

406 Returns 

407 ------- 

408 exists : `bool` 

409 `True` if the entity exists in the `Datastore`. 

410 """ 

411 raise NotImplementedError("Must be implemented by subclass") 

412 

413 @abstractmethod 

414 def get(self, datasetRef: DatasetRef, parameters: Optional[Mapping[str, Any]] = None) -> Any: 

415 """Load an `InMemoryDataset` from the store. 

416 

417 Parameters 

418 ---------- 

419 datasetRef : `DatasetRef` 

420 Reference to the required Dataset. 

421 parameters : `dict`, optional 

422 `StorageClass`-specific parameters that specify a slice of the 

423 Dataset to be loaded. 

424 

425 Returns 

426 ------- 

427 inMemoryDataset : `object` 

428 Requested Dataset or slice thereof as an InMemoryDataset. 

429 """ 

430 raise NotImplementedError("Must be implemented by subclass") 

431 

432 @abstractmethod 

433 def put(self, inMemoryDataset: Any, datasetRef: DatasetRef) -> None: 

434 """Write a `InMemoryDataset` with a given `DatasetRef` to the store. 

435 

436 Parameters 

437 ---------- 

438 inMemoryDataset : `object` 

439 The Dataset to store. 

440 datasetRef : `DatasetRef` 

441 Reference to the associated Dataset. 

442 """ 

443 raise NotImplementedError("Must be implemented by subclass") 

444 

445 def _overrideTransferMode(self, *datasets: FileDataset, transfer: Optional[str] = None) -> Optional[str]: 

446 """Allow ingest transfer mode to be defaulted based on datasets. 

447 

448 Parameters 

449 ---------- 

450 datasets : `FileDataset` 

451 Each positional argument is a struct containing information about 

452 a file to be ingested, including its path (either absolute or 

453 relative to the datastore root, if applicable), a complete 

454 `DatasetRef` (with ``dataset_id not None``), and optionally a 

455 formatter class or its fully-qualified string name. If a formatter 

456 is not provided, this method should populate that attribute with 

457 the formatter the datastore would use for `put`. Subclasses are 

458 also permitted to modify the path attribute (typically to put it 

459 in what the datastore considers its standard form). 

460 transfer : `str`, optional 

461 How (and whether) the dataset should be added to the datastore. 

462 See `ingest` for details of transfer modes. 

463 

464 Returns 

465 ------- 

466 newTransfer : `str` 

467 Transfer mode to use. Will be identical to the supplied transfer 

468 mode unless "auto" is used. 

469 """ 

470 if transfer != "auto": 

471 return transfer 

472 raise RuntimeError(f"{transfer} is not allowed without specialization.") 

473 
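A concrete datastore is expected to replace this default; the sketch below shows one plausible override that keeps explicit modes and maps "auto" to "copy" (the class name and the chosen default are illustrative, and the other abstract methods are omitted).

from typing import Optional

from lsst.daf.butler.core.datastore import Datastore
from lsst.daf.butler.core.fileDataset import FileDataset


class ExampleDatastore(Datastore):  # remaining abstract methods omitted for brevity
    def _overrideTransferMode(
        self, *datasets: FileDataset, transfer: Optional[str] = None
    ) -> Optional[str]:
        # Keep any explicit choice; resolve "auto" to this datastore's preference.
        if transfer == "auto":
            return "copy"
        return transfer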

474 def _prepIngest(self, *datasets: FileDataset, transfer: Optional[str] = None) -> IngestPrepData: 

475 """Process datasets to identify which ones can be ingested. 

476 

477 Parameters 

478 ---------- 

479 datasets : `FileDataset` 

480 Each positional argument is a struct containing information about 

481 a file to be ingested, including its path (either absolute or 

482 relative to the datastore root, if applicable), a complete 

483 `DatasetRef` (with ``dataset_id not None``), and optionally a 

484 formatter class or its fully-qualified string name. If a formatter 

485 is not provided, this method should populate that attribute with 

486 the formatter the datastore would use for `put`. Subclasses are 

487 also permitted to modify the path attribute (typically to put it 

488 in what the datastore considers its standard form). 

489 transfer : `str`, optional 

490 How (and whether) the dataset should be added to the datastore. 

491 See `ingest` for details of transfer modes. 

492 

493 Returns 

494 ------- 

495 data : `IngestPrepData` 

496 An instance of a subclass of `IngestPrepData`, used to pass 

497 arbitrary data from `_prepIngest` to `_finishIngest`. This should 

498 include only the datasets this datastore can actually ingest; 

499 others should be silently ignored (`Datastore.ingest` will inspect 

500 `IngestPrepData.refs` and raise `DatasetTypeNotSupportedError` if 

501 necessary). 

502 

503 Raises 

504 ------ 

505 NotImplementedError 

506 Raised if the datastore does not support the given transfer mode 

507 (including the case where ingest is not supported at all). 

508 FileNotFoundError 

509 Raised if one of the given files does not exist. 

510 FileExistsError 

511 Raised if transfer is not `None` but the (internal) location the 

512 file would be moved to is already occupied. 

513 

514 Notes 

515 ----- 

516 This method (along with `_finishIngest`) should be implemented by 

517 subclasses to provide ingest support instead of implementing `ingest` 

518 directly. 

519 

520 `_prepIngest` should not modify the data repository or given files in 

521 any way; all changes should be deferred to `_finishIngest`. 

522 

523 When possible, exceptions should be raised in `_prepIngest` instead of 

524 `_finishIngest`. `NotImplementedError` exceptions that indicate that 

525 the transfer mode is not supported must be raised by `_prepIngest` 

526 instead of `_finishIngest`. 

527 """ 

528 raise NotImplementedError(f"Datastore {self} does not support direct file-based ingest.") 

529 

530 def _finishIngest( 

531 self, prepData: IngestPrepData, *, transfer: Optional[str] = None, record_validation_info: bool = True 

532 ) -> None: 

533 """Complete an ingest operation. 

534 

535 Parameters 

536 ---------- 

537 prepData : `IngestPrepData` 

538 An instance of a subclass of `IngestPrepData`. Guaranteed to be 

539 the direct result of a call to `_prepIngest` on this datastore. 

540 transfer : `str`, optional 

541 How (and whether) the dataset should be added to the datastore. 

542 See `ingest` for details of transfer modes. 

543 record_validation_info : `bool`, optional 

544 If `True`, the default, the datastore can record validation 

545 information associated with the file. If `False` the datastore 

546 will not attempt to track any information such as checksums 

547 or file sizes. This can be useful if such information is tracked 

548 in an external system or if the file is to be compressed in place. 

549 It is up to the datastore whether this parameter is relevant. 

550 

551 Raises 

552 ------ 

553 FileNotFoundError 

554 Raised if one of the given files does not exist. 

555 FileExistsError 

556 Raised if transfer is not `None` but the (internal) location the 

557 file would be moved to is already occupied. 

558 

559 Notes 

560 ----- 

561 This method (along with `_prepIngest`) should be implemented by 

562 subclasses to provide ingest support instead of implementing `ingest` 

563 directly. 

564 """ 

565 raise NotImplementedError(f"Datastore {self} does not support direct file-based ingest.") 

566 
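To illustrate the intended division of labour, here is a sketch of a hypothetical file-copying datastore: `_prepIngest` only decides what it will accept, and every modification is deferred to `_finishIngest`. The class, the accept-everything policy, and the destination directory are placeholders, not the behaviour of any shipped datastore.

import shutil
from typing import Optional

from lsst.daf.butler.core.datastore import Datastore
from lsst.daf.butler.core.fileDataset import FileDataset


class ExampleFileDatastore(Datastore):  # remaining abstract methods omitted for brevity
    class IngestPrepData(Datastore.IngestPrepData):
        def __init__(self, datasets):
            super().__init__(ref for dataset in datasets for ref in dataset.refs)
            self.datasets = datasets

    def _prepIngest(self, *datasets: FileDataset, transfer: Optional[str] = None) -> IngestPrepData:
        if transfer not in ("copy", "auto"):
            raise NotImplementedError(f"Transfer mode {transfer} is not supported here.")
        # A real implementation would also consult self.constraints; this sketch
        # accepts everything and modifies nothing at this stage.
        return self.IngestPrepData(list(datasets))

    def _finishIngest(
        self, prepData: IngestPrepData, *, transfer: Optional[str] = None, record_validation_info: bool = True
    ) -> None:
        # All changes to the repository happen here, after _prepIngest succeeded.
        for dataset in prepData.datasets:
            shutil.copy2(dataset.path, "/path/to/datastore/root/")  # placeholder destination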

567 def ingest( 

568 self, *datasets: FileDataset, transfer: Optional[str] = None, record_validation_info: bool = True 

569 ) -> None: 

570 """Ingest one or more files into the datastore. 

571 

572 Parameters 

573 ---------- 

574 datasets : `FileDataset` 

575 Each positional argument is a struct containing information about 

576 a file to be ingested, including its path (either absolute or 

577 relative to the datastore root, if applicable), a complete 

578 `DatasetRef` (with ``dataset_id not None``), and optionally a 

579 formatter class or its fully-qualified string name. If a formatter 

580 is not provided, the one the datastore would use for ``put`` on 

581 that dataset is assumed. 

582 transfer : `str`, optional 

583 How (and whether) the dataset should be added to the datastore. 

584 If `None` (default), the file must already be in a location 

585 appropriate for the datastore (e.g. within its root directory), 

586 and will not be modified. Other choices include "move", "copy", 

587 "link", "symlink", "relsymlink", and "hardlink". "link" is a 

588 special transfer mode that will first try to make a hardlink and 

589 if that fails a symlink will be used instead. "relsymlink" creates 

590 a relative symlink rather than use an absolute path. 

591 Most datastores do not support all transfer modes. 

592 "auto" is a special option that will let the 

593 data store choose the most natural option for itself. 

594 record_validation_info : `bool`, optional 

595 If `True`, the default, the datastore can record validation 

596 information associated with the file. If `False` the datastore 

597 will not attempt to track any information such as checksums 

598 or file sizes. This can be useful if such information is tracked 

599 in an external system or if the file is to be compressed in place. 

600 It is up to the datastore whether this parameter is relevant. 

601 

602 Raises 

603 ------ 

604 NotImplementedError 

605 Raised if the datastore does not support the given transfer mode 

606 (including the case where ingest is not supported at all). 

607 DatasetTypeNotSupportedError 

608 Raised if one or more files to be ingested have a dataset type that 

609 is not supported by the datastore. 

610 FileNotFoundError 

611 Raised if one of the given files does not exist. 

612 FileExistsError 

613 Raised if transfer is not `None` but the (internal) location the 

614 file would be moved to is already occupied. 

615 

616 Notes 

617 ----- 

618 Subclasses should implement `_prepIngest` and `_finishIngest` instead 

619 of implementing `ingest` directly. Datastores that hold and 

620 delegate to child datastores may want to call those methods as well. 

621 

622 Subclasses are encouraged to document their supported transfer modes 

623 in their class documentation. 

624 """ 

625 # Allow a datastore to select a default transfer mode 

626 transfer = self._overrideTransferMode(*datasets, transfer=transfer) 

627 prepData = self._prepIngest(*datasets, transfer=transfer) 

628 refs = {ref.id: ref for dataset in datasets for ref in dataset.refs} 

629 if None in refs: 

630 # Find the file for the error message. There may be multiple 

631 # bad refs so look for all of them. 

632 unresolved_paths = {} 

633 for dataset in datasets: 

634 unresolved = [] 

635 for ref in dataset.refs: 

636 if ref.id is None: 

637 unresolved.append(ref) 

638 if unresolved: 

639 unresolved_paths[dataset.path] = unresolved 

640 raise RuntimeError( 

641 "Attempt to ingest unresolved DatasetRef from: " 

642 + ",".join(f"{p}: ({[str(r) for r in ref]})" for p, ref in unresolved_paths.items()) 

643 ) 

644 if refs.keys() != prepData.refs.keys(): 

645 unsupported = refs.keys() - prepData.refs.keys() 

646 # Group unsupported refs by DatasetType for an informative 

647 # but still concise error message. 

648 byDatasetType = defaultdict(list) 

649 for datasetId in unsupported: 

650 ref = refs[datasetId] 

651 byDatasetType[ref.datasetType].append(ref) 

652 raise DatasetTypeNotSupportedError( 

653 "DatasetType(s) not supported in ingest: " 

654 + ", ".join(f"{k.name} ({len(v)} dataset(s))" for k, v in byDatasetType.items()) 

655 ) 

656 self._finishIngest(prepData, transfer=transfer, record_validation_info=record_validation_info) 

657 
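A short usage sketch of the public entry point; the transfer mode, the file path, and the assumption that ``ref`` is already resolved are placeholders.

from lsst.daf.butler.core.datasets import DatasetRef
from lsst.daf.butler.core.datastore import Datastore
from lsst.daf.butler.core.fileDataset import FileDataset


def ingest_one(datastore: Datastore, path: str, ref: DatasetRef) -> None:
    """Ingest a single existing file under a resolved DatasetRef."""
    dataset = FileDataset(path=path, refs=[ref])
    # Unsupported transfer modes raise NotImplementedError; unacceptable
    # dataset types raise DatasetTypeNotSupportedError.
    datastore.ingest(dataset, transfer="copy", record_validation_info=True)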

658 def transfer_from( 

659 self, 

660 source_datastore: Datastore, 

661 refs: Iterable[DatasetRef], 

662 local_refs: Optional[Iterable[DatasetRef]] = None, 

663 transfer: str = "auto", 

664 artifact_existence: Optional[Dict[ResourcePath, bool]] = None, 

665 ) -> None: 

666 """Transfer dataset artifacts from another datastore to this one. 

667 

668 Parameters 

669 ---------- 

670 source_datastore : `Datastore` 

671 The datastore from which to transfer artifacts. That datastore 

672 must be compatible with this datastore receiving the artifacts. 

673 refs : iterable of `DatasetRef` 

674 The datasets to transfer from the source datastore. 

675 local_refs : iterable of `DatasetRef`, optional 

676 The dataset refs known to the registry associated with this 

677 datastore. Can be `None` if the source and target datastores 

678 are using UUIDs. 

679 transfer : `str`, optional 

680 How (and whether) the dataset should be added to the datastore. 

681 Choices include "move", "copy", 

682 "link", "symlink", "relsymlink", and "hardlink". "link" is a 

683 special transfer mode that will first try to make a hardlink and 

684 if that fails a symlink will be used instead. "relsymlink" creates 

685 a relative symlink rather than use an absolute path. 

686 Most datastores do not support all transfer modes. 

687 "auto" (the default) is a special option that will let the 

688 data store choose the most natural option for itself. 

689 If the source location and transfer location are identical the 

690 transfer mode will be ignored. 

691 artifact_existence : `dict` [`lsst.resources.ResourcePath`, `bool`] 

692 Optional mapping of datastore artifact to existence. Updated by 

693 this method with details of all artifacts tested. Can be `None` 

694 if the caller is not interested. 

695 

696 Raises 

697 ------ 

698 TypeError 

699 Raised if the two datastores are not compatible. 

700 """ 

701 if type(self) is not type(source_datastore): 

702 raise TypeError( 

703 f"Datastore mismatch between this datastore ({type(self)}) and the " 

704 f"source datastore ({type(source_datastore)})." 

705 ) 

706 

707 raise NotImplementedError(f"Datastore {type(self)} must implement a transfer_from method.") 

708 

709 @abstractmethod 

710 def getURIs( 

711 self, datasetRef: DatasetRef, predict: bool = False 

712 ) -> Tuple[Optional[ResourcePath], Dict[str, ResourcePath]]: 

713 """Return URIs associated with dataset. 

714 

715 Parameters 

716 ---------- 

717 datasetRef : `DatasetRef` 

718 Reference to the required dataset. 

719 predict : `bool`, optional 

720 If the datastore does not know about the dataset, should it 

721 return a predicted URI or not? 

722 

723 Returns 

724 ------- 

725 primary : `lsst.resources.ResourcePath` 

726 The URI to the primary artifact associated with this dataset. 

727 If the dataset was disassembled within the datastore this 

728 may be `None`. 

729 components : `dict` 

730 URIs to any components associated with the dataset artifact. 

731 Can be empty if there are no components. 

732 """ 

733 raise NotImplementedError() 

734 

735 @abstractmethod 

736 def getURI(self, datasetRef: DatasetRef, predict: bool = False) -> ResourcePath: 

737 """URI to the Dataset. 

738 

739 Parameters 

740 ---------- 

741 datasetRef : `DatasetRef` 

742 Reference to the required Dataset. 

743 predict : `bool` 

744 If `True` attempt to predict the URI for a dataset if it does 

745 not exist in datastore. 

746 

747 Returns 

748 ------- 

749 uri : `lsst.resources.ResourcePath` 

750 URI pointing to the Dataset within the datastore. If the 

751 Dataset does not exist in the datastore, the URI may be a guess. 

752 If the datastore does not have entities that relate well 

753 to the concept of a URI the returned URI string will be 

754 descriptive. The returned URI is not guaranteed to be obtainable. 

755 

756 Raises 

757 ------ 

758 FileNotFoundError 

759 A URI has been requested for a dataset that does not exist and 

760 guessing is not allowed. 

761 """ 

762 raise NotImplementedError("Must be implemented by subclass") 

763 

764 @abstractmethod 

765 def retrieveArtifacts( 

766 self, 

767 refs: Iterable[DatasetRef], 

768 destination: ResourcePath, 

769 transfer: str = "auto", 

770 preserve_path: bool = True, 

771 overwrite: bool = False, 

772 ) -> List[ResourcePath]: 

773 """Retrieve the artifacts associated with the supplied refs. 

774 

775 Parameters 

776 ---------- 

777 refs : iterable of `DatasetRef` 

778 The datasets for which artifacts are to be retrieved. 

779 A single ref can result in multiple artifacts. The refs must 

780 be resolved. 

781 destination : `lsst.resources.ResourcePath` 

782 Location to write the artifacts. 

783 transfer : `str`, optional 

784 Method to use to transfer the artifacts. Must be one of the options 

785 supported by `lsst.resources.ResourcePath.transfer_from()`. 

786 "move" is not allowed. 

787 preserve_path : `bool`, optional 

788 If `True` the full path of the artifact within the datastore 

789 is preserved. If `False` the final file component of the path 

790 is used. 

791 overwrite : `bool`, optional 

792 If `True` allow transfers to overwrite existing files at the 

793 destination. 

794 

795 Returns 

796 ------- 

797 targets : `list` of `lsst.resources.ResourcePath` 

798 URIs of file artifacts in destination location. Order is not 

799 preserved. 

800 

801 Notes 

802 ----- 

803 For non-file datastores the artifacts written to the destination 

804 may not match the representation inside the datastore. For example 

805 a hierarchical data structure in a NoSQL database may well be stored 

806 as a JSON file. 

807 """ 

808 raise NotImplementedError() 

809 
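A usage sketch for pulling artifacts out to a local directory; the destination path and transfer mode are placeholders.

from typing import Iterable, List

from lsst.resources import ResourcePath

from lsst.daf.butler.core.datasets import DatasetRef
from lsst.daf.butler.core.datastore import Datastore


def copy_out(datastore: Datastore, refs: Iterable[DatasetRef]) -> List[ResourcePath]:
    """Copy the artifacts for the given refs into a scratch directory."""
    destination = ResourcePath("/tmp/export/")  # trailing slash marks a directory
    return datastore.retrieveArtifacts(
        refs,
        destination,
        transfer="copy",
        preserve_path=True,
        overwrite=False,
    )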

810 @abstractmethod 

811 def remove(self, datasetRef: DatasetRef) -> None: 

812 """Indicate to the Datastore that a Dataset can be removed. 

813 

814 Parameters 

815 ---------- 

816 datasetRef : `DatasetRef` 

817 Reference to the required Dataset. 

818 

819 Raises 

820 ------ 

821 FileNotFoundError 

822 When Dataset does not exist. 

823 

824 Notes 

825 ----- 

826 Some Datastores may implement this method as a silent no-op to 

827 disable Dataset deletion through standard interfaces. 

828 """ 

829 raise NotImplementedError("Must be implemented by subclass") 

830 

831 @abstractmethod 

832 def forget(self, refs: Iterable[DatasetRef]) -> None: 

833 """Indicate to the Datastore that it should remove all records of the 

834 given datasets, without actually deleting them. 

835 

836 Parameters 

837 ---------- 

838 refs : `Iterable` [ `DatasetRef` ] 

839 References to the datasets being forgotten. 

840 

841 Notes 

842 ----- 

843 Asking a datastore to forget a `DatasetRef` it does not hold should be 

844 a silent no-op, not an error. 

845 """ 

846 raise NotImplementedError("Must be implemented by subclass") 

847 

848 @abstractmethod 

849 def trash(self, ref: Union[DatasetRef, Iterable[DatasetRef]], ignore_errors: bool = True) -> None: 

850 """Indicate to the Datastore that a Dataset can be moved to the trash. 

851 

852 Parameters 

853 ---------- 

854 ref : `DatasetRef` or iterable thereof 

855 Reference(s) to the required Dataset. 

856 ignore_errors : `bool`, optional 

857 Determine whether errors should be ignored. When multiple 

858 refs are being trashed there will be no per-ref check. 

859 

860 Raises 

861 ------ 

862 FileNotFoundError 

863 When Dataset does not exist and errors are not ignored. Only 

864 checked if a single ref is supplied (and not in a list). 

865 

866 Notes 

867 ----- 

868 Some Datastores may implement this method as a silent no-op to 

869 disable Dataset deletion through standard interfaces. 

870 """ 

871 raise NotImplementedError("Must be implemented by subclass") 

872 

873 @abstractmethod 

874 def emptyTrash(self, ignore_errors: bool = True) -> None: 

875 """Remove all datasets from the trash. 

876 

877 Parameters 

878 ---------- 

879 ignore_errors : `bool`, optional 

880 Determine whether errors should be ignored. 

881 

882 Notes 

883 ----- 

884 Some Datastores may implement this method as a silent no-op to 

885 disable Dataset deletion through standard interfaces. 

886 """ 

887 raise NotImplementedError("Must be implemented by subclass") 

888 

889 @abstractmethod 

890 def transfer(self, inputDatastore: Datastore, datasetRef: DatasetRef) -> None: 

891 """Transfer a dataset from another datastore to this datastore. 

892 

893 Parameters 

894 ---------- 

895 inputDatastore : `Datastore` 

896 The external `Datastore` from which to retrieve the Dataset. 

897 datasetRef : `DatasetRef` 

898 Reference to the required Dataset. 

899 """ 

900 raise NotImplementedError("Must be implemented by subclass") 

901 

902 def export( 

903 self, refs: Iterable[DatasetRef], *, directory: Optional[str] = None, transfer: Optional[str] = None 

904 ) -> Iterable[FileDataset]: 

905 """Export datasets for transfer to another data repository. 

906 

907 Parameters 

908 ---------- 

909 refs : iterable of `DatasetRef` 

910 Dataset references to be exported. 

911 directory : `str`, optional 

912 Path to a directory that should contain files corresponding to 

913 output datasets. Ignored if ``transfer`` is `None`. 

914 transfer : `str`, optional 

915 Mode that should be used to move datasets out of the repository. 

916 Valid options are the same as those of the ``transfer`` argument 

917 to ``ingest``, and datastores may similarly signal that a transfer 

918 mode is not supported by raising `NotImplementedError`. 

919 

920 Returns 

921 ------- 

922 datasets : iterable of `FileDataset` 

923 Structs containing information about the exported datasets, in the 

924 same order as ``refs``. 

925 

926 Raises 

927 ------ 

928 NotImplementedError 

929 Raised if the given transfer mode is not supported. 

930 """ 

931 raise NotImplementedError(f"Transfer mode {transfer} not supported.") 

932 

933 @abstractmethod 

934 def validateConfiguration( 

935 self, entities: Iterable[Union[DatasetRef, DatasetType, StorageClass]], logFailures: bool = False 

936 ) -> None: 

937 """Validate some of the configuration for this datastore. 

938 

939 Parameters 

940 ---------- 

941 entities : iterable of `DatasetRef`, `DatasetType`, or `StorageClass` 

942 Entities to test against this configuration. Can be differing 

943 types. 

944 logFailures : `bool`, optional 

945 If `True`, output a log message for every validation error 

946 detected. 

947 

948 Raises 

949 ------ 

950 DatastoreValidationError 

951 Raised if there is a validation problem with a configuration. 

952 

953 Notes 

954 ----- 

955 Which parts of the configuration are validated is at the discretion 

956 of each Datastore implementation. 

957 """ 

958 raise NotImplementedError("Must be implemented by subclass") 

959 

960 @abstractmethod 

961 def validateKey(self, lookupKey: LookupKey, entity: Union[DatasetRef, DatasetType, StorageClass]) -> None: 

962 """Validate a specific look up key with supplied entity. 

963 

964 Parameters 

965 ---------- 

966 lookupKey : `LookupKey` 

967 Key to use to retrieve information from the datastore 

968 configuration. 

969 entity : `DatasetRef`, `DatasetType`, or `StorageClass` 

970 Entity to compare with configuration retrieved using the 

971 specified lookup key. 

972 

973 Raises 

974 ------ 

975 DatastoreValidationError 

976 Raised if there is a problem with the combination of entity 

977 and lookup key. 

978 

979 Notes 

980 ----- 

981 Bypasses the normal selection priorities by allowing a key that 

982 would normally not be selected to be validated. 

983 """ 

984 raise NotImplementedError("Must be implemented by subclass") 

985 

986 @abstractmethod 

987 def getLookupKeys(self) -> Set[LookupKey]: 

988 """Return all the lookup keys relevant to this datastore. 

989 

990 Returns 

991 ------- 

992 keys : `set` of `LookupKey` 

993 The keys stored internally for looking up information based 

994 on `DatasetType` name or `StorageClass`. 

995 """ 

996 raise NotImplementedError("Must be implemented by subclass") 

997 

998 def needs_expanded_data_ids( 

999 self, 

1000 transfer: Optional[str], 

1001 entity: Optional[Union[DatasetRef, DatasetType, StorageClass]] = None, 

1002 ) -> bool: 

1003 """Test whether this datastore needs expanded data IDs to ingest. 

1004 

1005 Parameters 

1006 ---------- 

1007 transfer : `str` or `None` 

1008 Transfer mode for ingest. 

1009 entity : `DatasetRef`, `DatasetType`, or `StorageClass`, optional 

1010 Object representing what will be ingested. If not provided (or not 

1011 specific enough), `True` may be returned even if expanded data 

1012 IDs aren't necessary. 

1013 

1014 Returns 

1015 ------- 

1016 needed : `bool` 

1017 If `True`, expanded data IDs may be needed. `False` only if 

1018 expansion definitely isn't necessary. 

1019 """ 

1020 return True