Coverage for python/lsst/daf/butler/core/datastore.py: 47%

212 statements  

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22"""Support for generic data stores.""" 

23 

24from __future__ import annotations 

25 

26__all__ = ("DatastoreConfig", "Datastore", "DatastoreValidationError", "DatastoreRecordData") 

27 

28import contextlib 

29import dataclasses 

30import logging 

31from abc import ABCMeta, abstractmethod 

32from collections import defaultdict 

33from typing import ( 

34 TYPE_CHECKING, 

35 Any, 

36 Callable, 

37 ClassVar, 

38 Dict, 

39 Iterable, 

40 Iterator, 

41 List, 

42 Mapping, 

43 Optional, 

44 Set, 

45 Tuple, 

46 Type, 

47 Union, 

48) 

49 

50from lsst.utils import doImportType 

51 

52from .config import Config, ConfigSubset 

53from .constraints import Constraints 

54from .exceptions import DatasetTypeNotSupportedError, ValidationError 

55from .fileDataset import FileDataset 

56from .storageClass import StorageClassFactory 

57 

58if TYPE_CHECKING: 

59 from lsst.resources import ResourcePath, ResourcePathExpression 

60 

61 from ..registry.interfaces import DatasetIdRef, DatastoreRegistryBridgeManager 

62 from .configSupport import LookupKey 

63 from .datasets import DatasetRef, DatasetType 

64 from .storageClass import StorageClass 

65 from .storedFileInfo import StoredDatastoreItemInfo 

66 

67 

68class DatastoreConfig(ConfigSubset): 

69 """Configuration for Datastores.""" 

70 

71 component = "datastore" 

72 requiredKeys = ("cls",) 

73 defaultConfigFile = "datastore.yaml" 

74 

75 

76class DatastoreValidationError(ValidationError): 

77 """There is a problem with the Datastore configuration.""" 

78 

79 pass 

80 

81 

82@dataclasses.dataclass 

83class DatastoreRecordData: 

84 """A struct that represents a tabular data export from a single 

85 datastore. 

86 """ 

87 

88 refs: List[DatasetIdRef] = dataclasses.field(default_factory=list) 

89 """List of DatasetRefs known to this datastore. 

90 """ 

91 

92 records: Dict[str, List[StoredDatastoreItemInfo]] = dataclasses.field( 

93 default_factory=lambda: defaultdict(list) 

94 ) 

95 """Opaque table data, grouped by opaque table name. 

96 """ 
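As a quick sketch of how this struct is filled in (``ref`` and ``item_info`` below are placeholders for an existing `DatasetIdRef` and `StoredDatastoreItemInfo`; the opaque table name is illustrative):

    data = DatastoreRecordData()
    data.refs.append(ref)  # ref: placeholder DatasetIdRef
    # ``records`` is backed by defaultdict(list), so a new opaque table name
    # can be appended to without initialising the list first.
    data.records["file_datastore_records"].append(item_info)  # placeholder item info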

97 

98 

99@dataclasses.dataclass(frozen=True) 

100class Event: 

101 __slots__ = {"name", "undoFunc", "args", "kwargs"} 

102 name: str 

103 undoFunc: Callable 

104 args: tuple 

105 kwargs: dict 

106 

107 

108class IngestPrepData: 

109 """A helper base class for `Datastore` ingest implementations. 

110 

111 Datastore implementations will generally need a custom implementation of 

112 this class. 

113 

114 Should be accessed as ``Datastore.IngestPrepData`` instead of via direct 

115 import. 

116 

117 Parameters 

118 ---------- 

119 refs : iterable of `DatasetRef` 

120 References for the datasets that can be ingested by this datastore. 

121 """ 

122 

123 def __init__(self, refs: Iterable[DatasetRef]): 

124 self.refs = {ref.id: ref for ref in refs} 

125 

126 

127class DatastoreTransaction: 

128 """Keeps a log of `Datastore` activity and allows rollback. 

129 

130 Parameters 

131 ---------- 

132 parent : `DatastoreTransaction`, optional 

133 The parent transaction, if any. 

134 """ 

135 

136 Event: ClassVar[Type] = Event 

137 

138 parent: Optional["DatastoreTransaction"] 

139 """The parent transaction. (`DatastoreTransaction`, optional)""" 

140 

141 def __init__(self, parent: Optional[DatastoreTransaction] = None): 

142 self.parent = parent 

143 self._log: List[Event] = [] 

144 

145 def registerUndo(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> None: 

146 """Register event with undo function. 

147 

148 Parameters 

149 ---------- 

150 name : `str` 

151 Name of the event. 

152 undoFunc : func 

153 Function to undo this event. 

154 args : `tuple` 

155 Positional arguments to `undoFunc`. 

156 **kwargs 

157 Keyword arguments to `undoFunc`. 

158 """ 

159 self._log.append(self.Event(name, undoFunc, args, kwargs)) 

160 

161 @contextlib.contextmanager 

162 def undoWith(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> Iterator[None]: 

163 """Register undo function if nested operation succeeds. 

164 

165 Calls `registerUndo`. 

166 

167 This can be used to wrap individual undo-able statements within a 

168 DatastoreTransaction block. Multiple statements that can fail 

169 separately should not be part of the same `undoWith` block. 

170 

171 All arguments are forwarded directly to `registerUndo`. 

172 """ 

173 try: 

174 yield None 

175 except BaseException: 

176 raise 

177 else: 

178 self.registerUndo(name, undoFunc, *args, **kwargs) 

179 

180 def rollback(self) -> None: 

181 """Roll back all events in this transaction.""" 

182 log = logging.getLogger(__name__) 

183 while self._log: 

184 ev = self._log.pop() 

185 try: 

186 log.debug( 

187 "Rolling back transaction: %s: %s(%s,%s)", 

188 ev.name, 

189 ev.undoFunc, 

190 ",".join(str(a) for a in ev.args), 

191 ",".join(f"{k}={v}" for k, v in ev.kwargs.items()), 

192 ) 

193 except Exception: 

194 # In case we had a problem in stringification of arguments 

195 log.warning("Rolling back transaction: %s", ev.name) 

196 try: 

197 ev.undoFunc(*ev.args, **ev.kwargs) 

198 except BaseException as e: 

199 # Deliberately swallow error that may occur in unrolling 

200 log.warning("Exception: %s caught while unrolling: %s", e, ev.name) 

201 pass 

202 

203 def commit(self) -> None: 

204 """Commit this transaction.""" 

205 if self.parent is None: 

206 # Just forget about the events, they have already happened. 

207 return 

208 else: 

209 # We may still want to undo events from this transaction as part of 

210 # the parent. 

211 self.parent._log.extend(self._log) 
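As a self-contained sketch of the undo log, with a temporary file standing in for a datastore artifact:

    import os
    import tempfile

    from lsst.daf.butler.core.datastore import DatastoreTransaction

    txn = DatastoreTransaction()
    fd, path = tempfile.mkstemp()
    os.close(fd)
    # The undo is only registered if the guarded block completes successfully.
    with txn.undoWith("create temp file", os.remove, path):
        pass
    # A later failure unrolls the log in reverse order, calling os.remove(path).
    txn.rollback()
    assert not os.path.exists(path)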

212 

213 

214class Datastore(metaclass=ABCMeta): 

215 """Datastore interface. 

216 

217 Parameters 

218 ---------- 

219 config : `DatastoreConfig` or `str` 

220 Load configuration either from an existing config instance or by 

221 referring to a configuration file. 

222 bridgeManager : `DatastoreRegistryBridgeManager` 

223 Object that manages the interface between `Registry` and datastores. 

224 butlerRoot : `str`, optional 

225 New datastore root to use to override the configuration value. 

226 """ 

227 

228 defaultConfigFile: ClassVar[Optional[str]] = None 

229 """Path to configuration defaults. Accessed within the ``config`` resource 

230 or relative to a search path. Can be `None` if no defaults are specified. 

231 """ 

232 

233 containerKey: ClassVar[Optional[str]] = None 

234 """Name of the key containing a list of subconfigurations that also 

235 need to be merged with defaults and will likely use different Python 

236 datastore classes (but all using DatastoreConfig). Assumed to be a 

237 list of configurations that can be represented in a DatastoreConfig 

238 and containing a "cls" definition. None indicates that no containers 

239 are expected in this Datastore.""" 

240 

241 isEphemeral: bool = False 

242 """Indicate whether this Datastore is ephemeral or not. An ephemeral 

243 datastore is one where the contents of the datastore will not exist 

244 across process restarts. This value can change per-instance.""" 

245 

246 config: DatastoreConfig 

247 """Configuration used to create Datastore.""" 

248 

249 name: str 

250 """Label associated with this Datastore.""" 

251 

252 storageClassFactory: StorageClassFactory 

253 """Factory for creating storage class instances from name.""" 

254 

255 constraints: Constraints 

256 """Constraints to apply when putting datasets into the datastore.""" 

257 

258 # MyPy does not like for this to be annotated as any kind of type, because 

259 # it can't do static checking on type variables that can change at runtime. 

260 IngestPrepData: ClassVar[Any] = IngestPrepData 

261 """Helper base class for ingest implementations. 

262 """ 

263 

264 @classmethod 

265 @abstractmethod 

266 def setConfigRoot(cls, root: str, config: Config, full: Config, overwrite: bool = True) -> None: 

267 """Set filesystem-dependent config options for this datastore. 

268 

269 The options will be appropriate for a new empty repository with the 

270 given root. 

271 

272 Parameters 

273 ---------- 

274 root : `str` 

275 Filesystem path to the root of the data repository. 

276 config : `Config` 

277 A `Config` to update. Only the subset understood by 

278 this component will be updated. Will not expand 

279 defaults. 

280 full : `Config` 

281 A complete config with all defaults expanded that can be 

282 converted to a `DatastoreConfig`. Read-only and will not be 

283 modified by this method. 

284 Repository-specific options that should not be obtained 

285 from defaults when Butler instances are constructed 

286 should be copied from ``full`` to ``config``. 

287 overwrite : `bool`, optional 

288 If `False`, do not modify a value in ``config`` if the value 

289 already exists. Default is always to overwrite with the provided 

290 ``root``. 

291 

292 Notes 

293 ----- 

294 If a keyword is explicitly defined in the supplied ``config`` it 

295 will not be overridden by this method if ``overwrite`` is `False`. 

296 This allows explicit values set in external configs to be retained. 

297 """ 

298 raise NotImplementedError() 

299 

300 @staticmethod 

301 def fromConfig( 

302 config: Config, 

303 bridgeManager: DatastoreRegistryBridgeManager, 

304 butlerRoot: Optional[ResourcePathExpression] = None, 

305 ) -> "Datastore": 

306 """Create datastore from type specified in config file. 

307 

308 Parameters 

309 ---------- 

310 config : `Config` 

311 Configuration instance. 

312 bridgeManager : `DatastoreRegistryBridgeManager` 

313 Object that manages the interface between `Registry` and 

314 datastores. 

315 butlerRoot : `str`, optional 

316 Butler root directory. 

317 """ 

318 cls = doImportType(config["datastore", "cls"]) 

319 if not issubclass(cls, Datastore): 

320 raise TypeError(f"Imported child class {config['datastore', 'cls']} is not a Datastore") 

321 return cls(config=config, bridgeManager=bridgeManager, butlerRoot=butlerRoot) 
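As a sketch of the configuration shape ``fromConfig`` expects (the in-memory datastore class path is illustrative, and a real call also needs a ``bridgeManager`` obtained from a `Registry`):

    from lsst.daf.butler import Config

    config = Config(
        {"datastore": {"cls": "lsst.daf.butler.datastores.inMemoryDatastore.InMemoryDatastore"}}
    )
    # datastore = Datastore.fromConfig(config, bridgeManager=bridge_manager)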

322 

323 def __init__( 

324 self, 

325 config: Union[Config, str], 

326 bridgeManager: DatastoreRegistryBridgeManager, 

327 butlerRoot: Optional[ResourcePathExpression] = None, 

328 ): 

329 self.config = DatastoreConfig(config) 

330 self.name = "ABCDataStore" 

331 self._transaction: Optional[DatastoreTransaction] = None 

332 

333 # All Datastores need storage classes and constraints 

334 self.storageClassFactory = StorageClassFactory() 

335 

336 # And read the constraints list 

337 constraintsConfig = self.config.get("constraints") 

338 self.constraints = Constraints(constraintsConfig, universe=bridgeManager.universe) 

339 

340 def __str__(self) -> str: 

341 return self.name 

342 

343 def __repr__(self) -> str: 

344 return self.name 

345 

346 @property 

347 def names(self) -> Tuple[str, ...]: 

348 """Names associated with this datastore returned as a tuple. 

349 

350 Can be different to ``name`` for a chaining datastore. 

351 """ 

352 # Default implementation returns solely the name itself 

353 return (self.name,) 

354 

355 @contextlib.contextmanager 

356 def transaction(self) -> Iterator[DatastoreTransaction]: 

357 """Context manager supporting `Datastore` transactions. 

358 

359 Transactions can be nested, and are to be used in combination with 

360 `Registry.transaction`. 

361 """ 

362 self._transaction = DatastoreTransaction(self._transaction) 

363 try: 

364 yield self._transaction 

365 except BaseException: 

366 self._transaction.rollback() 

367 raise 

368 else: 

369 self._transaction.commit() 

370 self._transaction = self._transaction.parent 

371 

372 @abstractmethod 

373 def knows(self, ref: DatasetRef) -> bool: 

374 """Check if the dataset is known to the datastore. 

375 

376 Does not check for existence of any artifact. 

377 

378 Parameters 

379 ---------- 

380 ref : `DatasetRef` 

381 Reference to the required dataset. 

382 

383 Returns 

384 ------- 

385 exists : `bool` 

386 `True` if the dataset is known to the datastore. 

387 """ 

388 raise NotImplementedError() 

389 

390 def mexists( 

391 self, refs: Iterable[DatasetRef], artifact_existence: Optional[Dict[ResourcePath, bool]] = None 

392 ) -> Dict[DatasetRef, bool]: 

393 """Check the existence of multiple datasets at once. 

394 

395 Parameters 

396 ---------- 

397 refs : iterable of `DatasetRef` 

398 The datasets to be checked. 

399 artifact_existence : `dict` [`lsst.resources.ResourcePath`, `bool`] 

400 Optional mapping of datastore artifact to existence. Updated by 

401 this method with details of all artifacts tested. Can be `None` 

402 if the caller is not interested. 

403 

404 Returns 

405 ------- 

406 existence : `dict` of [`DatasetRef`, `bool`] 

407 Mapping from dataset to boolean indicating existence. 

408 """ 

409 existence: Dict[DatasetRef, bool] = {} 

410 # Non-optimized default. 

411 for ref in refs: 

412 existence[ref] = self.exists(ref) 

413 return existence 

414 

415 @abstractmethod 

416 def exists(self, datasetRef: DatasetRef) -> bool: 

417 """Check if the dataset exists in the datastore. 

418 

419 Parameters 

420 ---------- 

421 datasetRef : `DatasetRef` 

422 Reference to the required dataset. 

423 

424 Returns 

425 ------- 

426 exists : `bool` 

427 `True` if the entity exists in the `Datastore`. 

428 """ 

429 raise NotImplementedError("Must be implemented by subclass") 

430 

431 @abstractmethod 

432 def get(self, datasetRef: DatasetRef, parameters: Optional[Mapping[str, Any]] = None) -> Any: 

433 """Load an `InMemoryDataset` from the store. 

434 

435 Parameters 

436 ---------- 

437 datasetRef : `DatasetRef` 

438 Reference to the required Dataset. 

439 parameters : `dict` 

440 `StorageClass`-specific parameters that specify a slice of the 

441 Dataset to be loaded. 

442 

443 Returns 

444 ------- 

445 inMemoryDataset : `object` 

446 Requested Dataset or slice thereof as an InMemoryDataset. 

447 """ 

448 raise NotImplementedError("Must be implemented by subclass") 

449 

450 @abstractmethod 

451 def put(self, inMemoryDataset: Any, datasetRef: DatasetRef) -> None: 

452 """Write an `InMemoryDataset` with a given `DatasetRef` to the store. 

453 

454 Parameters 

455 ---------- 

456 inMemoryDataset : `object` 

457 The Dataset to store. 

458 datasetRef : `DatasetRef` 

459 Reference to the associated Dataset. 

460 """ 

461 raise NotImplementedError("Must be implemented by subclass") 

462 

463 def _overrideTransferMode(self, *datasets: FileDataset, transfer: Optional[str] = None) -> Optional[str]: 

464 """Allow ingest transfer mode to be defaulted based on datasets. 

465 

466 Parameters 

467 ---------- 

468 datasets : `FileDataset` 

469 Each positional argument is a struct containing information about 

470 a file to be ingested, including its path (either absolute or 

471 relative to the datastore root, if applicable), a complete 

472 `DatasetRef` (with ``dataset_id not None``), and optionally a 

473 formatter class or its fully-qualified string name. If a formatter 

474 is not provided, this method should populate that attribute with 

475 the formatter the datastore would use for `put`. Subclasses are 

476 also permitted to modify the path attribute (typically to put it 

477 in what the datastore considers its standard form). 

478 transfer : `str`, optional 

479 How (and whether) the dataset should be added to the datastore. 

480 See `ingest` for details of transfer modes. 

481 

482 Returns 

483 ------- 

484 newTransfer : `str` 

485 Transfer mode to use. Will be identical to the supplied transfer 

486 mode unless "auto" is used. 

487 """ 

488 if transfer != "auto": 

489 return transfer 

490 raise RuntimeError(f"{transfer} is not allowed without specialization.") 

491 

492 def _prepIngest(self, *datasets: FileDataset, transfer: Optional[str] = None) -> IngestPrepData: 

493 """Process datasets to identify which ones can be ingested. 

494 

495 Parameters 

496 ---------- 

497 datasets : `FileDataset` 

498 Each positional argument is a struct containing information about 

499 a file to be ingested, including its path (either absolute or 

500 relative to the datastore root, if applicable), a complete 

501 `DatasetRef` (with ``dataset_id not None``), and optionally a 

502 formatter class or its fully-qualified string name. If a formatter 

503 is not provided, this method should populate that attribute with 

504 the formatter the datastore would use for `put`. Subclasses are 

505 also permitted to modify the path attribute (typically to put it 

506 in what the datastore considers its standard form). 

507 transfer : `str`, optional 

508 How (and whether) the dataset should be added to the datastore. 

509 See `ingest` for details of transfer modes. 

510 

511 Returns 

512 ------- 

513 data : `IngestPrepData` 

514 An instance of a subclass of `IngestPrepData`, used to pass 

515 arbitrary data from `_prepIngest` to `_finishIngest`. This should 

516 include only the datasets this datastore can actually ingest; 

517 others should be silently ignored (`Datastore.ingest` will inspect 

518 `IngestPrepData.refs` and raise `DatasetTypeNotSupportedError` if 

519 necessary). 

520 

521 Raises 

522 ------ 

523 NotImplementedError 

524 Raised if the datastore does not support the given transfer mode 

525 (including the case where ingest is not supported at all). 

526 FileNotFoundError 

527 Raised if one of the given files does not exist. 

528 FileExistsError 

529 Raised if transfer is not `None` but the (internal) location the 

530 file would be moved to is already occupied. 

531 

532 Notes 

533 ----- 

534 This method (along with `_finishIngest`) should be implemented by 

535 subclasses to provide ingest support instead of implementing `ingest` 

536 directly. 

537 

538 `_prepIngest` should not modify the data repository or given files in 

539 any way; all changes should be deferred to `_finishIngest`. 

540 

541 When possible, exceptions should be raised in `_prepIngest` instead of 

542 `_finishIngest`. `NotImplementedError` exceptions that indicate that 

543 the transfer mode is not supported must be raised by `_prepIngest` 

544 instead of `_finishIngest`. 

545 """ 

546 raise NotImplementedError(f"Datastore {self} does not support direct file-based ingest.") 

547 

548 def _finishIngest( 

549 self, prepData: IngestPrepData, *, transfer: Optional[str] = None, record_validation_info: bool = True 

550 ) -> None: 

551 """Complete an ingest operation. 

552 

553 Parameters 

554 ---------- 

555 prepData : `IngestPrepData` 

556 An instance of a subclass of `IngestPrepData`. Guaranteed to be 

557 the direct result of a call to `_prepIngest` on this datastore. 

558 transfer : `str`, optional 

559 How (and whether) the dataset should be added to the datastore. 

560 See `ingest` for details of transfer modes. 

561 record_validation_info : `bool`, optional 

562 If `True`, the default, the datastore can record validation 

563 information associated with the file. If `False` the datastore 

564 will not attempt to track any information such as checksums 

565 or file sizes. This can be useful if such information is tracked 

566 in an external system or if the file is to be compressed in place. 

567 It is up to the datastore whether this parameter is relevant. 

568 

569 Raises 

570 ------ 

571 FileNotFoundError 

572 Raised if one of the given files does not exist. 

573 FileExistsError 

574 Raised if transfer is not `None` but the (internal) location the 

575 file would be moved to is already occupied. 

576 

577 Notes 

578 ----- 

579 This method (along with `_prepIngest`) should be implemented by 

580 subclasses to provide ingest support instead of implementing `ingest` 

581 directly. 

582 """ 

583 raise NotImplementedError(f"Datastore {self} does not support direct file-based ingest.") 

584 

585 def ingest( 

586 self, *datasets: FileDataset, transfer: Optional[str] = None, record_validation_info: bool = True 

587 ) -> None: 

588 """Ingest one or more files into the datastore. 

589 

590 Parameters 

591 ---------- 

592 datasets : `FileDataset` 

593 Each positional argument is a struct containing information about 

594 a file to be ingested, including its path (either absolute or 

595 relative to the datastore root, if applicable), a complete 

596 `DatasetRef` (with ``dataset_id not None``), and optionally a 

597 formatter class or its fully-qualified string name. If a formatter 

598 is not provided, the one the datastore would use for ``put`` on 

599 that dataset is assumed. 

600 transfer : `str`, optional 

601 How (and whether) the dataset should be added to the datastore. 

602 If `None` (default), the file must already be in a location 

603 appropriate for the datastore (e.g. within its root directory), 

604 and will not be modified. Other choices include "move", "copy", 

605 "link", "symlink", "relsymlink", and "hardlink". "link" is a 

606 special transfer mode that will first try to make a hardlink and 

607 if that fails a symlink will be used instead. "relsymlink" creates 

608 a relative symlink rather than use an absolute path. 

609 Most datastores do not support all transfer modes. 

610 "auto" is a special option that will let the 

611 data store choose the most natural option for itself. 

612 record_validation_info : `bool`, optional 

613 If `True`, the default, the datastore can record validation 

614 information associated with the file. If `False` the datastore 

615 will not attempt to track any information such as checksums 

616 or file sizes. This can be useful if such information is tracked 

617 in an external system or if the file is to be compressed in place. 

618 It is up to the datastore whether this parameter is relevant. 

619 

620 Raises 

621 ------ 

622 NotImplementedError 

623 Raised if the datastore does not support the given transfer mode 

624 (including the case where ingest is not supported at all). 

625 DatasetTypeNotSupportedError 

626 Raised if one or more files to be ingested have a dataset type that 

627 is not supported by the datastore. 

628 FileNotFoundError 

629 Raised if one of the given files does not exist. 

630 FileExistsError 

631 Raised if transfer is not `None` but the (internal) location the 

632 file would be moved to is already occupied. 

633 

634 Notes 

635 ----- 

636 Subclasses should implement `_prepIngest` and `_finishIngest` instead 

637 of implementing `ingest` directly. Datastores that hold and 

638 delegate to child datastores may want to call those methods as well. 

639 

640 Subclasses are encouraged to document their supported transfer modes 

641 in their class documentation. 

642 """ 

643 # Allow a datastore to select a default transfer mode 

644 transfer = self._overrideTransferMode(*datasets, transfer=transfer) 

645 prepData = self._prepIngest(*datasets, transfer=transfer) 

646 refs = {ref.id: ref for dataset in datasets for ref in dataset.refs} 

647 if None in refs: 

648 # Find the file for the error message. There may be multiple 

649 # bad refs so look for all of them. 

650 unresolved_paths = {} 

651 for dataset in datasets: 

652 unresolved = [] 

653 for ref in dataset.refs: 

654 if ref.id is None: 

655 unresolved.append(ref) 

656 if unresolved: 

657 unresolved_paths[dataset.path] = unresolved 

658 raise RuntimeError( 

659 "Attempt to ingest unresolved DatasetRef from: " 

660 + ",".join(f"{p}: ({[str(r) for r in ref]})" for p, ref in unresolved_paths.items()) 

661 ) 

662 if refs.keys() != prepData.refs.keys(): 

663 unsupported = refs.keys() - prepData.refs.keys() 

664 # Group unsupported refs by DatasetType for an informative 

665 # but still concise error message. 

666 byDatasetType = defaultdict(list) 

667 for datasetId in unsupported: 

668 ref = refs[datasetId] 

669 byDatasetType[ref.datasetType].append(ref) 

670 raise DatasetTypeNotSupportedError( 

671 "DatasetType(s) not supported in ingest: " 

672 + ", ".join(f"{k.name} ({len(v)} dataset(s))" for k, v in byDatasetType.items()) 

673 ) 

674 self._finishIngest(prepData, transfer=transfer, record_validation_info=record_validation_info) 
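As a usage sketch, assuming ``ref`` is a resolved `DatasetRef` (``ref.id is not None``) obtained from a `Registry` and ``datastore`` is a concrete instance; the file path is illustrative:

    from lsst.daf.butler import FileDataset

    dataset = FileDataset(path="raw/exposure_00001.fits", refs=[ref])
    # datastore.ingest(dataset, transfer="copy")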

675 

676 def transfer_from( 

677 self, 

678 source_datastore: Datastore, 

679 refs: Iterable[DatasetRef], 

680 local_refs: Optional[Iterable[DatasetRef]] = None, 

681 transfer: str = "auto", 

682 artifact_existence: Optional[Dict[ResourcePath, bool]] = None, 

683 ) -> None: 

684 """Transfer dataset artifacts from another datastore to this one. 

685 

686 Parameters 

687 ---------- 

688 source_datastore : `Datastore` 

689 The datastore from which to transfer artifacts. That datastore 

690 must be compatible with this datastore receiving the artifacts. 

691 refs : iterable of `DatasetRef` 

692 The datasets to transfer from the source datastore. 

693 local_refs : iterable of `DatasetRef`, optional 

694 The dataset refs associated with the registry associated with 

695 this datastore. Can be `None` if the source and target datastore 

696 are using UUIDs. 

697 transfer : `str`, optional 

698 How (and whether) the dataset should be added to the datastore. 

699 Choices include "move", "copy", 

700 "link", "symlink", "relsymlink", and "hardlink". "link" is a 

701 special transfer mode that will first try to make a hardlink and 

702 if that fails a symlink will be used instead. "relsymlink" creates 

703 a relative symlink rather than use an absolute path. 

704 Most datastores do not support all transfer modes. 

705 "auto" (the default) is a special option that will let the 

706 data store choose the most natural option for itself. 

707 If the source location and transfer location are identical the 

708 transfer mode will be ignored. 

709 artifact_existence : `dict` [`lsst.resources.ResourcePath`, `bool`] 

710 Optional mapping of datastore artifact to existence. Updated by 

711 this method with details of all artifacts tested. Can be `None` 

712 if the caller is not interested. 

713 

714 Raises 

715 ------ 

716 TypeError 

717 Raised if the two datastores are not compatible. 

718 """ 

719 if type(self) is not type(source_datastore): 

720 raise TypeError( 

721 f"Datastore mismatch between this datastore ({type(self)}) and the " 

722 f"source datastore ({type(source_datastore)})." 

723 ) 

724 

725 raise NotImplementedError(f"Datastore {type(self)} must implement a transfer_from method.") 

726 

727 @abstractmethod 

728 def getURIs( 

729 self, datasetRef: DatasetRef, predict: bool = False 

730 ) -> Tuple[Optional[ResourcePath], Dict[str, ResourcePath]]: 

731 """Return URIs associated with dataset. 

732 

733 Parameters 

734 ---------- 

735 datasetRef : `DatasetRef` 

736 Reference to the required dataset. 

737 predict : `bool`, optional 

738 If the datastore does not know about the dataset, should it 

739 return a predicted URI or not? 

740 

741 Returns 

742 ------- 

743 primary : `lsst.resources.ResourcePath` 

744 The URI to the primary artifact associated with this dataset. 

745 If the dataset was disassembled within the datastore this 

746 may be `None`. 

747 components : `dict` 

748 URIs to any components associated with the dataset artifact. 

749 Can be empty if there are no components. 

750 """ 

751 raise NotImplementedError() 

752 

753 @abstractmethod 

754 def getURI(self, datasetRef: DatasetRef, predict: bool = False) -> ResourcePath: 

755 """URI to the Dataset. 

756 

757 Parameters 

758 ---------- 

759 datasetRef : `DatasetRef` 

760 Reference to the required Dataset. 

761 predict : `bool` 

762 If `True` attempt to predict the URI for a dataset if it does 

763 not exist in datastore. 

764 

765 Returns 

766 ------- 

767 uri : `lsst.resources.ResourcePath` 

768 URI pointing to the Dataset within the datastore. If the 

769 Dataset does not exist in the datastore, the URI may be a guess. 

770 If the datastore does not have entities that relate well 

771 to the concept of a URI the returned URI string will be 

772 descriptive. The returned URI is not guaranteed to be obtainable. 

773 

774 Raises 

775 ------ 

776 FileNotFoundError 

777 A URI has been requested for a dataset that does not exist and 

778 guessing is not allowed. 

779 """ 

780 raise NotImplementedError("Must be implemented by subclass") 

781 

782 @abstractmethod 

783 def retrieveArtifacts( 

784 self, 

785 refs: Iterable[DatasetRef], 

786 destination: ResourcePath, 

787 transfer: str = "auto", 

788 preserve_path: bool = True, 

789 overwrite: bool = False, 

790 ) -> List[ResourcePath]: 

791 """Retrieve the artifacts associated with the supplied refs. 

792 

793 Parameters 

794 ---------- 

795 refs : iterable of `DatasetRef` 

796 The datasets for which artifacts are to be retrieved. 

797 A single ref can result in multiple artifacts. The refs must 

798 be resolved. 

799 destination : `lsst.resources.ResourcePath` 

800 Location to write the artifacts. 

801 transfer : `str`, optional 

802 Method to use to transfer the artifacts. Must be one of the options 

803 supported by `lsst.resources.ResourcePath.transfer_from()`. 

804 "move" is not allowed. 

805 preserve_path : `bool`, optional 

806 If `True` the full path of the artifact within the datastore 

807 is preserved. If `False` the final file component of the path 

808 is used. 

809 overwrite : `bool`, optional 

810 If `True` allow transfers to overwrite existing files at the 

811 destination. 

812 

813 Returns 

814 ------- 

815 targets : `list` of `lsst.resources.ResourcePath` 

816 URIs of file artifacts in destination location. Order is not 

817 preserved. 

818 

819 Notes 

820 ----- 

821 For non-file datastores the artifacts written to the destination 

822 may not match the representation inside the datastore. For example 

823 a hierarchical data structure in a NoSQL database may well be stored 

824 as a JSON file. 

825 """ 

826 raise NotImplementedError() 
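As a brief usage sketch against a concrete datastore; the destination directory name is illustrative:

    from lsst.resources import ResourcePath

    destination = ResourcePath("export_dir/", forceDirectory=True)
    # paths = datastore.retrieveArtifacts(refs, destination, transfer="copy")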

827 

828 @abstractmethod 

829 def remove(self, datasetRef: DatasetRef) -> None: 

830 """Indicate to the Datastore that a Dataset can be removed. 

831 

832 Parameters 

833 ---------- 

834 datasetRef : `DatasetRef` 

835 Reference to the required Dataset. 

836 

837 Raises 

838 ------ 

839 FileNotFoundError 

840 When Dataset does not exist. 

841 

842 Notes 

843 ----- 

844 Some Datastores may implement this method as a silent no-op to 

845 disable Dataset deletion through standard interfaces. 

846 """ 

847 raise NotImplementedError("Must be implemented by subclass") 

848 

849 @abstractmethod 

850 def forget(self, refs: Iterable[DatasetRef]) -> None: 

851 """Indicate to the Datastore that it should remove all records of the 

852 given datasets, without actually deleting them. 

853 

854 Parameters 

855 ---------- 

856 refs : `Iterable` [ `DatasetRef` ] 

857 References to the datasets being forgotten. 

858 

859 Notes 

860 ----- 

861 Asking a datastore to forget a `DatasetRef` it does not hold should be 

862 a silent no-op, not an error. 

863 """ 

864 raise NotImplementedError("Must be implemented by subclass") 

865 

866 @abstractmethod 

867 def trash(self, ref: Union[DatasetRef, Iterable[DatasetRef]], ignore_errors: bool = True) -> None: 

868 """Indicate to the Datastore that a Dataset can be moved to the trash. 

869 

870 Parameters 

871 ---------- 

872 ref : `DatasetRef` or iterable thereof 

873 Reference(s) to the required Dataset. 

874 ignore_errors : `bool`, optional 

875 Determine whether errors should be ignored. When multiple 

876 refs are being trashed there will be no per-ref check. 

877 

878 Raises 

879 ------ 

880 FileNotFoundError 

881 When Dataset does not exist and errors are not ignored. Only 

882 checked if a single ref is supplied (and not in a list). 

883 

884 Notes 

885 ----- 

886 Some Datastores may implement this method as a silent no-op to 

887 disable Dataset deletion through standard interfaces. 

888 """ 

889 raise NotImplementedError("Must be implemented by subclass") 

890 

891 @abstractmethod 

892 def emptyTrash(self, ignore_errors: bool = True) -> None: 

893 """Remove all datasets from the trash. 

894 

895 Parameters 

896 ---------- 

897 ignore_errors : `bool`, optional 

898 Determine whether errors should be ignored. 

899 

900 Notes 

901 ----- 

902 Some Datastores may implement this method as a silent no-op to 

903 disable Dataset deletion through standard interfaces. 

904 """ 

905 raise NotImplementedError("Must be implemented by subclass") 

906 

907 @abstractmethod 

908 def transfer(self, inputDatastore: Datastore, datasetRef: DatasetRef) -> None: 

909 """Transfer a dataset from another datastore to this datastore. 

910 

911 Parameters 

912 ---------- 

913 inputDatastore : `Datastore` 

914 The external `Datastore` from which to retrieve the Dataset. 

915 datasetRef : `DatasetRef` 

916 Reference to the required Dataset. 

917 """ 

918 raise NotImplementedError("Must be implemented by subclass") 

919 

920 def export( 

921 self, refs: Iterable[DatasetRef], *, directory: Optional[str] = None, transfer: Optional[str] = None 

922 ) -> Iterable[FileDataset]: 

923 """Export datasets for transfer to another data repository. 

924 

925 Parameters 

926 ---------- 

927 refs : iterable of `DatasetRef` 

928 Dataset references to be exported. 

929 directory : `str`, optional 

930 Path to a directory that should contain files corresponding to 

931 output datasets. Ignored if ``transfer`` is `None`. 

932 transfer : `str`, optional 

933 Mode that should be used to move datasets out of the repository. 

934 Valid options are the same as those of the ``transfer`` argument 

935 to ``ingest``, and datastores may similarly signal that a transfer 

936 mode is not supported by raising `NotImplementedError`. 

937 

938 Returns 

939 ------- 

940 datasets : iterable of `FileDataset` 

941 Structs containing information about the exported datasets, in the 

942 same order as ``refs``. 

943 

944 Raises 

945 ------ 

946 NotImplementedError 

947 Raised if the given transfer mode is not supported. 

948 """ 

949 raise NotImplementedError(f"Transfer mode {transfer} not supported.") 

950 

951 @abstractmethod 

952 def validateConfiguration( 

953 self, entities: Iterable[Union[DatasetRef, DatasetType, StorageClass]], logFailures: bool = False 

954 ) -> None: 

955 """Validate some of the configuration for this datastore. 

956 

957 Parameters 

958 ---------- 

959 entities : iterable of `DatasetRef`, `DatasetType`, or `StorageClass` 

960 Entities to test against this configuration. Can be differing 

961 types. 

962 logFailures : `bool`, optional 

963 If `True`, output a log message for every validation error 

964 detected. 

965 

966 Raises 

967 ------ 

968 DatastoreValidationError 

969 Raised if there is a validation problem with a configuration. 

970 

971 Notes 

972 ----- 

973 Which parts of the configuration are validated is at the discretion 

974 of each Datastore implementation. 

975 """ 

976 raise NotImplementedError("Must be implemented by subclass") 

977 

978 @abstractmethod 

979 def validateKey(self, lookupKey: LookupKey, entity: Union[DatasetRef, DatasetType, StorageClass]) -> None: 

980 """Validate a specific lookup key with the supplied entity. 

981 

982 Parameters 

983 ---------- 

984 lookupKey : `LookupKey` 

985 Key to use to retrieve information from the datastore 

986 configuration. 

987 entity : `DatasetRef`, `DatasetType`, or `StorageClass` 

988 Entity to compare with configuration retrieved using the 

989 specified lookup key. 

990 

991 Raises 

992 ------ 

993 DatastoreValidationError 

994 Raised if there is a problem with the combination of entity 

995 and lookup key. 

996 

997 Notes 

998 ----- 

999 Bypasses the normal selection priorities by allowing a key that 

1000 would normally not be selected to be validated. 

1001 """ 

1002 raise NotImplementedError("Must be implemented by subclass") 

1003 

1004 @abstractmethod 

1005 def getLookupKeys(self) -> Set[LookupKey]: 

1006 """Return all the lookup keys relevant to this datastore. 

1007 

1008 Returns 

1009 ------- 

1010 keys : `set` of `LookupKey` 

1011 The keys stored internally for looking up information based 

1012 on `DatasetType` name or `StorageClass`. 

1013 """ 

1014 raise NotImplementedError("Must be implemented by subclass") 

1015 

1016 def needs_expanded_data_ids( 

1017 self, 

1018 transfer: Optional[str], 

1019 entity: Optional[Union[DatasetRef, DatasetType, StorageClass]] = None, 

1020 ) -> bool: 

1021 """Test whether this datastore needs expanded data IDs to ingest. 

1022 

1023 Parameters 

1024 ---------- 

1025 transfer : `str` or `None` 

1026 Transfer mode for ingest. 

1027 entity : `DatasetRef`, `DatasetType`, or `StorageClass`, optional 

1028 Object representing what will be ingested. If not provided (or not 

1029 specific enough), `True` may be returned even if expanded data 

1030 IDs aren't necessary. 

1031 

1032 Returns 

1033 ------- 

1034 needed : `bool` 

1035 If `True`, expanded data IDs may be needed. `False` only if 

1036 expansion definitely isn't necessary. 

1037 """ 

1038 return True 

1039 

1040 @abstractmethod 

1041 def import_records( 

1042 self, 

1043 data: Mapping[str, DatastoreRecordData], 

1044 ) -> None: 

1045 """Import datastore location and record data from an in-memory data 

1046 structure. 

1047 

1048 Parameters 

1049 ---------- 

1050 data : `Mapping` [ `str`, `DatastoreRecordData` ] 

1051 Datastore records indexed by datastore name. May contain data for 

1052 other `Datastore` instances (generally because they are chained to 

1053 this one), which should be ignored. 

1054 

1055 Notes 

1056 ----- 

1057 Implementations should generally not check that any external resources 

1058 (e.g. files) referred to by these records actually exist, for 

1059 performance reasons; we expect higher-level code to guarantee that they 

1060 do. 

1061 

1062 Implementations are responsible for calling 

1063 `DatastoreRegistryBridge.insert` on all datasets in ``data.locations`` 

1064 where the key is in `names`, as well as loading any opaque table data. 

1065 """ 

1066 raise NotImplementedError() 

1067 

1068 @abstractmethod 

1069 def export_records( 

1070 self, 

1071 refs: Iterable[DatasetIdRef], 

1072 ) -> Mapping[str, DatastoreRecordData]: 

1073 """Export datastore records and locations to an in-memory data 

1074 structure. 

1075 

1076 Parameters 

1077 ---------- 

1078 refs : `Iterable` [ `DatasetIdRef` ] 

1079 Datasets to save. This may include datasets not known to this 

1080 datastore, which should be ignored. 

1081 

1082 Returns 

1083 ------- 

1084 data : `Mapping` [ `str`, `DatastoreRecordData` ] 

1085 Exported datastore records indexed by datastore name. 

1086 """ 

1087 raise NotImplementedError()
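As a sketch of the intended round trip between two compatible, concrete datastores; ``source_datastore``, ``target_datastore``, and ``refs`` are placeholders:

    exported = source_datastore.export_records(refs)
    target_datastore.import_records(exported)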