Coverage for python/lsst/daf/butler/core/datastore.py: 46%


209 statements  

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22"""Support for generic data stores.""" 

23 

24from __future__ import annotations 

25 

26__all__ = ("DatastoreConfig", "Datastore", "DatastoreValidationError", "DatastoreRecordData") 

27 

28import contextlib 

29import dataclasses 

30import logging 

31from abc import ABCMeta, abstractmethod 

32from collections import defaultdict 

33from typing import ( 

34 TYPE_CHECKING, 

35 Any, 

36 Callable, 

37 ClassVar, 

38 Dict, 

39 Iterable, 

40 Iterator, 

41 List, 

42 Mapping, 

43 Optional, 

44 Set, 

45 Tuple, 

46 Type, 

47 Union, 

48) 

49 

50from lsst.utils import doImportType 

51 

52from .config import Config, ConfigSubset 

53from .constraints import Constraints 

54from .exceptions import DatasetTypeNotSupportedError, ValidationError 

55from .fileDataset import FileDataset 

56from .storageClass import StorageClassFactory 

57 

58if TYPE_CHECKING:

59 from lsst.resources import ResourcePath, ResourcePathExpression 

60 

61 from ..registry.interfaces import DatasetIdRef, DatastoreRegistryBridgeManager 

62 from .configSupport import LookupKey 

63 from .datasets import DatasetRef, DatasetType 

64 from .storageClass import StorageClass 

65 

66 

67class DatastoreConfig(ConfigSubset): 

68 """Configuration for Datastores.""" 

69 

70 component = "datastore" 

71 requiredKeys = ("cls",) 

72 defaultConfigFile = "datastore.yaml" 

73 

74 

75class DatastoreValidationError(ValidationError): 

76 """There is a problem with the Datastore configuration.""" 

77 

78 pass 

79 

80 

81@dataclasses.dataclass 

82class DatastoreRecordData: 

83 """A struct that represents a tabular data export from one or more 

84 datastores. 

85 """ 

86 

87 locations: Dict[str, List[DatasetIdRef]] = dataclasses.field(default_factory=lambda: defaultdict(list))

88 """Mapping from datastore name to the datasets in that datastore. 

89 """ 

90 

91 records: Dict[str, List[Dict[str, Any]]] = dataclasses.field(default_factory=lambda: defaultdict(list))

92 """Opaque table data that backs one or more datastores, grouped by 

93 opaque table name. 

94 """ 

95 

96 

97@dataclasses.dataclass(frozen=True) 

98class Event: 

99 __slots__ = {"name", "undoFunc", "args", "kwargs"} 

100 name: str 

101 undoFunc: Callable 

102 args: tuple 

103 kwargs: dict 

104 

105 

106class IngestPrepData: 

107 """A helper base class for `Datastore` ingest implementations. 

108 

109 Datastore implementations will generally need a custom implementation of 

110 this class. 

111 

112 Should be accessed as ``Datastore.IngestPrepData`` instead of via direct 

113 import. 

114 

115 Parameters 

116 ---------- 

117 refs : iterable of `DatasetRef` 

118 References for the datasets that can be ingested by this datastore. 

119 """ 

120 

121 def __init__(self, refs: Iterable[DatasetRef]): 

122 self.refs = {ref.id: ref for ref in refs} 

123 

124 

125class DatastoreTransaction: 

126 """Keeps a log of `Datastore` activity and allow rollback. 

127 

128 Parameters 

129 ---------- 

130 parent : `DatastoreTransaction`, optional 

131 The parent transaction (if any) 

132 """ 

133 

134 Event: ClassVar[Type] = Event 

135 

136 parent: Optional["DatastoreTransaction"] 

137 """The parent transaction. (`DatastoreTransaction`, optional)""" 

138 

139 def __init__(self, parent: Optional[DatastoreTransaction] = None): 

140 self.parent = parent 

141 self._log: List[Event] = [] 

142 

143 def registerUndo(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> None: 

144 """Register event with undo function. 

145 

146 Parameters 

147 ---------- 

148 name : `str` 

149 Name of the event. 

150 undoFunc : func 

151 Function to undo this event. 

152 args : `tuple` 

153 Positional arguments to `undoFunc`. 

154 **kwargs 

155 Keyword arguments to `undoFunc`. 

156 """ 

157 self._log.append(self.Event(name, undoFunc, args, kwargs)) 

158 

159 @contextlib.contextmanager 

160 def undoWith(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> Iterator[None]: 

161 """Register undo function if nested operation succeeds. 

162 

163 Calls `registerUndo`. 

164 

165 This can be used to wrap individual undo-able statements within a 

166 DatastoreTransaction block. Multiple statements that can fail 

167 separately should not be part of the same `undoWith` block. 

168 

169 All arguments are forwarded directly to `registerUndo`. 
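
Examples
--------
A minimal sketch (names are assumptions, not part of this API): ``txn`` is
an active `DatastoreTransaction` and ``uri`` is an
`lsst.resources.ResourcePath` for a newly written artifact, so the cleanup
is registered only if the write succeeds::

    with txn.undoWith("write artifact", uri.remove):
        uri.write(data)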

170 """ 

171 try: 

172 yield None 

173 except BaseException: 

174 raise 

175 else: 

176 self.registerUndo(name, undoFunc, *args, **kwargs) 

177 

178 def rollback(self) -> None: 

179 """Roll back all events in this transaction.""" 

180 log = logging.getLogger(__name__) 

181 while self._log: 

182 ev = self._log.pop() 

183 try: 

184 log.debug( 

185 "Rolling back transaction: %s: %s(%s,%s)", 

186 ev.name, 

187 ev.undoFunc, 

188 ",".join(str(a) for a in ev.args), 

189 ",".join(f"{k}={v}" for k, v in ev.kwargs.items()), 

190 ) 

191 except Exception: 

192 # In case we had a problem in stringification of arguments 

193 log.warning("Rolling back transaction: %s", ev.name) 

194 try: 

195 ev.undoFunc(*ev.args, **ev.kwargs) 

196 except BaseException as e: 

197 # Deliberately swallow error that may occur in unrolling 

198 log.warning("Exception: %s caught while unrolling: %s", e, ev.name) 

199 pass 

200 

201 def commit(self) -> None: 

202 """Commit this transaction.""" 

203 if self.parent is None: 

204 # Just forget about the events, they have already happened. 

205 return 

206 else: 

207 # We may still want the events from this transaction to be part of

208 # the parent. 

209 self.parent._log.extend(self._log) 

210 

211 

212class Datastore(metaclass=ABCMeta): 

213 """Datastore interface. 

214 

215 Parameters 

216 ---------- 

217 config : `DatastoreConfig` or `str` 

218 Load configuration either from an existing config instance or by 

219 referring to a configuration file. 

220 bridgeManager : `DatastoreRegistryBridgeManager` 

221 Object that manages the interface between `Registry` and datastores. 

222 butlerRoot : `str`, optional 

223 New datastore root to use to override the configuration value. 

224 """ 

225 

226 defaultConfigFile: ClassVar[Optional[str]] = None 

227 """Path to configuration defaults. Accessed within the ``config`` resource 

228 or relative to a search path. Can be None if no defaults specified. 

229 """ 

230 

231 containerKey: ClassVar[Optional[str]] = None 

232 """Name of the key containing a list of subconfigurations that also 

233 need to be merged with defaults and will likely use different Python 

234 datastore classes (but all using DatastoreConfig). Assumed to be a 

235 list of configurations that can be represented in a DatastoreConfig 

236 and containing a "cls" definition. None indicates that no containers 

237 are expected in this Datastore.""" 

238 

239 isEphemeral: bool = False 

240 """Indicate whether this Datastore is ephemeral or not. An ephemeral 

241 datastore is one where the contents of the datastore will not exist 

242 across process restarts. This value can change per-instance.""" 

243 

244 config: DatastoreConfig 

245 """Configuration used to create Datastore.""" 

246 

247 name: str 

248 """Label associated with this Datastore.""" 

249 

250 storageClassFactory: StorageClassFactory 

251 """Factory for creating storage class instances from name.""" 

252 

253 constraints: Constraints 

254 """Constraints to apply when putting datasets into the datastore.""" 

255 

256 # MyPy does not like for this to be annotated as any kind of type, because 

257 # it can't do static checking on type variables that can change at runtime. 

258 IngestPrepData: ClassVar[Any] = IngestPrepData 

259 """Helper base class for ingest implementations. 

260 """ 

261 

262 @classmethod 

263 @abstractmethod 

264 def setConfigRoot(cls, root: str, config: Config, full: Config, overwrite: bool = True) -> None: 

265 """Set filesystem-dependent config options for this datastore. 

266 

267 The options will be appropriate for a new empty repository with the 

268 given root. 

269 

270 Parameters 

271 ---------- 

272 root : `str` 

273 Filesystem path to the root of the data repository. 

274 config : `Config` 

275 A `Config` to update. Only the subset understood by 

276 this component will be updated. Will not expand 

277 defaults. 

278 full : `Config` 

279 A complete config with all defaults expanded that can be 

280 converted to a `DatastoreConfig`. Read-only and will not be 

281 modified by this method. 

282 Repository-specific options that should not be obtained 

283 from defaults when Butler instances are constructed 

284 should be copied from ``full`` to ``config``. 

285 overwrite : `bool`, optional 

286 If `False`, do not modify a value in ``config`` if the value 

287 already exists. Default is always to overwrite with the provided 

288 ``root``. 

289 

290 Notes 

291 ----- 

292 If a keyword is explicitly defined in the supplied ``config`` it 

293 will not be overridden by this method if ``overwrite`` is `False`. 

294 This allows explicit values set in external configs to be retained. 

295 """ 

296 raise NotImplementedError() 

297 

298 @staticmethod 

299 def fromConfig( 

300 config: Config, 

301 bridgeManager: DatastoreRegistryBridgeManager, 

302 butlerRoot: Optional[ResourcePathExpression] = None, 

303 ) -> "Datastore": 

304 """Create datastore from type specified in config file. 

305 

306 Parameters 

307 ---------- 

308 config : `Config` 

309 Configuration instance. 

310 bridgeManager : `DatastoreRegistryBridgeManager` 

311 Object that manages the interface between `Registry` and 

312 datastores. 

313 butlerRoot : `str`, optional 

314 Butler root directory. 
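
Examples
--------
A sketch, assuming ``butlerConfig`` is a `Config` containing a
``datastore.cls`` entry and ``bridgeManager`` was obtained from the
target `Registry`::

    datastore = Datastore.fromConfig(butlerConfig, bridgeManager, butlerRoot="/repo")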

315 """ 

316 cls = doImportType(config["datastore", "cls"]) 

317 if not issubclass(cls, Datastore): 

318 raise TypeError(f"Imported child class {config['datastore', 'cls']} is not a Datastore") 

319 return cls(config=config, bridgeManager=bridgeManager, butlerRoot=butlerRoot) 

320 

321 def __init__( 

322 self, 

323 config: Union[Config, str], 

324 bridgeManager: DatastoreRegistryBridgeManager, 

325 butlerRoot: Optional[ResourcePathExpression] = None, 

326 ): 

327 self.config = DatastoreConfig(config) 

328 self.name = "ABCDataStore" 

329 self._transaction: Optional[DatastoreTransaction] = None 

330 

331 # All Datastores need storage classes and constraints 

332 self.storageClassFactory = StorageClassFactory() 

333 

334 # And read the constraints list 

335 constraintsConfig = self.config.get("constraints") 

336 self.constraints = Constraints(constraintsConfig, universe=bridgeManager.universe) 

337 

338 def __str__(self) -> str: 

339 return self.name 

340 

341 def __repr__(self) -> str: 

342 return self.name 

343 

344 @property 

345 def names(self) -> Tuple[str, ...]: 

346 """Names associated with this datastore returned as a list. 

347 

348 Can be different to ``name`` for a chaining datastore. 

349 """ 

350 # Default implementation returns solely the name itself 

351 return (self.name,) 

352 

353 @contextlib.contextmanager 

354 def transaction(self) -> Iterator[DatastoreTransaction]: 

355 """Context manager supporting `Datastore` transactions. 

356 

357 Transactions can be nested, and are to be used in combination with 

358 `Registry.transaction`. 
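
Examples
--------
A sketch of typical use; ``datastore`` is a concrete instance, ``ref`` a
resolved `DatasetRef`, and ``inMemoryDataset`` the object to store::

    with datastore.transaction():
        datastore.put(inMemoryDataset, ref)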

359 """ 

360 self._transaction = DatastoreTransaction(self._transaction) 

361 try: 

362 yield self._transaction 

363 except BaseException: 

364 self._transaction.rollback() 

365 raise 

366 else: 

367 self._transaction.commit() 

368 self._transaction = self._transaction.parent 

369 

370 @abstractmethod 

371 def knows(self, ref: DatasetRef) -> bool: 

372 """Check if the dataset is known to the datastore. 

373 

374 Does not check for existence of any artifact. 

375 

376 Parameters 

377 ---------- 

378 ref : `DatasetRef` 

379 Reference to the required dataset. 

380 

381 Returns 

382 ------- 

383 exists : `bool` 

384 `True` if the dataset is known to the datastore. 

385 """ 

386 raise NotImplementedError() 

387 

388 def mexists( 

389 self, refs: Iterable[DatasetRef], artifact_existence: Optional[Dict[ResourcePath, bool]] = None 

390 ) -> Dict[DatasetRef, bool]: 

391 """Check the existence of multiple datasets at once. 

392 

393 Parameters 

394 ---------- 

395 refs : iterable of `DatasetRef` 

396 The datasets to be checked. 

397 artifact_existence : `dict` [`lsst.resources.ResourcePath`, `bool`] 

398 Optional mapping of datastore artifact to existence. Updated by 

399 this method with details of all artifacts tested. Can be `None` 

400 if the caller is not interested. 

401 

402 Returns 

403 ------- 

404 existence : `dict` of [`DatasetRef`, `bool`] 

405 Mapping from dataset to boolean indicating existence. 
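
Examples
--------
A sketch, assuming ``datastore`` is a concrete instance and ``refs`` is a
list of resolved `DatasetRef` objects::

    existence = datastore.mexists(refs)
    missing = [ref for ref, exists in existence.items() if not exists]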

406 """ 

407 existence: Dict[DatasetRef, bool] = {} 

408 # Non-optimized default. 

409 for ref in refs: 

410 existence[ref] = self.exists(ref) 

411 return existence 

412 

413 @abstractmethod 

414 def exists(self, datasetRef: DatasetRef) -> bool: 

415 """Check if the dataset exists in the datastore. 

416 

417 Parameters 

418 ---------- 

419 datasetRef : `DatasetRef` 

420 Reference to the required dataset. 

421 

422 Returns 

423 ------- 

424 exists : `bool` 

425 `True` if the entity exists in the `Datastore`. 

426 """ 

427 raise NotImplementedError("Must be implemented by subclass") 

428 

429 @abstractmethod 

430 def get(self, datasetRef: DatasetRef, parameters: Optional[Mapping[str, Any]] = None) -> Any:

431 """Load an `InMemoryDataset` from the store. 

432 

433 Parameters 

434 ---------- 

435 datasetRef : `DatasetRef` 

436 Reference to the required Dataset. 

437 parameters : `dict` 

438 `StorageClass`-specific parameters that specify a slice of the 

439 Dataset to be loaded. 

440 

441 Returns 

442 ------- 

443 inMemoryDataset : `object` 

444 Requested Dataset or slice thereof as an InMemoryDataset. 
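
Examples
--------
A sketch; the parameter name is illustrative and depends on the dataset's
`StorageClass`::

    cutout = datastore.get(ref, parameters={"bbox": bbox})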

445 """ 

446 raise NotImplementedError("Must be implemented by subclass") 

447 

448 @abstractmethod 

449 def put(self, inMemoryDataset: Any, datasetRef: DatasetRef) -> None: 

450 """Write a `InMemoryDataset` with a given `DatasetRef` to the store. 

451 

452 Parameters 

453 ---------- 

454 inMemoryDataset : `object` 

455 The Dataset to store. 

456 datasetRef : `DatasetRef` 

457 Reference to the associated Dataset. 

458 """ 

459 raise NotImplementedError("Must be implemented by subclass") 

460 

461 def _overrideTransferMode(self, *datasets: FileDataset, transfer: Optional[str] = None) -> Optional[str]: 

462 """Allow ingest transfer mode to be defaulted based on datasets. 

463 

464 Parameters 

465 ---------- 

466 datasets : `FileDataset` 

467 Each positional argument is a struct containing information about 

468 a file to be ingested, including its path (either absolute or 

469 relative to the datastore root, if applicable), a complete 

470 `DatasetRef` (with ``dataset_id not None``), and optionally a 

471 formatter class or its fully-qualified string name. If a formatter 

472 is not provided, this method should populate that attribute with 

473 the formatter the datastore would use for `put`. Subclasses are 

474 also permitted to modify the path attribute (typically to put it 

475 in what the datastore considers its standard form). 

476 transfer : `str`, optional 

477 How (and whether) the dataset should be added to the datastore. 

478 See `ingest` for details of transfer modes. 

479 

480 Returns 

481 ------- 

482 newTransfer : `str` 

483 Transfer mode to use. Will be identical to the supplied transfer 

484 mode unless "auto" is used. 

485 """ 

486 if transfer != "auto": 

487 return transfer 

488 raise RuntimeError(f"{transfer} is not allowed without specialization.") 

489 

490 def _prepIngest(self, *datasets: FileDataset, transfer: Optional[str] = None) -> IngestPrepData: 

491 """Process datasets to identify which ones can be ingested. 

492 

493 Parameters 

494 ---------- 

495 datasets : `FileDataset` 

496 Each positional argument is a struct containing information about 

497 a file to be ingested, including its path (either absolute or 

498 relative to the datastore root, if applicable), a complete 

499 `DatasetRef` (with ``dataset_id not None``), and optionally a 

500 formatter class or its fully-qualified string name. If a formatter 

501 is not provided, this method should populate that attribute with 

502 the formatter the datastore would use for `put`. Subclasses are 

503 also permitted to modify the path attribute (typically to put it 

504 in what the datastore considers its standard form). 

505 transfer : `str`, optional 

506 How (and whether) the dataset should be added to the datastore. 

507 See `ingest` for details of transfer modes. 

508 

509 Returns 

510 ------- 

511 data : `IngestPrepData` 

512 An instance of a subclass of `IngestPrepData`, used to pass 

513 arbitrary data from `_prepIngest` to `_finishIngest`. This should 

514 include only the datasets this datastore can actually ingest; 

515 others should be silently ignored (`Datastore.ingest` will inspect 

516 `IngestPrepData.refs` and raise `DatasetTypeNotSupportedError` if 

517 necessary). 

518 

519 Raises 

520 ------ 

521 NotImplementedError 

522 Raised if the datastore does not support the given transfer mode 

523 (including the case where ingest is not supported at all). 

524 FileNotFoundError 

525 Raised if one of the given files does not exist. 

526 FileExistsError 

527 Raised if transfer is not `None` but the (internal) location the 

528 file would be moved to is already occupied. 

529 

530 Notes 

531 ----- 

532 This method (along with `_finishIngest`) should be implemented by 

533 subclasses to provide ingest support instead of implementing `ingest` 

534 directly. 

535 

536 `_prepIngest` should not modify the data repository or given files in 

537 any way; all changes should be deferred to `_finishIngest`. 

538 

539 When possible, exceptions should be raised in `_prepIngest` instead of 

540 `_finishIngest`. `NotImplementedError` exceptions that indicate that 

541 the transfer mode is not supported must be raised by `_prepIngest` 

542 instead of `_finishIngest`. 
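
Examples
--------
A sketch of the shape of a subclass implementation; the constraint-based
filtering shown here is illustrative, not required::

    def _prepIngest(self, *datasets, transfer=None):
        # Keep only the datasets whose refs this datastore accepts.
        acceptable = [
            dataset
            for dataset in datasets
            if all(self.constraints.isAcceptable(ref) for ref in dataset.refs)
        ]
        return self.IngestPrepData(ref for dataset in acceptable for ref in dataset.refs)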

543 """ 

544 raise NotImplementedError(f"Datastore {self} does not support direct file-based ingest.") 

545 

546 def _finishIngest( 

547 self, prepData: IngestPrepData, *, transfer: Optional[str] = None, record_validation_info: bool = True 

548 ) -> None: 

549 """Complete an ingest operation. 

550 

551 Parameters 

552 ---------- 

553 data : `IngestPrepData` 

554 An instance of a subclass of `IngestPrepData`. Guaranteed to be 

555 the direct result of a call to `_prepIngest` on this datastore. 

556 transfer : `str`, optional 

557 How (and whether) the dataset should be added to the datastore. 

558 See `ingest` for details of transfer modes. 

559 record_validation_info : `bool`, optional 

560 If `True`, the default, the datastore can record validation 

561 information associated with the file. If `False` the datastore 

562 will not attempt to track any information such as checksums 

563 or file sizes. This can be useful if such information is tracked 

564 in an external system or if the file is to be compressed in place. 

565 It is up to the datastore whether this parameter is relevant. 

566 

567 Raises 

568 ------ 

569 FileNotFoundError 

570 Raised if one of the given files does not exist. 

571 FileExistsError 

572 Raised if transfer is not `None` but the (internal) location the 

573 file would be moved to is already occupied. 

574 

575 Notes 

576 ----- 

577 This method (along with `_prepIngest`) should be implemented by 

578 subclasses to provide ingest support instead of implementing `ingest` 

579 directly. 

580 """ 

581 raise NotImplementedError(f"Datastore {self} does not support direct file-based ingest.") 

582 

583 def ingest( 

584 self, *datasets: FileDataset, transfer: Optional[str] = None, record_validation_info: bool = True 

585 ) -> None: 

586 """Ingest one or more files into the datastore. 

587 

588 Parameters 

589 ---------- 

590 datasets : `FileDataset` 

591 Each positional argument is a struct containing information about 

592 a file to be ingested, including its path (either absolute or 

593 relative to the datastore root, if applicable), a complete 

594 `DatasetRef` (with ``dataset_id not None``), and optionally a 

595 formatter class or its fully-qualified string name. If a formatter 

596 is not provided, the one the datastore would use for ``put`` on 

597 that dataset is assumed. 

598 transfer : `str`, optional 

599 How (and whether) the dataset should be added to the datastore. 

600 If `None` (default), the file must already be in a location 

601 appropriate for the datastore (e.g. within its root directory), 

602 and will not be modified. Other choices include "move", "copy", 

603 "link", "symlink", "relsymlink", and "hardlink". "link" is a 

604 special transfer mode that will first try to make a hardlink and 

605 if that fails a symlink will be used instead. "relsymlink" creates 

606 a relative symlink rather than use an absolute path. 

607 Most datastores do not support all transfer modes. 

608 "auto" is a special option that will let the 

609 data store choose the most natural option for itself. 

610 record_validation_info : `bool`, optional 

611 If `True`, the default, the datastore can record validation 

612 information associated with the file. If `False` the datastore 

613 will not attempt to track any information such as checksums 

614 or file sizes. This can be useful if such information is tracked 

615 in an external system or if the file is to be compressed in place. 

616 It is up to the datastore whether this parameter is relevant. 

617 

618 Raises 

619 ------ 

620 NotImplementedError 

621 Raised if the datastore does not support the given transfer mode 

622 (including the case where ingest is not supported at all). 

623 DatasetTypeNotSupportedError 

624 Raised if one or more files to be ingested have a dataset type that 

625 is not supported by the datastore. 

626 FileNotFoundError 

627 Raised if one of the given files does not exist. 

628 FileExistsError 

629 Raised if transfer is not `None` but the (internal) location the 

630 file would be moved to is already occupied. 

631 

632 Notes 

633 ----- 

634 Subclasses should implement `_prepIngest` and `_finishIngest` instead 

635 of implementing `ingest` directly. Datastores that hold and 

636 delegate to child datastores may want to call those methods as well. 

637 

638 Subclasses are encouraged to document their supported transfer modes 

639 in their class documentation. 
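
Examples
--------
A sketch, assuming ``ref`` is a resolved `DatasetRef` matching the file
and that this datastore supports the "copy" transfer mode::

    datastore.ingest(FileDataset(path="data.fits", refs=[ref]), transfer="copy")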

640 """ 

641 # Allow a datastore to select a default transfer mode 

642 transfer = self._overrideTransferMode(*datasets, transfer=transfer) 

643 prepData = self._prepIngest(*datasets, transfer=transfer) 

644 refs = {ref.id: ref for dataset in datasets for ref in dataset.refs} 

645 if None in refs: 

646 # Find the file for the error message. There may be multiple 

647 # bad refs so look for all of them. 

648 unresolved_paths = {} 

649 for dataset in datasets: 

650 unresolved = [] 

651 for ref in dataset.refs: 

652 if ref.id is None: 

653 unresolved.append(ref) 

654 if unresolved: 

655 unresolved_paths[dataset.path] = unresolved 

656 raise RuntimeError( 

657 "Attempt to ingest unresolved DatasetRef from: " 

658 + ",".join(f"{p}: ({[str(r) for r in ref]})" for p, ref in unresolved_paths.items()) 

659 ) 

660 if refs.keys() != prepData.refs.keys(): 

661 unsupported = refs.keys() - prepData.refs.keys() 

662 # Group unsupported refs by DatasetType for an informative 

663 # but still concise error message. 

664 byDatasetType = defaultdict(list) 

665 for datasetId in unsupported: 

666 ref = refs[datasetId] 

667 byDatasetType[ref.datasetType].append(ref) 

668 raise DatasetTypeNotSupportedError( 

669 "DatasetType(s) not supported in ingest: " 

670 + ", ".join(f"{k.name} ({len(v)} dataset(s))" for k, v in byDatasetType.items()) 

671 ) 

672 self._finishIngest(prepData, transfer=transfer, record_validation_info=record_validation_info) 

673 

674 def transfer_from( 

675 self, 

676 source_datastore: Datastore, 

677 refs: Iterable[DatasetRef], 

678 local_refs: Optional[Iterable[DatasetRef]] = None, 

679 transfer: str = "auto", 

680 artifact_existence: Optional[Dict[ResourcePath, bool]] = None, 

681 ) -> None: 

682 """Transfer dataset artifacts from another datastore to this one. 

683 

684 Parameters 

685 ---------- 

686 source_datastore : `Datastore` 

687 The datastore from which to transfer artifacts. That datastore 

688 must be compatible with this datastore receiving the artifacts. 

689 refs : iterable of `DatasetRef` 

690 The datasets to transfer from the source datastore. 

691 local_refs : iterable of `DatasetRef`, optional 

692 The dataset refs as known to the registry associated with

693 this datastore. Can be `None` if the source and target datastore 

694 are using UUIDs. 

695 transfer : `str`, optional 

696 How (and whether) the dataset should be added to the datastore. 

697 Choices include "move", "copy", 

698 "link", "symlink", "relsymlink", and "hardlink". "link" is a 

699 special transfer mode that will first try to make a hardlink and 

700 if that fails a symlink will be used instead. "relsymlink" creates 

701 a relative symlink rather than use an absolute path. 

702 Most datastores do not support all transfer modes. 

703 "auto" (the default) is a special option that will let the 

704 data store choose the most natural option for itself. 

705 If the source location and transfer location are identical the 

706 transfer mode will be ignored. 

707 artifact_existence : `dict` [`lsst.resources.ResourcePath`, `bool`] 

708 Optional mapping of datastore artifact to existence. Updated by 

709 this method with details of all artifacts tested. Can be `None` 

710 if the caller is not interested. 

711 

712 Raises 

713 ------ 

714 TypeError 

715 Raised if the two datastores are not compatible. 
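
Examples
--------
A sketch, assuming both datastores are of the same concrete type and the
registries share UUID dataset IDs::

    target_datastore.transfer_from(source_datastore, refs, transfer="copy")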

716 """ 

717 if type(self) is not type(source_datastore): 

718 raise TypeError( 

719 f"Datastore mismatch between this datastore ({type(self)}) and the " 

720 f"source datastore ({type(source_datastore)})." 

721 ) 

722 

723 raise NotImplementedError(f"Datastore {type(self)} must implement a transfer_from method.") 

724 

725 @abstractmethod 

726 def getURIs( 

727 self, datasetRef: DatasetRef, predict: bool = False 

728 ) -> Tuple[Optional[ResourcePath], Dict[str, ResourcePath]]: 

729 """Return URIs associated with dataset. 

730 

731 Parameters 

732 ---------- 

733 datasetRef : `DatasetRef`

734 Reference to the required dataset. 

735 predict : `bool`, optional 

736 If the datastore does not know about the dataset, should it 

737 return a predicted URI or not? 

738 

739 Returns 

740 ------- 

741 primary : `lsst.resources.ResourcePath` 

742 The URI to the primary artifact associated with this dataset. 

743 If the dataset was disassembled within the datastore this 

744 may be `None`. 

745 components : `dict` 

746 URIs to any components associated with the dataset artifact. 

747 Can be empty if there are no components. 

748 """ 

749 raise NotImplementedError() 

750 

751 @abstractmethod 

752 def getURI(self, datasetRef: DatasetRef, predict: bool = False) -> ResourcePath: 

753 """URI to the Dataset. 

754 

755 Parameters 

756 ---------- 

757 datasetRef : `DatasetRef` 

758 Reference to the required Dataset. 

759 predict : `bool` 

760 If `True` attempt to predict the URI for a dataset if it does 

761 not exist in datastore. 

762 

763 Returns 

764 ------- 

765 uri : `lsst.resources.ResourcePath`

766 URI pointing to the Dataset within the datastore. If the

767 Dataset does not exist in the datastore, the URI may be a guess. 

768 If the datastore does not have entities that relate well 

769 to the concept of a URI the returned URI string will be 

770 descriptive. The returned URI is not guaranteed to be obtainable. 

771 

772 Raises 

773 ------ 

774 FileNotFoundError 

775 A URI has been requested for a dataset that does not exist and 

776 guessing is not allowed. 
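
Examples
--------
A sketch; with ``predict=True`` a plausible URI is returned even for a
dataset that has not yet been written::

    uri = datastore.getURI(ref, predict=True)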

777 """ 

778 raise NotImplementedError("Must be implemented by subclass") 

779 

780 @abstractmethod 

781 def retrieveArtifacts( 

782 self, 

783 refs: Iterable[DatasetRef], 

784 destination: ResourcePath, 

785 transfer: str = "auto", 

786 preserve_path: bool = True, 

787 overwrite: bool = False, 

788 ) -> List[ResourcePath]: 

789 """Retrieve the artifacts associated with the supplied refs. 

790 

791 Parameters 

792 ---------- 

793 refs : iterable of `DatasetRef` 

794 The datasets for which artifacts are to be retrieved. 

795 A single ref can result in multiple artifacts. The refs must 

796 be resolved. 

797 destination : `lsst.resources.ResourcePath` 

798 Location to write the artifacts. 

799 transfer : `str`, optional 

800 Method to use to transfer the artifacts. Must be one of the options 

801 supported by `lsst.resources.ResourcePath.transfer_from()`. 

802 "move" is not allowed. 

803 preserve_path : `bool`, optional 

804 If `True` the full path of the artifact within the datastore 

805 is preserved. If `False` the final file component of the path 

806 is used. 

807 overwrite : `bool`, optional 

808 If `True` allow transfers to overwrite existing files at the 

809 destination. 

810 

811 Returns 

812 ------- 

813 targets : `list` of `lsst.resources.ResourcePath` 

814 URIs of file artifacts in destination location. Order is not 

815 preserved. 

816 

817 Notes 

818 ----- 

819 For non-file datastores the artifacts written to the destination 

820 may not match the representation inside the datastore. For example 

821 a hierarchical data structure in a NoSQL database may well be stored

822 as a JSON file. 
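
Examples
--------
A sketch, assuming ``refs`` are resolved and the destination is writable::

    from lsst.resources import ResourcePath

    targets = datastore.retrieveArtifacts(
        refs, ResourcePath("/tmp/export/"), transfer="copy"
    )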

823 """ 

824 raise NotImplementedError() 

825 

826 @abstractmethod 

827 def remove(self, datasetRef: DatasetRef) -> None: 

828 """Indicate to the Datastore that a Dataset can be removed. 

829 

830 Parameters 

831 ---------- 

832 datasetRef : `DatasetRef` 

833 Reference to the required Dataset. 

834 

835 Raises 

836 ------ 

837 FileNotFoundError 

838 When Dataset does not exist. 

839 

840 Notes 

841 ----- 

842 Some Datastores may implement this method as a silent no-op to 

843 disable Dataset deletion through standard interfaces. 

844 """ 

845 raise NotImplementedError("Must be implemented by subclass") 

846 

847 @abstractmethod 

848 def forget(self, refs: Iterable[DatasetRef]) -> None: 

849 """Indicate to the Datastore that it should remove all records of the 

850 given datasets, without actually deleting them. 

851 

852 Parameters 

853 ---------- 

854 refs : `Iterable` [ `DatasetRef` ] 

855 References to the datasets being forgotten. 

856 

857 Notes 

858 ----- 

859 Asking a datastore to forget a `DatasetRef` it does not hold should be 

860 a silent no-op, not an error. 

861 """ 

862 raise NotImplementedError("Must be implemented by subclass") 

863 

864 @abstractmethod 

865 def trash(self, ref: Union[DatasetRef, Iterable[DatasetRef]], ignore_errors: bool = True) -> None: 

866 """Indicate to the Datastore that a Dataset can be moved to the trash. 

867 

868 Parameters 

869 ---------- 

870 ref : `DatasetRef` or iterable thereof 

871 Reference(s) to the required Dataset. 

872 ignore_errors : `bool`, optional 

873 Determine whether errors should be ignored. When multiple 

874 refs are being trashed there will be no per-ref check. 

875 

876 Raises 

877 ------ 

878 FileNotFoundError 

879 When Dataset does not exist and errors are not ignored. Only 

880 checked if a single ref is supplied (and not in a list). 

881 

882 Notes 

883 ----- 

884 Some Datastores may implement this method as a silent no-op to 

885 disable Dataset deletion through standard interfaces. 
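
Examples
--------
A sketch of the two-phase deletion pattern (move to trash, then empty)::

    datastore.trash(refs)
    datastore.emptyTrash()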

886 """ 

887 raise NotImplementedError("Must be implemented by subclass") 

888 

889 @abstractmethod 

890 def emptyTrash(self, ignore_errors: bool = True) -> None: 

891 """Remove all datasets from the trash. 

892 

893 Parameters 

894 ---------- 

895 ignore_errors : `bool`, optional 

896 Determine whether errors should be ignored. 

897 

898 Notes 

899 ----- 

900 Some Datastores may implement this method as a silent no-op to 

901 disable Dataset deletion through standard interfaces. 

902 """ 

903 raise NotImplementedError("Must be implemented by subclass") 

904 

905 @abstractmethod 

906 def transfer(self, inputDatastore: Datastore, datasetRef: DatasetRef) -> None: 

907 """Transfer a dataset from another datastore to this datastore. 

908 

909 Parameters 

910 ---------- 

911 inputDatastore : `Datastore` 

912 The external `Datastore` from which to retrieve the Dataset. 

913 datasetRef : `DatasetRef` 

914 Reference to the required Dataset. 

915 """ 

916 raise NotImplementedError("Must be implemented by subclass") 

917 

918 def export( 

919 self, refs: Iterable[DatasetRef], *, directory: Optional[str] = None, transfer: Optional[str] = None 

920 ) -> Iterable[FileDataset]: 

921 """Export datasets for transfer to another data repository. 

922 

923 Parameters 

924 ---------- 

925 refs : iterable of `DatasetRef` 

926 Dataset references to be exported. 

927 directory : `str`, optional 

928 Path to a directory that should contain files corresponding to 

929 output datasets. Ignored if ``transfer`` is `None`. 

930 transfer : `str`, optional 

931 Mode that should be used to move datasets out of the repository. 

932 Valid options are the same as those of the ``transfer`` argument 

933 to ``ingest``, and datastores may similarly signal that a transfer 

934 mode is not supported by raising `NotImplementedError`. 

935 

936 Returns 

937 ------- 

938 datasets : iterable of `FileDataset`

939 Structs containing information about the exported datasets, in the 

940 same order as ``refs``. 

941 

942 Raises 

943 ------ 

944 NotImplementedError 

945 Raised if the given transfer mode is not supported. 
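
Examples
--------
A sketch, assuming the concrete datastore supports the "copy" transfer
mode and ``refs`` are resolved::

    for dataset in datastore.export(refs, directory="/tmp/export", transfer="copy"):
        print(dataset.path)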

946 """ 

947 raise NotImplementedError(f"Transfer mode {transfer} not supported.") 

948 

949 @abstractmethod 

950 def validateConfiguration( 

951 self, entities: Iterable[Union[DatasetRef, DatasetType, StorageClass]], logFailures: bool = False 

952 ) -> None: 

953 """Validate some of the configuration for this datastore. 

954 

955 Parameters 

956 ---------- 

957 entities : iterable of `DatasetRef`, `DatasetType`, or `StorageClass` 

958 Entities to test against this configuration. Can be differing 

959 types. 

960 logFailures : `bool`, optional 

961 If `True`, output a log message for every validation error 

962 detected. 

963 

964 Raises 

965 ------ 

966 DatastoreValidationError 

967 Raised if there is a validation problem with a configuration. 

968 

969 Notes 

970 ----- 

971 Which parts of the configuration are validated is at the discretion 

972 of each Datastore implementation. 

973 """ 

974 raise NotImplementedError("Must be implemented by subclass") 

975 

976 @abstractmethod 

977 def validateKey(self, lookupKey: LookupKey, entity: Union[DatasetRef, DatasetType, StorageClass]) -> None: 

978 """Validate a specific look up key with supplied entity. 

979 

980 Parameters 

981 ---------- 

982 lookupKey : `LookupKey` 

983 Key to use to retrieve information from the datastore 

984 configuration. 

985 entity : `DatasetRef`, `DatasetType`, or `StorageClass` 

986 Entity to compare with configuration retrieved using the 

987 specified lookup key. 

988 

989 Raises 

990 ------ 

991 DatastoreValidationError 

992 Raised if there is a problem with the combination of entity 

993 and lookup key. 

994 

995 Notes 

996 ----- 

997 Bypasses the normal selection priorities by allowing a key that 

998 would normally not be selected to be validated. 

999 """ 

1000 raise NotImplementedError("Must be implemented by subclass") 

1001 

1002 @abstractmethod 

1003 def getLookupKeys(self) -> Set[LookupKey]: 

1004 """Return all the lookup keys relevant to this datastore. 

1005 

1006 Returns 

1007 ------- 

1008 keys : `set` of `LookupKey` 

1009 The keys stored internally for looking up information based 

1010 on `DatasetType` name or `StorageClass`. 

1011 """ 

1012 raise NotImplementedError("Must be implemented by subclass") 

1013 

1014 def needs_expanded_data_ids( 

1015 self, 

1016 transfer: Optional[str], 

1017 entity: Optional[Union[DatasetRef, DatasetType, StorageClass]] = None, 

1018 ) -> bool: 

1019 """Test whether this datastore needs expanded data IDs to ingest. 

1020 

1021 Parameters 

1022 ---------- 

1023 transfer : `str` or `None` 

1024 Transfer mode for ingest. 

1025 entity : `DatasetRef`, `DatasetType`, or `StorageClass`, optional

1026 Object representing what will be ingested. If not provided (or not 

1027 specific enough), `True` may be returned even if expanded data 

1028 IDs aren't necessary. 

1029 

1030 Returns 

1031 ------- 

1032 needed : `bool` 

1033 If `True`, expanded data IDs may be needed. `False` only if 

1034 expansion definitely isn't necessary. 

1035 """ 

1036 return True 

1037 

1038 # TODO: make abstract, implement in all concrete datastores 

1039 def import_records( 

1040 self, 

1041 data: DatastoreRecordData, 

1042 ) -> None: 

1043 """Import datastore location and record data from an in-memory data 

1044 structure. 

1045 

1046 Parameters 

1047 ---------- 

1048 data : `DatastoreRecordData` 

1049 Data structure to load from. May contain data for other 

1050 `Datastore` instances (generally because they are chained to this 

1051 one), which should be ignored. 

1052 

1053 Notes 

1054 ----- 

1055 Implementations should generally not check that any external resources 

1056 (e.g. files) referred to by these records actually exist, for 

1057 performance reasons; we expect higher-level code to guarantee that they 

1058 do. 

1059 

1060 Implementations are responsible for calling 

1061 `DatastoreRegistryBridge.insert` on all datasets in ``data.locations`` 

1062 where the key is in `names`, as well as loading any opaque table data. 

1063 """ 

1064 raise NotImplementedError() 

1065 

1066 # TODO: make abstract, implement in all concrete datastores 

1067 def export_records( 

1068 self, 

1069 refs: Iterable[DatasetIdRef], 

1070 ) -> DatastoreRecordData: 

1071 """Export datastore records and locations from an in-memory data 

1072 structure. 

1073 

1074 Parameters 

1075 ---------- 

1076 refs : `Iterable` [ `DatasetIdRef` ] 

1077 Datasets to save. This may include datasets not known to this 

1078 datastore, which should be ignored. 

1079 

1080 Returns 

1081 ------- 

1082 data : `DatastoreRecordData` 

1083 Populated data structure. 

1084 """ 

1085 raise NotImplementedError()