Coverage for python/lsst/daf/butler/core/datastore.py: 42%

244 statements  

coverage.py v6.5.0, created at 2022-10-26 02:02 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22"""Support for generic data stores.""" 

23 

24from __future__ import annotations 

25 

26__all__ = ("DatastoreConfig", "Datastore", "DatastoreValidationError", "DatasetRefURIs") 

27 

28import contextlib 

29import dataclasses 

30import logging 

31from abc import ABCMeta, abstractmethod 

32from collections import abc, defaultdict 

33from typing import ( 

34 TYPE_CHECKING, 

35 Any, 

36 Callable, 

37 ClassVar, 

38 Dict, 

39 Iterable, 

40 Iterator, 

41 List, 

42 Mapping, 

43 Optional, 

44 Set, 

45 Tuple, 

46 Type, 

47 Union, 

48) 

49 

50from lsst.utils import doImportType 

51 

52from .config import Config, ConfigSubset 

53from .constraints import Constraints 

54from .exceptions import DatasetTypeNotSupportedError, ValidationError 

55from .fileDataset import FileDataset 

56from .storageClass import StorageClassFactory 

57 

58if TYPE_CHECKING: 

59 from lsst.resources import ResourcePath, ResourcePathExpression 

60 

61 from ..registry.interfaces import DatasetIdRef, DatastoreRegistryBridgeManager 

62 from .configSupport import LookupKey 

63 from .datasets import DatasetRef, DatasetType 

64 from .datastoreRecordData import DatastoreRecordData 

65 from .storageClass import StorageClass 

66 

67 

68class DatastoreConfig(ConfigSubset): 

69 """Configuration for Datastores.""" 

70 

71 component = "datastore" 

72 requiredKeys = ("cls",) 

73 defaultConfigFile = "datastore.yaml" 

74 
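# Illustrative sketch (not part of the original module): constructing a
# DatastoreConfig from an in-memory mapping instead of the usual
# ``datastore.yaml`` file.  The class path and nesting shown here are
# assumptions for illustration only.
#
#     config = DatastoreConfig(
#         {
#             "datastore": {
#                 "cls": "lsst.daf.butler.datastores.inMemoryDatastore.InMemoryDatastore",
#             }
#         }
#     )
#     # "cls" is the one key every DatastoreConfig is required to define.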

75 

76class DatastoreValidationError(ValidationError): 

77 """There is a problem with the Datastore configuration.""" 

78 

79 pass 

80 

81 

82@dataclasses.dataclass(frozen=True) 

83class Event: 

84 __slots__ = {"name", "undoFunc", "args", "kwargs"} 

85 name: str 

86 undoFunc: Callable 

87 args: tuple 

88 kwargs: dict 

89 

90 

91class IngestPrepData: 

92 """A helper base class for `Datastore` ingest implementations. 

93 

94 Datastore implementations will generally need a custom implementation of 

95 this class. 

96 

97 Should be accessed as ``Datastore.IngestPrepData`` instead of via direct 

98 import. 

99 

100 Parameters 

101 ---------- 

102 refs : iterable of `DatasetRef` 

103 References for the datasets that can be ingested by this datastore. 

104 """ 

105 

106 def __init__(self, refs: Iterable[DatasetRef]): 

107 self.refs = {ref.id: ref for ref in refs} 

108 

109 

110class DatastoreTransaction: 

111 """Keeps a log of `Datastore` activity and allow rollback. 

112 

113 Parameters 

114 ---------- 

115 parent : `DatastoreTransaction`, optional 

116 The parent transaction (if any). 

117 """ 

118 

119 Event: ClassVar[Type] = Event 

120 

121 parent: Optional[DatastoreTransaction] 

122 """The parent transaction. (`DatastoreTransaction`, optional)""" 

123 

124 def __init__(self, parent: Optional[DatastoreTransaction] = None): 

125 self.parent = parent 

126 self._log: List[Event] = [] 

127 

128 def registerUndo(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> None: 

129 """Register event with undo function. 

130 

131 Parameters 

132 ---------- 

133 name : `str` 

134 Name of the event. 

135 undoFunc : func 

136 Function to undo this event. 

137 args : `tuple` 

138 Positional arguments to `undoFunc`. 

139 **kwargs 

140 Keyword arguments to `undoFunc`. 

141 """ 

142 self._log.append(self.Event(name, undoFunc, args, kwargs)) 

143 

144 @contextlib.contextmanager 

145 def undoWith(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> Iterator[None]: 

146 """Register undo function if nested operation succeeds. 

147 

148 Calls `registerUndo`. 

149 

150 This can be used to wrap individual undo-able statements within a 

151 DatastoreTransaction block. Multiple statements that can fail 

152 separately should not be part of the same `undoWith` block. 

153 

154 All arguments are forwarded directly to `registerUndo`. 

155 """ 

156 try: 

157 yield None 

158 except BaseException: 

159 raise 

160 else: 

161 self.registerUndo(name, undoFunc, *args, **kwargs) 

162 
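# Usage sketch (illustrative, not from the original source): registering an
# undo action so a later failure can restore prior state.  ``os.remove`` is
# only an example undo callable and ``write_file`` is a hypothetical helper.
#
#     import os
#
#     txn = DatastoreTransaction()
#     with txn.undoWith("write file", os.remove, "/tmp/example.dat"):
#         write_file("/tmp/example.dat")
#     # ... a later operation fails ...
#     txn.rollback()   # calls os.remove("/tmp/example.dat")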

163 def rollback(self) -> None: 

164 """Roll back all events in this transaction.""" 

165 log = logging.getLogger(__name__) 

166 while self._log: 

167 ev = self._log.pop() 

168 try: 

169 log.debug( 

170 "Rolling back transaction: %s: %s(%s,%s)", 

171 ev.name, 

172 ev.undoFunc, 

173 ",".join(str(a) for a in ev.args), 

174 ",".join(f"{k}={v}" for k, v in ev.kwargs.items()), 

175 ) 

176 except Exception: 

177 # In case we had a problem in stringification of arguments 

178 log.warning("Rolling back transaction: %s", ev.name) 

179 try: 

180 ev.undoFunc(*ev.args, **ev.kwargs) 

181 except BaseException as e: 

182 # Deliberately swallow error that may occur in unrolling 

183 log.warning("Exception: %s caught while unrolling: %s", e, ev.name) 

184 pass 

185 

186 def commit(self) -> None: 

187 """Commit this transaction.""" 

188 if self.parent is None: 

189 # Just forget about the events, they have already happened. 

190 return 

191 else: 

192 # We may still want to roll back events from this transaction as 

193 # part of the parent. 

194 self.parent._log.extend(self._log) 

195 

196 

197@dataclasses.dataclass 

198class DatasetRefURIs(abc.Sequence): 

199 """Represents the primary and component ResourcePath(s) associated with a 

200 DatasetRef. 

201 

202 This is used in places where its members used to be represented as a tuple 

203 `(primaryURI, componentURIs)`. To maintain backward compatibility this 

204 inherits from Sequence and so instances can be treated as a two-item 

205 tuple. 

206 """ 

207 

208 def __init__( 

209 self, 

210 primaryURI: Optional[ResourcePath] = None, 

211 componentURIs: Optional[Dict[str, ResourcePath]] = None, 

212 ): 

213 

214 self.primaryURI = primaryURI 

215 """The URI to the primary artifact associated with this dataset. If the 

216 dataset was disassembled within the datastore this may be `None`. 

217 """ 

218 

219 self.componentURIs = componentURIs or {} 

220 """The URIs to any components associated with the dataset artifact 

221 indexed by component name. This can be empty if there are no 

222 components. 

223 """ 

224 

225 def __getitem__(self, index: Any) -> Any: 

226 """Get primaryURI and componentURIs by index. 

227 

228 Provides support for tuple-like access. 

229 """ 

230 if index == 0: 

231 return self.primaryURI 

232 elif index == 1: 

233 return self.componentURIs 

234 raise IndexError("list index out of range") 

235 

236 def __len__(self) -> int: 

237 """Get the number of data members. 

238 

239 Provides support for tuple-like access. 

240 """ 

241 return 2 

242 

243 def __repr__(self) -> str: 

244 return f"DatasetRefURIs({repr(self.primaryURI)}, {repr(self.componentURIs)})" 

245 
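# Illustrative sketch (not part of the original module): DatasetRefURIs keeps
# backward compatibility with the old two-item tuple return value, so both
# attribute and tuple-style access work.  ``some_uri`` stands in for an
# ``lsst.resources.ResourcePath`` instance.
#
#     uris = DatasetRefURIs(primaryURI=None, componentURIs={"wcs": some_uri})
#     primary, components = uris          # tuple-style unpacking
#     assert primary is uris.primaryURI
#     assert components is uris.componentURIs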

246 

247class Datastore(metaclass=ABCMeta): 

248 """Datastore interface. 

249 

250 Parameters 

251 ---------- 

252 config : `DatastoreConfig` or `str` 

253 Load configuration either from an existing config instance or by 

254 referring to a configuration file. 

255 bridgeManager : `DatastoreRegistryBridgeManager` 

256 Object that manages the interface between `Registry` and datastores. 

257 butlerRoot : `str`, optional 

258 New datastore root to use to override the configuration value. 

259 """ 

260 

261 defaultConfigFile: ClassVar[Optional[str]] = None 

262 """Path to configuration defaults. Accessed within the ``config`` resource 

263 or relative to a search path. Can be None if no defaults specified. 

264 """ 

265 

266 containerKey: ClassVar[Optional[str]] = None 

267 """Name of the key containing a list of subconfigurations that also 

268 need to be merged with defaults and will likely use different Python 

269 datastore classes (but all using DatastoreConfig). Assumed to be a 

270 list of configurations that can be represented in a DatastoreConfig 

271 and containing a "cls" definition. None indicates that no containers 

272 are expected in this Datastore.""" 

273 

274 isEphemeral: bool = False 

275 """Indicate whether this Datastore is ephemeral or not. An ephemeral 

276 datastore is one where the contents of the datastore will not exist 

277 across process restarts. This value can change per-instance.""" 

278 

279 config: DatastoreConfig 

280 """Configuration used to create Datastore.""" 

281 

282 name: str 

283 """Label associated with this Datastore.""" 

284 

285 storageClassFactory: StorageClassFactory 

286 """Factory for creating storage class instances from name.""" 

287 

288 constraints: Constraints 

289 """Constraints to apply when putting datasets into the datastore.""" 

290 

291 # MyPy does not like for this to be annotated as any kind of type, because 

292 # it can't do static checking on type variables that can change at runtime. 

293 IngestPrepData: ClassVar[Any] = IngestPrepData 

294 """Helper base class for ingest implementations. 

295 """ 

296 

297 @classmethod 

298 @abstractmethod 

299 def setConfigRoot(cls, root: str, config: Config, full: Config, overwrite: bool = True) -> None: 

300 """Set filesystem-dependent config options for this datastore. 

301 

302 The options will be appropriate for a new empty repository with the 

303 given root. 

304 

305 Parameters 

306 ---------- 

307 root : `str` 

308 Filesystem path to the root of the data repository. 

309 config : `Config` 

310 A `Config` to update. Only the subset understood by 

311 this component will be updated. Will not expand 

312 defaults. 

313 full : `Config` 

314 A complete config with all defaults expanded that can be 

315 converted to a `DatastoreConfig`. Read-only and will not be 

316 modified by this method. 

317 Repository-specific options that should not be obtained 

318 from defaults when Butler instances are constructed 

319 should be copied from ``full`` to ``config``. 

320 overwrite : `bool`, optional 

321 If `False`, do not modify a value in ``config`` if the value 

322 already exists. Default is always to overwrite with the provided 

323 ``root``. 

324 

325 Notes 

326 ----- 

327 If a keyword is explicitly defined in the supplied ``config`` it 

328 will not be overridden by this method if ``overwrite`` is `False`. 

329 This allows explicit values set in external configs to be retained. 

330 """ 

331 raise NotImplementedError() 

332 

333 @staticmethod 

334 def fromConfig( 

335 config: Config, 

336 bridgeManager: DatastoreRegistryBridgeManager, 

337 butlerRoot: Optional[ResourcePathExpression] = None, 

338 ) -> "Datastore": 

339 """Create datastore from type specified in config file. 

340 

341 Parameters 

342 ---------- 

343 config : `Config` 

344 Configuration instance. 

345 bridgeManager : `DatastoreRegistryBridgeManager` 

346 Object that manages the interface between `Registry` and 

347 datastores. 

348 butlerRoot : `str`, optional 

349 Butler root directory. 

350 """ 

351 cls = doImportType(config["datastore", "cls"]) 

352 if not issubclass(cls, Datastore): 

353 raise TypeError(f"Imported child class {config['datastore', 'cls']} is not a Datastore") 

354 return cls(config=config, bridgeManager=bridgeManager, butlerRoot=butlerRoot) 

355 
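# Hedged usage sketch (not from the original source): constructing a concrete
# datastore from configuration.  ``butler_config`` and ``bridge_manager`` are
# assumed to come from an existing repository and registry; they are not
# defined here.
#
#     datastore = Datastore.fromConfig(
#         butler_config, bridgeManager=bridge_manager, butlerRoot="/repo/main"
#     )
#
# The configuration must contain a ``datastore.cls`` entry naming a Datastore
# subclass; otherwise a TypeError is raised.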

356 def __init__( 

357 self, 

358 config: Union[Config, str], 

359 bridgeManager: DatastoreRegistryBridgeManager, 

360 butlerRoot: Optional[ResourcePathExpression] = None, 

361 ): 

362 self.config = DatastoreConfig(config) 

363 self.name = "ABCDataStore" 

364 self._transaction: Optional[DatastoreTransaction] = None 

365 

366 # All Datastores need storage classes and constraints 

367 self.storageClassFactory = StorageClassFactory() 

368 

369 # And read the constraints list 

370 constraintsConfig = self.config.get("constraints") 

371 self.constraints = Constraints(constraintsConfig, universe=bridgeManager.universe) 

372 

373 def __str__(self) -> str: 

374 return self.name 

375 

376 def __repr__(self) -> str: 

377 return self.name 

378 

379 @property 

380 def names(self) -> Tuple[str, ...]: 

381 """Names associated with this datastore returned as a list. 

382 

383 Can be different to ``name`` for a chaining datastore. 

384 """ 

385 # Default implementation returns solely the name itself 

386 return (self.name,) 

387 

388 @contextlib.contextmanager 

389 def transaction(self) -> Iterator[DatastoreTransaction]: 

390 """Context manager supporting `Datastore` transactions. 

391 

392 Transactions can be nested, and are to be used in combination with 

393 `Registry.transaction`. 

394 """ 

395 self._transaction = DatastoreTransaction(self._transaction) 

396 try: 

397 yield self._transaction 

398 except BaseException: 

399 self._transaction.rollback() 

400 raise 

401 else: 

402 self._transaction.commit() 

403 self._transaction = self._transaction.parent 

404 
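# Usage sketch (illustrative): transactions nest, and an exception inside the
# block rolls back any registered undo actions before re-raising.
# ``datastore``, ``obj`` and ``ref`` are assumed to exist in the caller.
#
#     with datastore.transaction():
#         datastore.put(obj, ref)
#         with datastore.transaction():      # nested transaction
#             datastore.put(other_obj, other_ref)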

405 @abstractmethod 

406 def knows(self, ref: DatasetRef) -> bool: 

407 """Check if the dataset is known to the datastore. 

408 

409 Does not check for existence of any artifact. 

410 

411 Parameters 

412 ---------- 

413 ref : `DatasetRef` 

414 Reference to the required dataset. 

415 

416 Returns 

417 ------- 

418 exists : `bool` 

419 `True` if the dataset is known to the datastore. 

420 """ 

421 raise NotImplementedError() 

422 

423 def mexists( 

424 self, refs: Iterable[DatasetRef], artifact_existence: Optional[Dict[ResourcePath, bool]] = None 

425 ) -> Dict[DatasetRef, bool]: 

426 """Check the existence of multiple datasets at once. 

427 

428 Parameters 

429 ---------- 

430 refs : iterable of `DatasetRef` 

431 The datasets to be checked. 

432 artifact_existence : `dict` [`lsst.resources.ResourcePath`, `bool`] 

433 Optional mapping of datastore artifact to existence. Updated by 

434 this method with details of all artifacts tested. Can be `None` 

435 if the caller is not interested. 

436 

437 Returns 

438 ------- 

439 existence : `dict` of [`DatasetRef`, `bool`] 

440 Mapping from dataset to boolean indicating existence. 

441 """ 

442 existence: Dict[DatasetRef, bool] = {} 

443 # Non-optimized default. 

444 for ref in refs: 

445 existence[ref] = self.exists(ref) 

446 return existence 

447 
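# Usage sketch (illustrative): checking many datasets at once while reusing a
# per-artifact existence cache across calls.  ``datastore`` and ``refs`` are
# assumed.
#
#     artifact_cache: Dict[ResourcePath, bool] = {}
#     existence = datastore.mexists(refs, artifact_existence=artifact_cache)
#     missing = [ref for ref, exists in existence.items() if not exists]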

448 @abstractmethod 

449 def exists(self, datasetRef: DatasetRef) -> bool: 

450 """Check if the dataset exists in the datastore. 

451 

452 Parameters 

453 ---------- 

454 datasetRef : `DatasetRef` 

455 Reference to the required dataset. 

456 

457 Returns 

458 ------- 

459 exists : `bool` 

460 `True` if the entity exists in the `Datastore`. 

461 """ 

462 raise NotImplementedError("Must be implemented by subclass") 

463 

464 @abstractmethod 

465 def get( 

466 self, 

467 datasetRef: DatasetRef, 

468 parameters: Optional[Mapping[str, Any]] = None, 

469 storageClass: Optional[Union[StorageClass, str]] = None, 

470 ) -> Any: 

471 """Load an `InMemoryDataset` from the store. 

472 

473 Parameters 

474 ---------- 

475 datasetRef : `DatasetRef` 

476 Reference to the required Dataset. 

477 parameters : `dict` 

478 `StorageClass`-specific parameters that specify a slice of the 

479 Dataset to be loaded. 

480 storageClass : `StorageClass` or `str`, optional 

481 The storage class to be used to override the Python type 

482 returned by this method. By default the returned type matches 

483 the dataset type definition for this dataset. Specifying a 

484 read `StorageClass` can force a different type to be returned. 

485 This type must be compatible with the original type. 

486 

487 Returns 

488 ------- 

489 inMemoryDataset : `object` 

490 Requested Dataset or slice thereof as an InMemoryDataset. 

491 """ 

492 raise NotImplementedError("Must be implemented by subclass") 

493 
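# Usage sketch (illustrative): reading a dataset with storage-class-specific
# parameters and an optional read storage class override.  The parameter name
# "bbox" and the override "ImageF" are examples only and depend on the storage
# classes configured in a given repository.
#
#     cutout = datastore.get(ref, parameters={"bbox": bbox})
#     as_image = datastore.get(ref, storageClass="ImageF")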

494 @abstractmethod 

495 def put(self, inMemoryDataset: Any, datasetRef: DatasetRef) -> None: 

496 """Write a `InMemoryDataset` with a given `DatasetRef` to the store. 

497 

498 Parameters 

499 ---------- 

500 inMemoryDataset : `object` 

501 The Dataset to store. 

502 datasetRef : `DatasetRef` 

503 Reference to the associated Dataset. 

504 """ 

505 raise NotImplementedError("Must be implemented by subclass") 

506 

507 def _overrideTransferMode(self, *datasets: FileDataset, transfer: Optional[str] = None) -> Optional[str]: 

508 """Allow ingest transfer mode to be defaulted based on datasets. 

509 

510 Parameters 

511 ---------- 

512 datasets : `FileDataset` 

513 Each positional argument is a struct containing information about 

514 a file to be ingested, including its path (either absolute or 

515 relative to the datastore root, if applicable), a complete 

516 `DatasetRef` (with ``dataset_id not None``), and optionally a 

517 formatter class or its fully-qualified string name. If a formatter 

518 is not provided, this method should populate that attribute with 

519 the formatter the datastore would use for `put`. Subclasses are 

520 also permitted to modify the path attribute (typically to put it 

521 in what the datastore considers its standard form). 

522 transfer : `str`, optional 

523 How (and whether) the dataset should be added to the datastore. 

524 See `ingest` for details of transfer modes. 

525 

526 Returns 

527 ------- 

528 newTransfer : `str` 

529 Transfer mode to use. Will be identical to the supplied transfer 

530 mode unless "auto" is used. 

531 """ 

532 if transfer != "auto": 

533 return transfer 

534 raise RuntimeError(f"{transfer} is not allowed without specialization.") 

535 

536 def _prepIngest(self, *datasets: FileDataset, transfer: Optional[str] = None) -> IngestPrepData: 

537 """Process datasets to identify which ones can be ingested. 

538 

539 Parameters 

540 ---------- 

541 datasets : `FileDataset` 

542 Each positional argument is a struct containing information about 

543 a file to be ingested, including its path (either absolute or 

544 relative to the datastore root, if applicable), a complete 

545 `DatasetRef` (with ``dataset_id not None``), and optionally a 

546 formatter class or its fully-qualified string name. If a formatter 

547 is not provided, this method should populate that attribute with 

548 the formatter the datastore would use for `put`. Subclasses are 

549 also permitted to modify the path attribute (typically to put it 

550 in what the datastore considers its standard form). 

551 transfer : `str`, optional 

552 How (and whether) the dataset should be added to the datastore. 

553 See `ingest` for details of transfer modes. 

554 

555 Returns 

556 ------- 

557 data : `IngestPrepData` 

558 An instance of a subclass of `IngestPrepData`, used to pass 

559 arbitrary data from `_prepIngest` to `_finishIngest`. This should 

560 include only the datasets this datastore can actually ingest; 

561 others should be silently ignored (`Datastore.ingest` will inspect 

562 `IngestPrepData.refs` and raise `DatasetTypeNotSupportedError` if 

563 necessary). 

564 

565 Raises 

566 ------ 

567 NotImplementedError 

568 Raised if the datastore does not support the given transfer mode 

569 (including the case where ingest is not supported at all). 

570 FileNotFoundError 

571 Raised if one of the given files does not exist. 

572 FileExistsError 

573 Raised if transfer is not `None` but the (internal) location the 

574 file would be moved to is already occupied. 

575 

576 Notes 

577 ----- 

578 This method (along with `_finishIngest`) should be implemented by 

579 subclasses to provide ingest support instead of implementing `ingest` 

580 directly. 

581 

582 `_prepIngest` should not modify the data repository or given files in 

583 any way; all changes should be deferred to `_finishIngest`. 

584 

585 When possible, exceptions should be raised in `_prepIngest` instead of 

586 `_finishIngest`. `NotImplementedError` exceptions that indicate that 

587 the transfer mode is not supported must be raised by `_prepIngest` 

588 instead of `_finishIngest`. 

589 """ 

590 raise NotImplementedError(f"Datastore {self} does not support direct file-based ingest.") 

591 

592 def _finishIngest( 

593 self, prepData: IngestPrepData, *, transfer: Optional[str] = None, record_validation_info: bool = True 

594 ) -> None: 

595 """Complete an ingest operation. 

596 

597 Parameters 

598 ---------- 

599 prepData : `IngestPrepData` 

600 An instance of a subclass of `IngestPrepData`. Guaranteed to be 

601 the direct result of a call to `_prepIngest` on this datastore. 

602 transfer : `str`, optional 

603 How (and whether) the dataset should be added to the datastore. 

604 See `ingest` for details of transfer modes. 

605 record_validation_info : `bool`, optional 

606 If `True`, the default, the datastore can record validation 

607 information associated with the file. If `False` the datastore 

608 will not attempt to track any information such as checksums 

609 or file sizes. This can be useful if such information is tracked 

610 in an external system or if the file is to be compressed in place. 

611 It is up to the datastore whether this parameter is relevant. 

612 

613 Raises 

614 ------ 

615 FileNotFoundError 

616 Raised if one of the given files does not exist. 

617 FileExistsError 

618 Raised if transfer is not `None` but the (internal) location the 

619 file would be moved to is already occupied. 

620 

621 Notes 

622 ----- 

623 This method (along with `_prepIngest`) should be implemented by 

624 subclasses to provide ingest support instead of implementing `ingest` 

625 directly. 

626 """ 

627 raise NotImplementedError(f"Datastore {self} does not support direct file-based ingest.") 

628 
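# Minimal subclass sketch (illustrative only) of the two-step ingest protocol
# described above: ``_prepIngest`` filters and validates without touching the
# repository, ``_finishIngest`` performs the changes.  ``MyDatastore`` and the
# acceptance rule shown are hypothetical.
#
#     class MyDatastore(Datastore):
#         def _prepIngest(self, *datasets, transfer=None):
#             # Keep only the refs this datastore is willing to accept;
#             # the filtering rule here is a stand-in.
#             accepted = [ref for dataset in datasets for ref in dataset.refs]
#             return self.IngestPrepData(accepted)
#
#         def _finishIngest(self, prepData, *, transfer=None, record_validation_info=True):
#             for ref in prepData.refs.values():
#                 ...  # transfer the artifact and record it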

629 def ingest( 

630 self, *datasets: FileDataset, transfer: Optional[str] = None, record_validation_info: bool = True 

631 ) -> None: 

632 """Ingest one or more files into the datastore. 

633 

634 Parameters 

635 ---------- 

636 datasets : `FileDataset` 

637 Each positional argument is a struct containing information about 

638 a file to be ingested, including its path (either absolute or 

639 relative to the datastore root, if applicable), a complete 

640 `DatasetRef` (with ``dataset_id not None``), and optionally a 

641 formatter class or its fully-qualified string name. If a formatter 

642 is not provided, the one the datastore would use for ``put`` on 

643 that dataset is assumed. 

644 transfer : `str`, optional 

645 How (and whether) the dataset should be added to the datastore. 

646 If `None` (default), the file must already be in a location 

647 appropriate for the datastore (e.g. within its root directory), 

648 and will not be modified. Other choices include "move", "copy", 

649 "link", "symlink", "relsymlink", and "hardlink". "link" is a 

650 special transfer mode that will first try to make a hardlink and 

651 if that fails a symlink will be used instead. "relsymlink" creates 

652 a relative symlink rather than use an absolute path. 

653 Most datastores do not support all transfer modes. 

654 "auto" is a special option that will let the 

655 data store choose the most natural option for itself. 

656 record_validation_info : `bool`, optional 

657 If `True`, the default, the datastore can record validation 

658 information associated with the file. If `False` the datastore 

659 will not attempt to track any information such as checksums 

660 or file sizes. This can be useful if such information is tracked 

661 in an external system or if the file is to be compressed in place. 

662 It is up to the datastore whether this parameter is relevant. 

663 

664 Raises 

665 ------ 

666 NotImplementedError 

667 Raised if the datastore does not support the given transfer mode 

668 (including the case where ingest is not supported at all). 

669 DatasetTypeNotSupportedError 

670 Raised if one or more files to be ingested have a dataset type that 

671 is not supported by the datastore. 

672 FileNotFoundError 

673 Raised if one of the given files does not exist. 

674 FileExistsError 

675 Raised if transfer is not `None` but the (internal) location the 

676 file would be moved to is already occupied. 

677 

678 Notes 

679 ----- 

680 Subclasses should implement `_prepIngest` and `_finishIngest` instead 

681 of implementing `ingest` directly. Datastores that hold and 

682 delegate to child datastores may want to call those methods as well. 

683 

684 Subclasses are encouraged to document their supported transfer modes 

685 in their class documentation. 

686 """ 

687 # Allow a datastore to select a default transfer mode 

688 transfer = self._overrideTransferMode(*datasets, transfer=transfer) 

689 prepData = self._prepIngest(*datasets, transfer=transfer) 

690 refs = {ref.id: ref for dataset in datasets for ref in dataset.refs} 

691 if None in refs: 

692 # Find the file for the error message. There may be multiple 

693 # bad refs so look for all of them. 

694 unresolved_paths = {} 

695 for dataset in datasets: 

696 unresolved = [] 

697 for ref in dataset.refs: 

698 if ref.id is None: 

699 unresolved.append(ref) 

700 if unresolved: 

701 unresolved_paths[dataset.path] = unresolved 

702 raise RuntimeError( 

703 "Attempt to ingest unresolved DatasetRef from: " 

704 + ",".join(f"{p}: ({[str(r) for r in ref]})" for p, ref in unresolved_paths.items()) 

705 ) 

706 if refs.keys() != prepData.refs.keys(): 

707 unsupported = refs.keys() - prepData.refs.keys() 

708 # Group unsupported refs by DatasetType for an informative 

709 # but still concise error message. 

710 byDatasetType = defaultdict(list) 

711 for datasetId in unsupported: 

712 ref = refs[datasetId] 

713 byDatasetType[ref.datasetType].append(ref) 

714 raise DatasetTypeNotSupportedError( 

715 "DatasetType(s) not supported in ingest: " 

716 + ", ".join(f"{k.name} ({len(v)} dataset(s))" for k, v in byDatasetType.items()) 

717 ) 

718 self._finishIngest(prepData, transfer=transfer, record_validation_info=record_validation_info) 

719 
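# Usage sketch (illustrative): ingesting an existing file into the datastore.
# ``ref`` must already be resolved (``ref.id`` not `None`); the path is an
# example only.
#
#     datastore.ingest(
#         FileDataset(path="/data/raw/exp0001.fits", refs=[ref]),
#         transfer="copy",
#     )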

720 def transfer_from( 

721 self, 

722 source_datastore: Datastore, 

723 refs: Iterable[DatasetRef], 

724 local_refs: Optional[Iterable[DatasetRef]] = None, 

725 transfer: str = "auto", 

726 artifact_existence: Optional[Dict[ResourcePath, bool]] = None, 

727 ) -> None: 

728 """Transfer dataset artifacts from another datastore to this one. 

729 

730 Parameters 

731 ---------- 

732 source_datastore : `Datastore` 

733 The datastore from which to transfer artifacts. That datastore 

734 must be compatible with this datastore receiving the artifacts. 

735 refs : iterable of `DatasetRef` 

736 The datasets to transfer from the source datastore. 

737 local_refs : iterable of `DatasetRef`, optional 

738 The dataset refs known to the registry associated with this 

739 datastore. Can be `None` if the source and target datastores 

740 are using UUIDs. 

741 transfer : `str`, optional 

742 How (and whether) the dataset should be added to the datastore. 

743 Choices include "move", "copy", 

744 "link", "symlink", "relsymlink", and "hardlink". "link" is a 

745 special transfer mode that will first try to make a hardlink and 

746 if that fails a symlink will be used instead. "relsymlink" creates 

747 a relative symlink rather than use an absolute path. 

748 Most datastores do not support all transfer modes. 

749 "auto" (the default) is a special option that will let the 

750 data store choose the most natural option for itself. 

751 If the source location and transfer location are identical the 

752 transfer mode will be ignored. 

753 artifact_existence : `dict` [`lsst.resources.ResourcePath`, `bool`] 

754 Optional mapping of datastore artifact to existence. Updated by 

755 this method with details of all artifacts tested. Can be `None` 

756 if the caller is not interested. 

757 

758 Raises 

759 ------ 

760 TypeError 

761 Raised if the two datastores are not compatible. 

762 """ 

763 if type(self) is not type(source_datastore): 

764 raise TypeError( 

765 f"Datastore mismatch between this datastore ({type(self)}) and the " 

766 f"source datastore ({type(source_datastore)})." 

767 ) 

768 

769 raise NotImplementedError(f"Datastore {type(self)} must implement a transfer_from method.") 

770 

771 def getManyURIs( 

772 self, 

773 refs: Iterable[DatasetRef], 

774 predict: bool = False, 

775 allow_missing: bool = False, 

776 ) -> Dict[DatasetRef, DatasetRefURIs]: 

777 """Return URIs associated with many datasets. 

778 

779 Parameters 

780 ---------- 

781 refs : iterable of `DatasetRef` 

782 References to the required datasets. 

783 predict : `bool`, optional 

784 If the datastore does not know about a dataset, should it 

785 return a predicted URI or not? 

786 allow_missing : `bool` 

787 If `False`, and `predict` is `False`, will raise if a `DatasetRef` 

788 does not exist. 

789 

790 Returns 

791 ------- 

792 URIs : `dict` of [`DatasetRef`, `DatasetRefURIs`] 

793 A dict of primary and component URIs, indexed by the passed-in 

794 refs. 

795 

796 Raises 

797 ------ 

798 FileNotFoundError 

799 A URI has been requested for a dataset that does not exist and 

800 guessing is not allowed. 

801 

802 Notes 

803 ----- 

804 In file-based datastores, getManyURIs does not check that the files 

805 really exist; it assumes that if the datastore is aware of a file 

806 then it actually exists. 

807 """ 

808 uris: Dict[DatasetRef, DatasetRefURIs] = {} 

809 missing_refs = [] 

810 for ref in refs: 

811 try: 

812 uris[ref] = self.getURIs(ref, predict=predict) 

813 except FileNotFoundError: 

814 missing_refs.append(ref) 

815 if missing_refs and not allow_missing: 

816 raise FileNotFoundError( 

817 "Missing {} refs from datastore out of {} and predict=False.".format( 

818 num_missing := len(missing_refs), num_missing + len(uris) 

819 ) 

820 ) 

821 return uris 

822 
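# Usage sketch (illustrative): retrieving URIs for many refs while tolerating
# datasets the datastore does not know about.
#
#     uris = datastore.getManyURIs(refs, predict=False, allow_missing=True)
#     for ref, ref_uris in uris.items():
#         primary, components = ref_uris
#         ...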

823 @abstractmethod 

824 def getURIs(self, datasetRef: DatasetRef, predict: bool = False) -> DatasetRefURIs: 

825 """Return URIs associated with dataset. 

826 

827 Parameters 

828 ---------- 

829 datasetRef : `DatasetRef` 

830 Reference to the required dataset. 

831 predict : `bool`, optional 

832 If the datastore does not know about the dataset, should it 

833 return a predicted URI or not? 

834 

835 Returns 

836 ------- 

837 uris : `DatasetRefURIs` 

838 The URI to the primary artifact associated with this dataset (if 

839 the dataset was disassembled within the datastore this may be 

840 `None`), and the URIs to any components associated with the dataset 

841 artifact. (can be empty if there are no components). 

842 """ 

843 raise NotImplementedError() 

844 

845 @abstractmethod 

846 def getURI(self, datasetRef: DatasetRef, predict: bool = False) -> ResourcePath: 

847 """URI to the Dataset. 

848 

849 Parameters 

850 ---------- 

851 datasetRef : `DatasetRef` 

852 Reference to the required Dataset. 

853 predict : `bool` 

854 If `True` attempt to predict the URI for a dataset if it does 

855 not exist in datastore. 

856 

857 Returns 

858 ------- 

859 uri : `lsst.resources.ResourcePath` 

860 URI pointing to the Dataset within the datastore. If the 

861 Dataset does not exist in the datastore, the URI may be a guess. 

862 If the datastore does not have entities that relate well 

863 to the concept of a URI the returned URI string will be 

864 descriptive. The returned URI is not guaranteed to be obtainable. 

865 

866 Raises 

867 ------ 

868 FileNotFoundError 

869 A URI has been requested for a dataset that does not exist and 

870 guessing is not allowed. 

871 """ 

872 raise NotImplementedError("Must be implemented by subclass") 

873 

874 @abstractmethod 

875 def retrieveArtifacts( 

876 self, 

877 refs: Iterable[DatasetRef], 

878 destination: ResourcePath, 

879 transfer: str = "auto", 

880 preserve_path: bool = True, 

881 overwrite: bool = False, 

882 ) -> List[ResourcePath]: 

883 """Retrieve the artifacts associated with the supplied refs. 

884 

885 Parameters 

886 ---------- 

887 refs : iterable of `DatasetRef` 

888 The datasets for which artifacts are to be retrieved. 

889 A single ref can result in multiple artifacts. The refs must 

890 be resolved. 

891 destination : `lsst.resources.ResourcePath` 

892 Location to write the artifacts. 

893 transfer : `str`, optional 

894 Method to use to transfer the artifacts. Must be one of the options 

895 supported by `lsst.resources.ResourcePath.transfer_from()`. 

896 "move" is not allowed. 

897 preserve_path : `bool`, optional 

898 If `True` the full path of the artifact within the datastore 

899 is preserved. If `False` the final file component of the path 

900 is used. 

901 overwrite : `bool`, optional 

902 If `True` allow transfers to overwrite existing files at the 

903 destination. 

904 

905 Returns 

906 ------- 

907 targets : `list` of `lsst.resources.ResourcePath` 

908 URIs of file artifacts in destination location. Order is not 

909 preserved. 

910 

911 Notes 

912 ----- 

913 For non-file datastores the artifacts written to the destination 

914 may not match the representation inside the datastore. For example 

915 a hierarchical data structure in a NoSQL database may well be stored 

916 as a JSON file. 

917 """ 

918 raise NotImplementedError() 

919 
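# Usage sketch (illustrative): copying the artifacts for a set of refs to a
# local directory.  The destination path is an example.
#
#     from lsst.resources import ResourcePath
#
#     targets = datastore.retrieveArtifacts(
#         refs,
#         destination=ResourcePath("/tmp/export/", forceDirectory=True),
#         transfer="copy",
#         preserve_path=True,
#     )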

920 @abstractmethod 

921 def remove(self, datasetRef: DatasetRef) -> None: 

922 """Indicate to the Datastore that a Dataset can be removed. 

923 

924 Parameters 

925 ---------- 

926 datasetRef : `DatasetRef` 

927 Reference to the required Dataset. 

928 

929 Raises 

930 ------ 

931 FileNotFoundError 

932 When Dataset does not exist. 

933 

934 Notes 

935 ----- 

936 Some Datastores may implement this method as a silent no-op to 

937 disable Dataset deletion through standard interfaces. 

938 """ 

939 raise NotImplementedError("Must be implemented by subclass") 

940 

941 @abstractmethod 

942 def forget(self, refs: Iterable[DatasetRef]) -> None: 

943 """Indicate to the Datastore that it should remove all records of the 

944 given datasets, without actually deleting them. 

945 

946 Parameters 

947 ---------- 

948 refs : `Iterable` [ `DatasetRef` ] 

949 References to the datasets being forgotten. 

950 

951 Notes 

952 ----- 

953 Asking a datastore to forget a `DatasetRef` it does not hold should be 

954 a silent no-op, not an error. 

955 """ 

956 raise NotImplementedError("Must be implemented by subclass") 

957 

958 @abstractmethod 

959 def trash(self, ref: Union[DatasetRef, Iterable[DatasetRef]], ignore_errors: bool = True) -> None: 

960 """Indicate to the Datastore that a Dataset can be moved to the trash. 

961 

962 Parameters 

963 ---------- 

964 ref : `DatasetRef` or iterable thereof 

965 Reference(s) to the required Dataset. 

966 ignore_errors : `bool`, optional 

967 Determine whether errors should be ignored. When multiple 

968 refs are being trashed there will be no per-ref check. 

969 

970 Raises 

971 ------ 

972 FileNotFoundError 

973 When Dataset does not exist and errors are not ignored. Only 

974 checked if a single ref is supplied (and not in a list). 

975 

976 Notes 

977 ----- 

978 Some Datastores may implement this method as a silent no-op to 

979 disable Dataset deletion through standard interfaces. 

980 """ 

981 raise NotImplementedError("Must be implemented by subclass") 

982 

983 @abstractmethod 

984 def emptyTrash(self, ignore_errors: bool = True) -> None: 

985 """Remove all datasets from the trash. 

986 

987 Parameters 

988 ---------- 

989 ignore_errors : `bool`, optional 

990 Determine whether errors should be ignored. 

991 

992 Notes 

993 ----- 

994 Some Datastores may implement this method as a silent no-op to 

995 disable Dataset deletion through standard interfaces. 

996 """ 

997 raise NotImplementedError("Must be implemented by subclass") 

998 

999 @abstractmethod 

1000 def transfer(self, inputDatastore: Datastore, datasetRef: DatasetRef) -> None: 

1001 """Transfer a dataset from another datastore to this datastore. 

1002 

1003 Parameters 

1004 ---------- 

1005 inputDatastore : `Datastore` 

1006 The external `Datastore` from which to retrieve the Dataset. 

1007 datasetRef : `DatasetRef` 

1008 Reference to the required Dataset. 

1009 """ 

1010 raise NotImplementedError("Must be implemented by subclass") 

1011 

1012 def export( 

1013 self, refs: Iterable[DatasetRef], *, directory: Optional[str] = None, transfer: Optional[str] = None 

1014 ) -> Iterable[FileDataset]: 

1015 """Export datasets for transfer to another data repository. 

1016 

1017 Parameters 

1018 ---------- 

1019 refs : iterable of `DatasetRef` 

1020 Dataset references to be exported. 

1021 directory : `str`, optional 

1022 Path to a directory that should contain files corresponding to 

1023 output datasets. Ignored if ``transfer`` is `None`. 

1024 transfer : `str`, optional 

1025 Mode that should be used to move datasets out of the repository. 

1026 Valid options are the same as those of the ``transfer`` argument 

1027 to ``ingest``, and datastores may similarly signal that a transfer 

1028 mode is not supported by raising `NotImplementedError`. 

1029 

1030 Returns 

1031 ------- 

1032 datasets : iterable of `FileDataset` 

1033 Structs containing information about the exported datasets, in the 

1034 same order as ``refs``. 

1035 

1036 Raises 

1037 ------ 

1038 NotImplementedError 

1039 Raised if the given transfer mode is not supported. 

1040 """ 

1041 raise NotImplementedError(f"Transfer mode {transfer} not supported.") 

1042 

1043 @abstractmethod 

1044 def validateConfiguration( 

1045 self, entities: Iterable[Union[DatasetRef, DatasetType, StorageClass]], logFailures: bool = False 

1046 ) -> None: 

1047 """Validate some of the configuration for this datastore. 

1048 

1049 Parameters 

1050 ---------- 

1051 entities : iterable of `DatasetRef`, `DatasetType`, or `StorageClass` 

1052 Entities to test against this configuration. Can be differing 

1053 types. 

1054 logFailures : `bool`, optional 

1055 If `True`, output a log message for every validation error 

1056 detected. 

1057 

1058 Raises 

1059 ------ 

1060 DatastoreValidationError 

1061 Raised if there is a validation problem with a configuration. 

1062 

1063 Notes 

1064 ----- 

1065 Which parts of the configuration are validated is at the discretion 

1066 of each Datastore implementation. 

1067 """ 

1068 raise NotImplementedError("Must be implemented by subclass") 

1069 

1070 @abstractmethod 

1071 def validateKey(self, lookupKey: LookupKey, entity: Union[DatasetRef, DatasetType, StorageClass]) -> None: 

1072 """Validate a specific look up key with supplied entity. 

1073 

1074 Parameters 

1075 ---------- 

1076 lookupKey : `LookupKey` 

1077 Key to use to retrieve information from the datastore 

1078 configuration. 

1079 entity : `DatasetRef`, `DatasetType`, or `StorageClass` 

1080 Entity to compare with configuration retrieved using the 

1081 specified lookup key. 

1082 

1083 Raises 

1084 ------ 

1085 DatastoreValidationError 

1086 Raised if there is a problem with the combination of entity 

1087 and lookup key. 

1088 

1089 Notes 

1090 ----- 

1091 Bypasses the normal selection priorities by allowing a key that 

1092 would normally not be selected to be validated. 

1093 """ 

1094 raise NotImplementedError("Must be implemented by subclass") 

1095 

1096 @abstractmethod 

1097 def getLookupKeys(self) -> Set[LookupKey]: 

1098 """Return all the lookup keys relevant to this datastore. 

1099 

1100 Returns 

1101 ------- 

1102 keys : `set` of `LookupKey` 

1103 The keys stored internally for looking up information based 

1104 on `DatasetType` name or `StorageClass`. 

1105 """ 

1106 raise NotImplementedError("Must be implemented by subclass") 

1107 

1108 def needs_expanded_data_ids( 

1109 self, 

1110 transfer: Optional[str], 

1111 entity: Optional[Union[DatasetRef, DatasetType, StorageClass]] = None, 

1112 ) -> bool: 

1113 """Test whether this datastore needs expanded data IDs to ingest. 

1114 

1115 Parameters 

1116 ---------- 

1117 transfer : `str` or `None` 

1118 Transfer mode for ingest. 

1119 entity : `DatasetRef`, `DatasetType`, or `StorageClass`, optional 

1120 Object representing what will be ingested. If not provided (or not 

1121 specific enough), `True` may be returned even if expanded data 

1122 IDs aren't necessary. 

1123 

1124 Returns 

1125 ------- 

1126 needed : `bool` 

1127 If `True`, expanded data IDs may be needed. `False` only if 

1128 expansion definitely isn't necessary. 

1129 """ 

1130 return True 

1131 

1132 @abstractmethod 

1133 def import_records( 

1134 self, 

1135 data: Mapping[str, DatastoreRecordData], 

1136 ) -> None: 

1137 """Import datastore location and record data from an in-memory data 

1138 structure. 

1139 

1140 Parameters 

1141 ---------- 

1142 data : `Mapping` [ `str`, `DatastoreRecordData` ] 

1143 Datastore records indexed by datastore name. May contain data for 

1144 other `Datastore` instances (generally because they are chained to 

1145 this one), which should be ignored. 

1146 

1147 Notes 

1148 ----- 

1149 Implementations should generally not check that any external resources 

1150 (e.g. files) referred to by these records actually exist, for 

1151 performance reasons; we expect higher-level code to guarantee that they 

1152 do. 

1153 

1154 Implementations are responsible for calling 

1155 `DatastoreRegistryBridge.insert` on all datasets in ``data.locations`` 

1156 where the key is in `names`, as well as loading any opaque table data. 

1157 """ 

1158 raise NotImplementedError() 

1159 

1160 @abstractmethod 

1161 def export_records( 

1162 self, 

1163 refs: Iterable[DatasetIdRef], 

1164 ) -> Mapping[str, DatastoreRecordData]: 

1165 """Export datastore records and locations to an in-memory data 

1166 structure. 

1167 

1168 Parameters 

1169 ---------- 

1170 refs : `Iterable` [ `DatasetIdRef` ] 

1171 Datasets to save. This may include datasets not known to this 

1172 datastore, which should be ignored. 

1173 

1174 Returns 

1175 ------- 

1176 data : `Mapping` [ `str`, `DatastoreRecordData` ] 

1177 Exported datastore records indexed by datastore name. 

1178 """ 

1179 raise NotImplementedError()
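# Usage sketch (illustrative): moving datastore records between repositories
# by exporting from one datastore and importing into another.  ``source`` and
# ``target`` are assumed to be compatible Datastore instances sharing dataset
# UUIDs.
#
#     records = source.export_records(refs)
#     target.import_records(records)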