Coverage for python/lsst/daf/butler/core/datastore.py: 42% (244 statements)
coverage.py v6.5.0, created at 2022-12-08 14:18 -0800

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Support for generic data stores."""

from __future__ import annotations

__all__ = ("DatastoreConfig", "Datastore", "DatastoreValidationError", "DatasetRefURIs")

import contextlib
import dataclasses
import logging
from abc import ABCMeta, abstractmethod
from collections import abc, defaultdict
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    ClassVar,
    Dict,
    Iterable,
    Iterator,
    List,
    Mapping,
    Optional,
    Set,
    Tuple,
    Type,
    Union,
)

from lsst.utils import doImportType

from .config import Config, ConfigSubset
from .constraints import Constraints
from .exceptions import DatasetTypeNotSupportedError, ValidationError
from .fileDataset import FileDataset
from .storageClass import StorageClassFactory

if TYPE_CHECKING:
    from lsst.resources import ResourcePath, ResourcePathExpression

    from ..registry.interfaces import DatasetIdRef, DatastoreRegistryBridgeManager
    from .configSupport import LookupKey
    from .datasets import DatasetRef, DatasetType
    from .datastoreRecordData import DatastoreRecordData
    from .storageClass import StorageClass


class DatastoreConfig(ConfigSubset):
    """Configuration for Datastores."""

    component = "datastore"
    requiredKeys = ("cls",)
    defaultConfigFile = "datastore.yaml"


class DatastoreValidationError(ValidationError):
    """There is a problem with the Datastore configuration."""

    pass


@dataclasses.dataclass(frozen=True)
class Event:
    __slots__ = {"name", "undoFunc", "args", "kwargs"}
    name: str
    undoFunc: Callable
    args: tuple
    kwargs: dict


class IngestPrepData:
    """A helper base class for `Datastore` ingest implementations.

    Datastore implementations will generally need a custom implementation of
    this class.

    Should be accessed as ``Datastore.IngestPrepData`` instead of via direct
    import.

    Parameters
    ----------
    refs : iterable of `DatasetRef`
        References for the datasets that can be ingested by this datastore.
    """

    def __init__(self, refs: Iterable[DatasetRef]):
        self.refs = {ref.id: ref for ref in refs}


class DatastoreTransaction:
    """Keeps a log of `Datastore` activity and allows rollback.

    Parameters
    ----------
    parent : `DatastoreTransaction`, optional
        The parent transaction (if any).
    """

    Event: ClassVar[Type] = Event

    parent: Optional[DatastoreTransaction]
    """The parent transaction. (`DatastoreTransaction`, optional)"""

    def __init__(self, parent: Optional[DatastoreTransaction] = None):
        self.parent = parent
        self._log: List[Event] = []

    def registerUndo(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> None:
        """Register event with undo function.

        Parameters
        ----------
        name : `str`
            Name of the event.
        undoFunc : func
            Function to undo this event.
        args : `tuple`
            Positional arguments to `undoFunc`.
        **kwargs
            Keyword arguments to `undoFunc`.
        """
        self._log.append(self.Event(name, undoFunc, args, kwargs))

    @contextlib.contextmanager
    def undoWith(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> Iterator[None]:
        """Register undo function if nested operation succeeds.

        Calls `registerUndo`.

        This can be used to wrap individual undo-able statements within a
        DatastoreTransaction block. Multiple statements that can fail
        separately should not be part of the same `undoWith` block.

        All arguments are forwarded directly to `registerUndo`.
        """
        try:
            yield None
        except BaseException:
            raise
        else:
            self.registerUndo(name, undoFunc, *args, **kwargs)
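
    # Usage sketch (comment only; the ``os`` import and file path are
    # illustrative assumptions, not part of this module). The undo action is
    # registered only if the guarded statement succeeds:
    #
    #     txn = DatastoreTransaction()
    #     with txn.undoWith("create file", os.remove, "/tmp/example.dat"):
    #         open("/tmp/example.dat", "w").close()
    #     # A later txn.rollback() would call os.remove("/tmp/example.dat").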

    def rollback(self) -> None:
        """Roll back all events in this transaction."""
        log = logging.getLogger(__name__)
        while self._log:
            ev = self._log.pop()
            try:
                log.debug(
                    "Rolling back transaction: %s: %s(%s,%s)",
                    ev.name,
                    ev.undoFunc,
                    ",".join(str(a) for a in ev.args),
                    ",".join(f"{k}={v}" for k, v in ev.kwargs.items()),
                )
            except Exception:
                # In case we had a problem in stringification of arguments.
                log.warning("Rolling back transaction: %s", ev.name)
            try:
                ev.undoFunc(*ev.args, **ev.kwargs)
            except BaseException as e:
                # Deliberately swallow any error that occurs while unrolling.
                log.warning("Exception: %s caught while unrolling: %s", e, ev.name)

    def commit(self) -> None:
        """Commit this transaction."""
        if self.parent is None:
            # Just forget about the events, they have already happened.
            return
        else:
            # We may still want to undo events from this transaction as part
            # of the parent, so transfer them to the parent's log.
            self.parent._log.extend(self._log)
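
# Semantics sketch: undo events run in LIFO order on rollback, and a nested
# transaction's events are merged into its parent's log on commit.
# (Illustrative values only.)
#
#     parent = DatastoreTransaction()
#     child = DatastoreTransaction(parent)
#     child.registerUndo("report", print, "undoing")
#     child.commit()      # the event now lives in the parent's log
#     parent.rollback()   # prints "undoing"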


@dataclasses.dataclass
class DatasetRefURIs(abc.Sequence):
    """Represents the primary and component ResourcePath(s) associated with a
    DatasetRef.

    This is used in places where its members used to be represented as a
    tuple ``(primaryURI, componentURIs)``. To maintain backward compatibility
    this inherits from Sequence and so instances can be treated as a two-item
    tuple.
    """

    def __init__(
        self,
        primaryURI: Optional[ResourcePath] = None,
        componentURIs: Optional[Dict[str, ResourcePath]] = None,
    ):
        self.primaryURI = primaryURI
        """The URI to the primary artifact associated with this dataset. If
        the dataset was disassembled within the datastore this may be `None`.
        """

        self.componentURIs = componentURIs or {}
        """The URIs to any components associated with the dataset artifact,
        indexed by component name. This can be empty if there are no
        components.
        """

    def __getitem__(self, index: Any) -> Any:
        """Get primaryURI and componentURIs by index.

        Provides support for tuple-like access.
        """
        if index == 0:
            return self.primaryURI
        elif index == 1:
            return self.componentURIs
        raise IndexError("list index out of range")

    def __len__(self) -> int:
        """Get the number of data members.

        Provides support for tuple-like access.
        """
        return 2

    def __repr__(self) -> str:
        return f"DatasetRefURIs({repr(self.primaryURI)}, {repr(self.componentURIs)})"
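
# Compatibility sketch (hypothetical URI values): instances unpack like the
# old two-item tuple while still exposing named attributes.
#
#     uris = DatasetRefURIs(primary_uri, {"wcs": wcs_uri})
#     primaryURI, componentURIs = uris       # tuple-style unpacking
#     assert uris[0] is uris.primaryURI      # index access also works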


class Datastore(metaclass=ABCMeta):
    """Datastore interface.

    Parameters
    ----------
    config : `DatastoreConfig` or `str`
        Load configuration either from an existing config instance or by
        referring to a configuration file.
    bridgeManager : `DatastoreRegistryBridgeManager`
        Object that manages the interface between `Registry` and datastores.
    butlerRoot : `str`, optional
        New datastore root to use to override the configuration value.
    """

    defaultConfigFile: ClassVar[Optional[str]] = None
    """Path to configuration defaults. Accessed within the ``config``
    resource or relative to a search path. Can be `None` if no defaults are
    specified.
    """

    containerKey: ClassVar[Optional[str]] = None
    """Name of the key containing a list of subconfigurations that also
    need to be merged with defaults and will likely use different Python
    datastore classes (but all using DatastoreConfig). Assumed to be a
    list of configurations that can be represented in a DatastoreConfig
    and containing a "cls" definition. `None` indicates that no containers
    are expected in this Datastore."""

    isEphemeral: bool = False
    """Indicate whether this Datastore is ephemeral or not. An ephemeral
    datastore is one where the contents of the datastore will not exist
    across process restarts. This value can change per-instance."""

    config: DatastoreConfig
    """Configuration used to create Datastore."""

    name: str
    """Label associated with this Datastore."""

    storageClassFactory: StorageClassFactory
    """Factory for creating storage class instances from name."""

    constraints: Constraints
    """Constraints to apply when putting datasets into the datastore."""

    # MyPy does not like for this to be annotated as any kind of type,
    # because it can't do static checking on type variables that can change
    # at runtime.
    IngestPrepData: ClassVar[Any] = IngestPrepData
    """Helper base class for ingest implementations.
    """

    @classmethod
    @abstractmethod
    def setConfigRoot(cls, root: str, config: Config, full: Config, overwrite: bool = True) -> None:
        """Set filesystem-dependent config options for this datastore.

        The options will be appropriate for a new empty repository with the
        given root.

        Parameters
        ----------
        root : `str`
            Filesystem path to the root of the data repository.
        config : `Config`
            A `Config` to update. Only the subset understood by
            this component will be updated. Will not expand
            defaults.
        full : `Config`
            A complete config with all defaults expanded that can be
            converted to a `DatastoreConfig`. Read-only and will not be
            modified by this method.
            Repository-specific options that should not be obtained
            from defaults when Butler instances are constructed
            should be copied from ``full`` to ``config``.
        overwrite : `bool`, optional
            If `False`, do not modify a value in ``config`` if the value
            already exists. Default is always to overwrite with the provided
            ``root``.

        Notes
        -----
        If a keyword is explicitly defined in the supplied ``config`` it
        will not be overridden by this method if ``overwrite`` is `False`.
        This allows explicit values set in external configs to be retained.
        """
        raise NotImplementedError()

    @staticmethod
    def fromConfig(
        config: Config,
        bridgeManager: DatastoreRegistryBridgeManager,
        butlerRoot: Optional[ResourcePathExpression] = None,
    ) -> "Datastore":
        """Create datastore from type specified in config file.

        Parameters
        ----------
        config : `Config`
            Configuration instance.
        bridgeManager : `DatastoreRegistryBridgeManager`
            Object that manages the interface between `Registry` and
            datastores.
        butlerRoot : `str`, optional
            Butler root directory.
        """
        cls = doImportType(config["datastore", "cls"])
        if not issubclass(cls, Datastore):
            raise TypeError(f"Imported child class {config['datastore', 'cls']} is not a Datastore")
        return cls(config=config, bridgeManager=bridgeManager, butlerRoot=butlerRoot)
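
    # Config sketch (assumed YAML layout; only the "cls" key is required by
    # DatastoreConfig.requiredKeys). fromConfig imports the named class and
    # delegates construction to it:
    #
    #     datastore:
    #       cls: lsst.daf.butler.datastores.fileDatastore.FileDatastore
    #       root: <butlerRoot>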

    def __init__(
        self,
        config: Union[Config, str],
        bridgeManager: DatastoreRegistryBridgeManager,
        butlerRoot: Optional[ResourcePathExpression] = None,
    ):
        self.config = DatastoreConfig(config)
        self.name = "ABCDataStore"
        self._transaction: Optional[DatastoreTransaction] = None

        # All Datastores need storage classes and constraints.
        self.storageClassFactory = StorageClassFactory()

        # And read the constraints list.
        constraintsConfig = self.config.get("constraints")
        self.constraints = Constraints(constraintsConfig, universe=bridgeManager.universe)

    def __str__(self) -> str:
        return self.name

    def __repr__(self) -> str:
        return self.name

    @property
    def names(self) -> Tuple[str, ...]:
        """Names associated with this datastore returned as a tuple.

        Can be different from ``name`` for a chaining datastore.
        """
        # Default implementation returns solely the name itself.
        return (self.name,)

    @contextlib.contextmanager
    def transaction(self) -> Iterator[DatastoreTransaction]:
        """Context manager supporting `Datastore` transactions.

        Transactions can be nested, and are to be used in combination with
        `Registry.transaction`.
        """
        self._transaction = DatastoreTransaction(self._transaction)
        try:
            yield self._transaction
        except BaseException:
            self._transaction.rollback()
            raise
        else:
            self._transaction.commit()
        self._transaction = self._transaction.parent
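
    # Usage sketch (``datastore`` is an instance of a concrete subclass;
    # ``obj``/``ref`` are illustrative). Any exception raised inside the
    # block rolls back everything registered since it began; transactions
    # nest, with nested commits folding into the enclosing transaction.
    #
    #     with datastore.transaction():
    #         datastore.put(obj, ref)
    #         with datastore.transaction():
    #             datastore.put(other_obj, other_ref)
    #     # Leaving the outer block without error commits both puts.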

    @abstractmethod
    def knows(self, ref: DatasetRef) -> bool:
        """Check if the dataset is known to the datastore.

        Does not check for existence of any artifact.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required dataset.

        Returns
        -------
        exists : `bool`
            `True` if the dataset is known to the datastore.
        """
        raise NotImplementedError()

    def mexists(
        self, refs: Iterable[DatasetRef], artifact_existence: Optional[Dict[ResourcePath, bool]] = None
    ) -> Dict[DatasetRef, bool]:
        """Check the existence of multiple datasets at once.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            The datasets to be checked.
        artifact_existence : `dict` [`lsst.resources.ResourcePath`, `bool`]
            Optional mapping of datastore artifact to existence. Updated by
            this method with details of all artifacts tested. Can be `None`
            if the caller is not interested.

        Returns
        -------
        existence : `dict` of [`DatasetRef`, `bool`]
            Mapping from dataset to boolean indicating existence.
        """
        existence: Dict[DatasetRef, bool] = {}
        # Non-optimized default.
        for ref in refs:
            existence[ref] = self.exists(ref)
        return existence
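
    # Caller sketch (``refs`` is an illustrative iterable of resolved
    # DatasetRefs). A shared ``artifact_existence`` dict lets optimized
    # subclasses reuse earlier artifact checks; this default implementation
    # simply ignores it.
    #
    #     cache: Dict[ResourcePath, bool] = {}
    #     existence = datastore.mexists(refs, artifact_existence=cache)
    #     missing = [ref for ref, found in existence.items() if not found]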

    @abstractmethod
    def exists(self, datasetRef: DatasetRef) -> bool:
        """Check if the dataset exists in the datastore.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required dataset.

        Returns
        -------
        exists : `bool`
            `True` if the entity exists in the `Datastore`.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def get(self, datasetRef: DatasetRef, parameters: Optional[Mapping[str, Any]] = None) -> Any:
        """Load an `InMemoryDataset` from the store.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required Dataset.
        parameters : `dict`, optional
            `StorageClass`-specific parameters that specify a slice of the
            Dataset to be loaded.

        Returns
        -------
        inMemoryDataset : `object`
            Requested Dataset or slice thereof as an InMemoryDataset.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def put(self, inMemoryDataset: Any, datasetRef: DatasetRef) -> None:
        """Write an `InMemoryDataset` with a given `DatasetRef` to the store.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Dataset to store.
        datasetRef : `DatasetRef`
            Reference to the associated Dataset.
        """
        raise NotImplementedError("Must be implemented by subclass")

    def _overrideTransferMode(self, *datasets: FileDataset, transfer: Optional[str] = None) -> Optional[str]:
        """Allow ingest transfer mode to be defaulted based on datasets.

        Parameters
        ----------
        datasets : `FileDataset`
            Each positional argument is a struct containing information about
            a file to be ingested, including its path (either absolute or
            relative to the datastore root, if applicable), a complete
            `DatasetRef` (with ``dataset_id not None``), and optionally a
            formatter class or its fully-qualified string name. If a
            formatter is not provided, this method should populate that
            attribute with the formatter the datastore would use for `put`.
            Subclasses are also permitted to modify the path attribute
            (typically to put it in what the datastore considers its
            standard form).
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            See `ingest` for details of transfer modes.

        Returns
        -------
        newTransfer : `str`
            Transfer mode to use. Will be identical to the supplied transfer
            mode unless "auto" is used.
        """
        if transfer != "auto":
            return transfer
        raise RuntimeError(f"{transfer} is not allowed without specialization.")
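
    # Override sketch (hypothetical subclass, not part of this module): a
    # datastore with a single natural transfer mode can resolve "auto"
    # instead of raising.
    #
    #     class CopyingDatastore(Datastore):
    #         def _overrideTransferMode(self, *datasets, transfer=None):
    #             return "copy" if transfer == "auto" else transfer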

    def _prepIngest(self, *datasets: FileDataset, transfer: Optional[str] = None) -> IngestPrepData:
        """Process datasets to identify which ones can be ingested.

        Parameters
        ----------
        datasets : `FileDataset`
            Each positional argument is a struct containing information about
            a file to be ingested, including its path (either absolute or
            relative to the datastore root, if applicable), a complete
            `DatasetRef` (with ``dataset_id not None``), and optionally a
            formatter class or its fully-qualified string name. If a
            formatter is not provided, this method should populate that
            attribute with the formatter the datastore would use for `put`.
            Subclasses are also permitted to modify the path attribute
            (typically to put it in what the datastore considers its
            standard form).
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            See `ingest` for details of transfer modes.

        Returns
        -------
        data : `IngestPrepData`
            An instance of a subclass of `IngestPrepData`, used to pass
            arbitrary data from `_prepIngest` to `_finishIngest`. This should
            include only the datasets this datastore can actually ingest;
            others should be silently ignored (`Datastore.ingest` will
            inspect `IngestPrepData.refs` and raise
            `DatasetTypeNotSupportedError` if necessary).

        Raises
        ------
        NotImplementedError
            Raised if the datastore does not support the given transfer mode
            (including the case where ingest is not supported at all).
        FileNotFoundError
            Raised if one of the given files does not exist.
        FileExistsError
            Raised if transfer is not `None` but the (internal) location the
            file would be moved to is already occupied.

        Notes
        -----
        This method (along with `_finishIngest`) should be implemented by
        subclasses to provide ingest support instead of implementing `ingest`
        directly.

        `_prepIngest` should not modify the data repository or given files in
        any way; all changes should be deferred to `_finishIngest`.

        When possible, exceptions should be raised in `_prepIngest` instead
        of `_finishIngest`. `NotImplementedError` exceptions that indicate
        that the transfer mode is not supported must be raised by
        `_prepIngest` instead of `_finishIngest`.
        """
        raise NotImplementedError(f"Datastore {self} does not support direct file-based ingest.")

    def _finishIngest(
        self, prepData: IngestPrepData, *, transfer: Optional[str] = None, record_validation_info: bool = True
    ) -> None:
        """Complete an ingest operation.

        Parameters
        ----------
        prepData : `IngestPrepData`
            An instance of a subclass of `IngestPrepData`. Guaranteed to be
            the direct result of a call to `_prepIngest` on this datastore.
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            See `ingest` for details of transfer modes.
        record_validation_info : `bool`, optional
            If `True`, the default, the datastore can record validation
            information associated with the file. If `False` the datastore
            will not attempt to track any information such as checksums
            or file sizes. This can be useful if such information is tracked
            in an external system or if the file is to be compressed in
            place. It is up to the datastore whether this parameter is
            relevant.

        Raises
        ------
        FileNotFoundError
            Raised if one of the given files does not exist.
        FileExistsError
            Raised if transfer is not `None` but the (internal) location the
            file would be moved to is already occupied.

        Notes
        -----
        This method (along with `_prepIngest`) should be implemented by
        subclasses to provide ingest support instead of implementing `ingest`
        directly.
        """
        raise NotImplementedError(f"Datastore {self} does not support direct file-based ingest.")
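
    # Implementation sketch (hypothetical subclass; assumes
    # Constraints.isAcceptable for filtering). Ingest support is added by
    # overriding the two hooks rather than ingest() itself: _prepIngest
    # filters without side effects, _finishIngest performs the writes.
    #
    #     class MyDatastore(Datastore):
    #         def _prepIngest(self, *datasets, transfer=None):
    #             accepted = [
    #                 ref
    #                 for dataset in datasets
    #                 for ref in dataset.refs
    #                 if self.constraints.isAcceptable(ref)
    #             ]
    #             return self.IngestPrepData(accepted)
    #
    #         def _finishIngest(self, prepData, *, transfer=None,
    #                           record_validation_info=True):
    #             ...  # move/copy the files and record internal locations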

    def ingest(
        self, *datasets: FileDataset, transfer: Optional[str] = None, record_validation_info: bool = True
    ) -> None:
        """Ingest one or more files into the datastore.

        Parameters
        ----------
        datasets : `FileDataset`
            Each positional argument is a struct containing information about
            a file to be ingested, including its path (either absolute or
            relative to the datastore root, if applicable), a complete
            `DatasetRef` (with ``dataset_id not None``), and optionally a
            formatter class or its fully-qualified string name. If a
            formatter is not provided, the one the datastore would use for
            ``put`` on that dataset is assumed.
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            If `None` (default), the file must already be in a location
            appropriate for the datastore (e.g. within its root directory),
            and will not be modified. Other choices include "move", "copy",
            "link", "symlink", "relsymlink", and "hardlink". "link" is a
            special transfer mode that will first try to make a hardlink and
            if that fails a symlink will be used instead. "relsymlink"
            creates a relative symlink rather than use an absolute path.
            Most datastores do not support all transfer modes.
            "auto" is a special option that will let the datastore choose
            the most natural option for itself.
        record_validation_info : `bool`, optional
            If `True`, the default, the datastore can record validation
            information associated with the file. If `False` the datastore
            will not attempt to track any information such as checksums
            or file sizes. This can be useful if such information is tracked
            in an external system or if the file is to be compressed in
            place. It is up to the datastore whether this parameter is
            relevant.

        Raises
        ------
        NotImplementedError
            Raised if the datastore does not support the given transfer mode
            (including the case where ingest is not supported at all).
        DatasetTypeNotSupportedError
            Raised if one or more files to be ingested have a dataset type
            that is not supported by the datastore.
        FileNotFoundError
            Raised if one of the given files does not exist.
        FileExistsError
            Raised if transfer is not `None` but the (internal) location the
            file would be moved to is already occupied.

        Notes
        -----
        Subclasses should implement `_prepIngest` and `_finishIngest` instead
        of implementing `ingest` directly. Datastores that hold and
        delegate to child datastores may want to call those methods as well.

        Subclasses are encouraged to document their supported transfer modes
        in their class documentation.
        """
        # Allow a datastore to select a default transfer mode.
        transfer = self._overrideTransferMode(*datasets, transfer=transfer)
        prepData = self._prepIngest(*datasets, transfer=transfer)
        refs = {ref.id: ref for dataset in datasets for ref in dataset.refs}
        if None in refs:
            # Find the files for the error message. There may be multiple
            # bad refs so look for all of them.
            unresolved_paths = {}
            for dataset in datasets:
                unresolved = []
                for ref in dataset.refs:
                    if ref.id is None:
                        unresolved.append(ref)
                if unresolved:
                    unresolved_paths[dataset.path] = unresolved
            raise RuntimeError(
                "Attempt to ingest unresolved DatasetRef from: "
                + ",".join(f"{path}: ({[str(r) for r in bad]})" for path, bad in unresolved_paths.items())
            )
        if refs.keys() != prepData.refs.keys():
            unsupported = refs.keys() - prepData.refs.keys()
            # Group unsupported refs by DatasetType for an informative
            # but still concise error message.
            byDatasetType = defaultdict(list)
            for datasetId in unsupported:
                ref = refs[datasetId]
                byDatasetType[ref.datasetType].append(ref)
            raise DatasetTypeNotSupportedError(
                "DatasetType(s) not supported in ingest: "
                + ", ".join(f"{k.name} ({len(v)} dataset(s))" for k, v in byDatasetType.items())
            )
        self._finishIngest(prepData, transfer=transfer, record_validation_info=record_validation_info)
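
    # Usage sketch (hypothetical paths and resolved refs): each FileDataset
    # pairs one file with the ref(s) it provides.
    #
    #     datastore.ingest(
    #         FileDataset(path="data/raw_1.fits", refs=[ref1]),
    #         FileDataset(path="data/raw_2.fits", refs=[ref2]),
    #         transfer="copy",
    #     )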

    def transfer_from(
        self,
        source_datastore: Datastore,
        refs: Iterable[DatasetRef],
        local_refs: Optional[Iterable[DatasetRef]] = None,
        transfer: str = "auto",
        artifact_existence: Optional[Dict[ResourcePath, bool]] = None,
    ) -> None:
        """Transfer dataset artifacts from another datastore to this one.

        Parameters
        ----------
        source_datastore : `Datastore`
            The datastore from which to transfer artifacts. That datastore
            must be compatible with this datastore receiving the artifacts.
        refs : iterable of `DatasetRef`
            The datasets to transfer from the source datastore.
        local_refs : iterable of `DatasetRef`, optional
            The dataset refs known to the registry associated with this
            datastore. Can be `None` if the source and target datastores
            are using UUIDs.
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            Choices include "move", "copy",
            "link", "symlink", "relsymlink", and "hardlink". "link" is a
            special transfer mode that will first try to make a hardlink and
            if that fails a symlink will be used instead. "relsymlink"
            creates a relative symlink rather than use an absolute path.
            Most datastores do not support all transfer modes.
            "auto" (the default) is a special option that will let the
            datastore choose the most natural option for itself.
            If the source location and transfer location are identical the
            transfer mode will be ignored.
        artifact_existence : `dict` [`lsst.resources.ResourcePath`, `bool`]
            Optional mapping of datastore artifact to existence. Updated by
            this method with details of all artifacts tested. Can be `None`
            if the caller is not interested.

        Raises
        ------
        TypeError
            Raised if the two datastores are not compatible.
        """
        if type(self) is not type(source_datastore):
            raise TypeError(
                f"Datastore mismatch between this datastore ({type(self)}) and the "
                f"source datastore ({type(source_datastore)})."
            )

        raise NotImplementedError(f"Datastore {type(self)} must implement a transfer_from method.")

    def getManyURIs(
        self,
        refs: Iterable[DatasetRef],
        predict: bool = False,
        allow_missing: bool = False,
    ) -> Dict[DatasetRef, DatasetRefURIs]:
        """Return URIs associated with many datasets.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            References to the required datasets.
        predict : `bool`, optional
            If the datastore does not know about a dataset, should it
            return a predicted URI or not?
        allow_missing : `bool`, optional
            If `False`, and `predict` is `False`, will raise if a
            `DatasetRef` does not exist.

        Returns
        -------
        URIs : `dict` of [`DatasetRef`, `DatasetRefURIs`]
            A dict of primary and component URIs, indexed by the passed-in
            refs.

        Raises
        ------
        FileNotFoundError
            A URI has been requested for a dataset that does not exist and
            guessing is not allowed.

        Notes
        -----
        In file-based datastores, `getManyURIs` does not check that the
        files really exist: if the datastore is aware of a file, it is
        assumed to exist.
        """
        uris: Dict[DatasetRef, DatasetRefURIs] = {}
        missing_refs = []
        for ref in refs:
            try:
                uris[ref] = self.getURIs(ref, predict=predict)
            except FileNotFoundError:
                missing_refs.append(ref)
        if missing_refs and not allow_missing:
            raise FileNotFoundError(
                "Missing {} refs from datastore out of {} and predict=False.".format(
                    num_missing := len(missing_refs), num_missing + len(uris)
                )
            )
        return uris
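
    # Usage sketch: fetch URIs in bulk while tolerating unknown datasets.
    # (``refs`` is an illustrative iterable of resolved DatasetRefs.)
    #
    #     uris = datastore.getManyURIs(refs, predict=False, allow_missing=True)
    #     for ref, ref_uris in uris.items():
    #         print(ref, ref_uris.primaryURI, ref_uris.componentURIs)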

    @abstractmethod
    def getURIs(self, datasetRef: DatasetRef, predict: bool = False) -> DatasetRefURIs:
        """Return URIs associated with dataset.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required dataset.
        predict : `bool`, optional
            If the datastore does not know about the dataset, should it
            return a predicted URI or not?

        Returns
        -------
        uris : `DatasetRefURIs`
            The URI to the primary artifact associated with this dataset (if
            the dataset was disassembled within the datastore this may be
            `None`), and the URIs to any components associated with the
            dataset artifact (can be empty if there are no components).
        """
        raise NotImplementedError()

    @abstractmethod
    def getURI(self, datasetRef: DatasetRef, predict: bool = False) -> ResourcePath:
        """URI to the Dataset.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required Dataset.
        predict : `bool`
            If `True` attempt to predict the URI for a dataset if it does
            not exist in datastore.

        Returns
        -------
        uri : `lsst.resources.ResourcePath`
            URI pointing to the Dataset within the datastore. If the
            Dataset does not exist in the datastore, the URI may be a guess.
            If the datastore does not have entities that relate well
            to the concept of a URI the returned URI will be
            descriptive. The returned URI is not guaranteed to be obtainable.

        Raises
        ------
        FileNotFoundError
            A URI has been requested for a dataset that does not exist and
            guessing is not allowed.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def retrieveArtifacts(
        self,
        refs: Iterable[DatasetRef],
        destination: ResourcePath,
        transfer: str = "auto",
        preserve_path: bool = True,
        overwrite: bool = False,
    ) -> List[ResourcePath]:
        """Retrieve the artifacts associated with the supplied refs.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            The datasets for which artifacts are to be retrieved.
            A single ref can result in multiple artifacts. The refs must
            be resolved.
        destination : `lsst.resources.ResourcePath`
            Location to write the artifacts.
        transfer : `str`, optional
            Method to use to transfer the artifacts. Must be one of the
            options supported by
            `lsst.resources.ResourcePath.transfer_from()`. "move" is not
            allowed.
        preserve_path : `bool`, optional
            If `True` the full path of the artifact within the datastore
            is preserved. If `False` the final file component of the path
            is used.
        overwrite : `bool`, optional
            If `True` allow transfers to overwrite existing files at the
            destination.

        Returns
        -------
        targets : `list` of `lsst.resources.ResourcePath`
            URIs of file artifacts in destination location. Order is not
            preserved.

        Notes
        -----
        For non-file datastores the artifacts written to the destination
        may not match the representation inside the datastore. For example
        a hierarchical data structure in a NoSQL database may well be stored
        as a JSON file.
        """
        raise NotImplementedError()

    @abstractmethod
    def remove(self, datasetRef: DatasetRef) -> None:
        """Indicate to the Datastore that a Dataset can be removed.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required Dataset.

        Raises
        ------
        FileNotFoundError
            When Dataset does not exist.

        Notes
        -----
        Some Datastores may implement this method as a silent no-op to
        disable Dataset deletion through standard interfaces.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def forget(self, refs: Iterable[DatasetRef]) -> None:
        """Indicate to the Datastore that it should remove all records of
        the given datasets, without actually deleting them.

        Parameters
        ----------
        refs : `Iterable` [ `DatasetRef` ]
            References to the datasets being forgotten.

        Notes
        -----
        Asking a datastore to forget a `DatasetRef` it does not hold should
        be a silent no-op, not an error.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def trash(self, ref: Union[DatasetRef, Iterable[DatasetRef]], ignore_errors: bool = True) -> None:
        """Indicate to the Datastore that a Dataset can be moved to the
        trash.

        Parameters
        ----------
        ref : `DatasetRef` or iterable thereof
            Reference(s) to the required Dataset.
        ignore_errors : `bool`, optional
            Determine whether errors should be ignored. When multiple
            refs are being trashed there will be no per-ref check.

        Raises
        ------
        FileNotFoundError
            When Dataset does not exist and errors are not ignored. Only
            checked if a single ref is supplied (and not in a list).

        Notes
        -----
        Some Datastores may implement this method as a silent no-op to
        disable Dataset deletion through standard interfaces.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def emptyTrash(self, ignore_errors: bool = True) -> None:
        """Remove all datasets from the trash.

        Parameters
        ----------
        ignore_errors : `bool`, optional
            Determine whether errors should be ignored.

        Notes
        -----
        Some Datastores may implement this method as a silent no-op to
        disable Dataset deletion through standard interfaces.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def transfer(self, inputDatastore: Datastore, datasetRef: DatasetRef) -> None:
        """Transfer a dataset from another datastore to this datastore.

        Parameters
        ----------
        inputDatastore : `Datastore`
            The external `Datastore` from which to retrieve the Dataset.
        datasetRef : `DatasetRef`
            Reference to the required Dataset.
        """
        raise NotImplementedError("Must be implemented by subclass")

    def export(
        self, refs: Iterable[DatasetRef], *, directory: Optional[str] = None, transfer: Optional[str] = None
    ) -> Iterable[FileDataset]:
        """Export datasets for transfer to another data repository.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            Dataset references to be exported.
        directory : `str`, optional
            Path to a directory that should contain files corresponding to
            output datasets. Ignored if ``transfer`` is `None`.
        transfer : `str`, optional
            Mode that should be used to move datasets out of the repository.
            Valid options are the same as those of the ``transfer`` argument
            to ``ingest``, and datastores may similarly signal that a
            transfer mode is not supported by raising `NotImplementedError`.

        Returns
        -------
        datasets : iterable of `FileDataset`
            Structs containing information about the exported datasets, in
            the same order as ``refs``.

        Raises
        ------
        NotImplementedError
            Raised if the given transfer mode is not supported.
        """
        raise NotImplementedError(f"Transfer mode {transfer} not supported.")

    @abstractmethod
    def validateConfiguration(
        self, entities: Iterable[Union[DatasetRef, DatasetType, StorageClass]], logFailures: bool = False
    ) -> None:
        """Validate some of the configuration for this datastore.

        Parameters
        ----------
        entities : iterable of `DatasetRef`, `DatasetType`, or `StorageClass`
            Entities to test against this configuration. Can be differing
            types.
        logFailures : `bool`, optional
            If `True`, output a log message for every validation error
            detected.

        Raises
        ------
        DatastoreValidationError
            Raised if there is a validation problem with a configuration.

        Notes
        -----
        Which parts of the configuration are validated is at the discretion
        of each Datastore implementation.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def validateKey(self, lookupKey: LookupKey, entity: Union[DatasetRef, DatasetType, StorageClass]) -> None:
        """Validate a specific look up key with supplied entity.

        Parameters
        ----------
        lookupKey : `LookupKey`
            Key to use to retrieve information from the datastore
            configuration.
        entity : `DatasetRef`, `DatasetType`, or `StorageClass`
            Entity to compare with configuration retrieved using the
            specified lookup key.

        Raises
        ------
        DatastoreValidationError
            Raised if there is a problem with the combination of entity
            and lookup key.

        Notes
        -----
        Bypasses the normal selection priorities by allowing a key that
        would normally not be selected to be validated.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def getLookupKeys(self) -> Set[LookupKey]:
        """Return all the lookup keys relevant to this datastore.

        Returns
        -------
        keys : `set` of `LookupKey`
            The keys stored internally for looking up information based
            on `DatasetType` name or `StorageClass`.
        """
        raise NotImplementedError("Must be implemented by subclass")

    def needs_expanded_data_ids(
        self,
        transfer: Optional[str],
        entity: Optional[Union[DatasetRef, DatasetType, StorageClass]] = None,
    ) -> bool:
        """Test whether this datastore needs expanded data IDs to ingest.

        Parameters
        ----------
        transfer : `str` or `None`
            Transfer mode for ingest.
        entity : `DatasetRef`, `DatasetType`, or `StorageClass`, optional
            Object representing what will be ingested. If not provided (or
            not specific enough), `True` may be returned even if expanded
            data IDs aren't necessary.

        Returns
        -------
        needed : `bool`
            If `True`, expanded data IDs may be needed. `False` only if
            expansion definitely isn't necessary.
        """
        return True

    @abstractmethod
    def import_records(
        self,
        data: Mapping[str, DatastoreRecordData],
    ) -> None:
        """Import datastore location and record data from an in-memory data
        structure.

        Parameters
        ----------
        data : `Mapping` [ `str`, `DatastoreRecordData` ]
            Datastore records indexed by datastore name. May contain data
            for other `Datastore` instances (generally because they are
            chained to this one), which should be ignored.

        Notes
        -----
        Implementations should generally not check that any external
        resources (e.g. files) referred to by these records actually exist,
        for performance reasons; we expect higher-level code to guarantee
        that they do.

        Implementations are responsible for calling
        `DatastoreRegistryBridge.insert` on all datasets in
        ``data.locations`` where the key is in `names`, as well as loading
        any opaque table data.
        """
        raise NotImplementedError()

    @abstractmethod
    def export_records(
        self,
        refs: Iterable[DatasetIdRef],
    ) -> Mapping[str, DatastoreRecordData]:
        """Export datastore records and locations to an in-memory data
        structure.

        Parameters
        ----------
        refs : `Iterable` [ `DatasetIdRef` ]
            Datasets to save. This may include datasets not known to this
            datastore, which should be ignored.

        Returns
        -------
        data : `Mapping` [ `str`, `DatastoreRecordData` ]
            Exported datastore records indexed by datastore name.
        """
        raise NotImplementedError()