Coverage for python/lsst/daf/butler/core/datastore.py: 42%

244 statements  

coverage.py v7.2.5, created at 2023-05-02 18:18 -0700

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Support for generic data stores."""

from __future__ import annotations

__all__ = ("DatastoreConfig", "Datastore", "DatastoreValidationError", "DatasetRefURIs")

import contextlib
import dataclasses
import logging
from abc import ABCMeta, abstractmethod
from collections import abc, defaultdict
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    ClassVar,
    Dict,
    Iterable,
    Iterator,
    List,
    Mapping,
    Optional,
    Set,
    Tuple,
    Type,
    Union,
)

from lsst.utils import doImportType

from .config import Config, ConfigSubset
from .constraints import Constraints
from .exceptions import DatasetTypeNotSupportedError, ValidationError
from .fileDataset import FileDataset
from .storageClass import StorageClassFactory

if TYPE_CHECKING:
    from lsst.resources import ResourcePath, ResourcePathExpression

    from ..registry.interfaces import DatasetIdRef, DatastoreRegistryBridgeManager
    from .configSupport import LookupKey
    from .datasets import DatasetRef, DatasetType
    from .datastoreRecordData import DatastoreRecordData
    from .storageClass import StorageClass


class DatastoreConfig(ConfigSubset):
    """Configuration for Datastores."""

    component = "datastore"
    requiredKeys = ("cls",)
    defaultConfigFile = "datastore.yaml"


class DatastoreValidationError(ValidationError):
    """There is a problem with the Datastore configuration."""

    pass


@dataclasses.dataclass(frozen=True)
class Event:
    __slots__ = {"name", "undoFunc", "args", "kwargs"}
    name: str
    undoFunc: Callable
    args: tuple
    kwargs: dict


class IngestPrepData:
    """A helper base class for `Datastore` ingest implementations.

    Datastore implementations will generally need a custom implementation of
    this class.

    Should be accessed as ``Datastore.IngestPrepData`` instead of via direct
    import.

    Parameters
    ----------
    refs : iterable of `DatasetRef`
        References for the datasets that can be ingested by this datastore.
    """

    def __init__(self, refs: Iterable[DatasetRef]):
        self.refs = {ref.id: ref for ref in refs}


class DatastoreTransaction:
    """Keeps a log of `Datastore` activity and allows rollback.

    Parameters
    ----------
    parent : `DatastoreTransaction`, optional
        The parent transaction (if any).
    """

    Event: ClassVar[Type] = Event

    parent: Optional[DatastoreTransaction]
    """The parent transaction. (`DatastoreTransaction`, optional)"""

    def __init__(self, parent: Optional[DatastoreTransaction] = None):
        self.parent = parent
        self._log: List[Event] = []

    def registerUndo(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> None:
        """Register event with undo function.

        Parameters
        ----------
        name : `str`
            Name of the event.
        undoFunc : func
            Function to undo this event.
        *args
            Positional arguments to `undoFunc`.
        **kwargs
            Keyword arguments to `undoFunc`.
        """
        self._log.append(self.Event(name, undoFunc, args, kwargs))

    @contextlib.contextmanager
    def undoWith(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> Iterator[None]:
        """Register undo function if nested operation succeeds.

        Calls `registerUndo`.

        This can be used to wrap individual undo-able statements within a
        DatastoreTransaction block. Multiple statements that can fail
        separately should not be part of the same `undoWith` block.

        All arguments are forwarded directly to `registerUndo`.
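
        Examples
        --------
        A minimal sketch of the intended pattern; ``createDir`` and
        ``removeDir`` are hypothetical helpers standing in for a real
        operation and its inverse::

            with transaction.undoWith("mkdir", removeDir, path):
                createDir(path)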

        """
        try:
            yield None
        except BaseException:
            raise
        else:
            self.registerUndo(name, undoFunc, *args, **kwargs)

    def rollback(self) -> None:
        """Roll back all events in this transaction."""
        log = logging.getLogger(__name__)
        while self._log:
            ev = self._log.pop()
            try:
                log.debug(
                    "Rolling back transaction: %s: %s(%s,%s)",
                    ev.name,
                    ev.undoFunc,
                    ",".join(str(a) for a in ev.args),
                    ",".join(f"{k}={v}" for k, v in ev.kwargs.items()),
                )
            except Exception:
                # In case we had a problem in stringification of arguments.
                log.warning("Rolling back transaction: %s", ev.name)
            try:
                ev.undoFunc(*ev.args, **ev.kwargs)
            except BaseException as e:
                # Deliberately swallow any error that may occur in unrolling.
                log.warning("Exception: %s caught while unrolling: %s", e, ev.name)
                pass

    def commit(self) -> None:
        """Commit this transaction."""
        if self.parent is None:
            # Just forget about the events, they have already happened.
            return
        else:
            # We may still want to roll back the events from this transaction
            # as part of the parent.
            self.parent._log.extend(self._log)


@dataclasses.dataclass
class DatasetRefURIs(abc.Sequence):
    """Represents the primary and component ResourcePath(s) associated with a
    DatasetRef.

    This is used in places where its members used to be represented as a tuple
    `(primaryURI, componentURIs)`. To maintain backward compatibility this
    inherits from Sequence and so instances can be treated as a two-item
    tuple.
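
    Examples
    --------
    Instances unpack like the two-item tuple they replace (a sketch;
    ``uris`` stands in for a value returned by `Datastore.getURIs`)::

        primaryURI, componentURIs = uris
        assert uris[0] is uris.primaryURI
        assert uris[1] is uris.componentURIs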

    """

    def __init__(
        self,
        primaryURI: Optional[ResourcePath] = None,
        componentURIs: Optional[Dict[str, ResourcePath]] = None,
    ):
        self.primaryURI = primaryURI
        """The URI to the primary artifact associated with this dataset. If
        the dataset was disassembled within the datastore this may be `None`.
        """

        self.componentURIs = componentURIs or {}
        """The URIs to any components associated with the dataset artifact
        indexed by component name. This can be empty if there are no
        components.
        """

    def __getitem__(self, index: Any) -> Any:
        """Get primaryURI and componentURIs by index.

        Provides support for tuple-like access.
        """
        if index == 0:
            return self.primaryURI
        elif index == 1:
            return self.componentURIs
        raise IndexError("list index out of range")

    def __len__(self) -> int:
        """Get the number of data members.

        Provides support for tuple-like access.
        """
        return 2

    def __repr__(self) -> str:
        return f"DatasetRefURIs({repr(self.primaryURI)}, {repr(self.componentURIs)})"


class Datastore(metaclass=ABCMeta):
    """Datastore interface.

    Parameters
    ----------
    config : `DatastoreConfig` or `str`
        Load configuration either from an existing config instance or by
        referring to a configuration file.
    bridgeManager : `DatastoreRegistryBridgeManager`
        Object that manages the interface between `Registry` and datastores.
    butlerRoot : `str`, optional
        New datastore root to use to override the configuration value.
    """

    defaultConfigFile: ClassVar[Optional[str]] = None
    """Path to configuration defaults. Accessed within the ``config`` resource
    or relative to a search path. Can be None if no defaults specified.
    """

    containerKey: ClassVar[Optional[str]] = None
    """Name of the key containing a list of subconfigurations that also
    need to be merged with defaults and will likely use different Python
    datastore classes (but all using DatastoreConfig). Assumed to be a
    list of configurations that can be represented in a DatastoreConfig
    and containing a "cls" definition. None indicates that no containers
    are expected in this Datastore."""

    isEphemeral: bool = False
    """Indicate whether this Datastore is ephemeral or not. An ephemeral
    datastore is one where the contents of the datastore will not exist
    across process restarts. This value can change per-instance."""

    config: DatastoreConfig
    """Configuration used to create Datastore."""

    name: str
    """Label associated with this Datastore."""

    storageClassFactory: StorageClassFactory
    """Factory for creating storage class instances from name."""

    constraints: Constraints
    """Constraints to apply when putting datasets into the datastore."""

    # MyPy does not like for this to be annotated as any kind of type, because
    # it can't do static checking on type variables that can change at
    # runtime.
    IngestPrepData: ClassVar[Any] = IngestPrepData
    """Helper base class for ingest implementations.
    """

    @classmethod
    @abstractmethod
    def setConfigRoot(cls, root: str, config: Config, full: Config, overwrite: bool = True) -> None:
        """Set filesystem-dependent config options for this datastore.

        The options will be appropriate for a new empty repository with the
        given root.

        Parameters
        ----------
        root : `str`
            Filesystem path to the root of the data repository.
        config : `Config`
            A `Config` to update. Only the subset understood by
            this component will be updated. Will not expand
            defaults.
        full : `Config`
            A complete config with all defaults expanded that can be
            converted to a `DatastoreConfig`. Read-only and will not be
            modified by this method.
            Repository-specific options that should not be obtained
            from defaults when Butler instances are constructed
            should be copied from ``full`` to ``config``.
        overwrite : `bool`, optional
            If `False`, do not modify a value in ``config`` if the value
            already exists. Default is always to overwrite with the provided
            ``root``.

        Notes
        -----
        If a keyword is explicitly defined in the supplied ``config`` it
        will not be overridden by this method if ``overwrite`` is `False`.
        This allows explicit values set in external configs to be retained.
        """
        raise NotImplementedError()

    @staticmethod
    def fromConfig(
        config: Config,
        bridgeManager: DatastoreRegistryBridgeManager,
        butlerRoot: Optional[ResourcePathExpression] = None,
    ) -> "Datastore":
        """Create datastore from type specified in config file.

        Parameters
        ----------
        config : `Config`
            Configuration instance.
        bridgeManager : `DatastoreRegistryBridgeManager`
            Object that manages the interface between `Registry` and
            datastores.
        butlerRoot : `str`, optional
            Butler root directory.
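
        Examples
        --------
        A minimal sketch; the ``cls`` value names whichever concrete
        `Datastore` subclass the repository uses (the fully-qualified path
        shown here is illustrative)::

            config = Config(
                {"datastore": {"cls": "lsst.daf.butler.datastores.inMemoryDatastore.InMemoryDatastore"}}
            )
            datastore = Datastore.fromConfig(config, bridgeManager)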

        """
        cls = doImportType(config["datastore", "cls"])
        if not issubclass(cls, Datastore):
            raise TypeError(f"Imported child class {config['datastore', 'cls']} is not a Datastore")
        return cls(config=config, bridgeManager=bridgeManager, butlerRoot=butlerRoot)

    def __init__(
        self,
        config: Union[Config, str],
        bridgeManager: DatastoreRegistryBridgeManager,
        butlerRoot: Optional[ResourcePathExpression] = None,
    ):
        self.config = DatastoreConfig(config)
        self.name = "ABCDataStore"
        self._transaction: Optional[DatastoreTransaction] = None

        # All Datastores need storage classes and constraints.
        self.storageClassFactory = StorageClassFactory()

        # And read the constraints list.
        constraintsConfig = self.config.get("constraints")
        self.constraints = Constraints(constraintsConfig, universe=bridgeManager.universe)

    def __str__(self) -> str:
        return self.name

    def __repr__(self) -> str:
        return self.name

    @property
    def names(self) -> Tuple[str, ...]:
        """Names associated with this datastore returned as a tuple.

        Can be different from ``name`` for a chaining datastore.
        """
        # Default implementation returns solely the name itself.
        return (self.name,)

    @contextlib.contextmanager
    def transaction(self) -> Iterator[DatastoreTransaction]:
        """Context manager supporting `Datastore` transactions.

        Transactions can be nested, and are to be used in combination with
        `Registry.transaction`.
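
        Examples
        --------
        A sketch of typical use; everything registered with the transaction
        inside the ``with`` block is rolled back if an exception escapes
        (``inMemoryDataset`` and ``ref`` are placeholders)::

            with datastore.transaction():
                datastore.put(inMemoryDataset, ref)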

        """
        self._transaction = DatastoreTransaction(self._transaction)
        try:
            yield self._transaction
        except BaseException:
            self._transaction.rollback()
            raise
        else:
            self._transaction.commit()
        self._transaction = self._transaction.parent

    @abstractmethod
    def knows(self, ref: DatasetRef) -> bool:
        """Check if the dataset is known to the datastore.

        Does not check for existence of any artifact.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required dataset.

        Returns
        -------
        exists : `bool`
            `True` if the dataset is known to the datastore.
        """
        raise NotImplementedError()

    def mexists(
        self, refs: Iterable[DatasetRef], artifact_existence: Optional[Dict[ResourcePath, bool]] = None
    ) -> Dict[DatasetRef, bool]:
        """Check the existence of multiple datasets at once.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            The datasets to be checked.
        artifact_existence : `dict` [`lsst.resources.ResourcePath`, `bool`]
            Optional mapping of datastore artifact to existence. Updated by
            this method with details of all artifacts tested. Can be `None`
            if the caller is not interested.

        Returns
        -------
        existence : `dict` of [`DatasetRef`, `bool`]
            Mapping from dataset to boolean indicating existence.
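
        Examples
        --------
        A sketch; ``refs`` is any iterable of resolved `DatasetRef`::

            existence = datastore.mexists(refs)
            missing = [ref for ref, exists in existence.items() if not exists]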

        """
        existence: Dict[DatasetRef, bool] = {}
        # Non-optimized default.
        for ref in refs:
            existence[ref] = self.exists(ref)
        return existence

    @abstractmethod
    def exists(self, datasetRef: DatasetRef) -> bool:
        """Check if the dataset exists in the datastore.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required dataset.

        Returns
        -------
        exists : `bool`
            `True` if the entity exists in the `Datastore`.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def get(self, datasetRef: DatasetRef, parameters: Optional[Mapping[str, Any]] = None) -> Any:
        """Load an `InMemoryDataset` from the store.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required Dataset.
        parameters : `dict`, optional
            `StorageClass`-specific parameters that specify a slice of the
            Dataset to be loaded.

        Returns
        -------
        inMemoryDataset : `object`
            Requested Dataset or slice thereof as an InMemoryDataset.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def put(self, inMemoryDataset: Any, datasetRef: DatasetRef) -> None:
        """Write an `InMemoryDataset` with a given `DatasetRef` to the store.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Dataset to store.
        datasetRef : `DatasetRef`
            Reference to the associated Dataset.
        """
        raise NotImplementedError("Must be implemented by subclass")

    def _overrideTransferMode(self, *datasets: FileDataset, transfer: Optional[str] = None) -> Optional[str]:
        """Allow ingest transfer mode to be defaulted based on datasets.

        Parameters
        ----------
        datasets : `FileDataset`
            Each positional argument is a struct containing information about
            a file to be ingested, including its path (either absolute or
            relative to the datastore root, if applicable), a complete
            `DatasetRef` (with ``dataset_id not None``), and optionally a
            formatter class or its fully-qualified string name. If a formatter
            is not provided, this method should populate that attribute with
            the formatter the datastore would use for `put`. Subclasses are
            also permitted to modify the path attribute (typically to put it
            in what the datastore considers its standard form).
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            See `ingest` for details of transfer modes.

        Returns
        -------
        newTransfer : `str`
            Transfer mode to use. Will be identical to the supplied transfer
            mode unless "auto" is used.
        """
        if transfer != "auto":
            return transfer
        raise RuntimeError(f"{transfer} is not allowed without specialization.")

    def _prepIngest(self, *datasets: FileDataset, transfer: Optional[str] = None) -> IngestPrepData:
        """Process datasets to identify which ones can be ingested.

        Parameters
        ----------
        datasets : `FileDataset`
            Each positional argument is a struct containing information about
            a file to be ingested, including its path (either absolute or
            relative to the datastore root, if applicable), a complete
            `DatasetRef` (with ``dataset_id not None``), and optionally a
            formatter class or its fully-qualified string name. If a formatter
            is not provided, this method should populate that attribute with
            the formatter the datastore would use for `put`. Subclasses are
            also permitted to modify the path attribute (typically to put it
            in what the datastore considers its standard form).
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            See `ingest` for details of transfer modes.

        Returns
        -------
        data : `IngestPrepData`
            An instance of a subclass of `IngestPrepData`, used to pass
            arbitrary data from `_prepIngest` to `_finishIngest`. This should
            include only the datasets this datastore can actually ingest;
            others should be silently ignored (`Datastore.ingest` will inspect
            `IngestPrepData.refs` and raise `DatasetTypeNotSupportedError` if
            necessary).

        Raises
        ------
        NotImplementedError
            Raised if the datastore does not support the given transfer mode
            (including the case where ingest is not supported at all).
        FileNotFoundError
            Raised if one of the given files does not exist.
        FileExistsError
            Raised if transfer is not `None` but the (internal) location the
            file would be moved to is already occupied.

        Notes
        -----
        This method (along with `_finishIngest`) should be implemented by
        subclasses to provide ingest support instead of implementing `ingest`
        directly.

        `_prepIngest` should not modify the data repository or given files in
        any way; all changes should be deferred to `_finishIngest`.

        When possible, exceptions should be raised in `_prepIngest` instead of
        `_finishIngest`. `NotImplementedError` exceptions that indicate that
        the transfer mode is not supported must be raised by `_prepIngest`
        instead of `_finishIngest`.
        """
        raise NotImplementedError(f"Datastore {self} does not support direct file-based ingest.")

    def _finishIngest(
        self, prepData: IngestPrepData, *, transfer: Optional[str] = None, record_validation_info: bool = True
    ) -> None:
        """Complete an ingest operation.

        Parameters
        ----------
        prepData : `IngestPrepData`
            An instance of a subclass of `IngestPrepData`. Guaranteed to be
            the direct result of a call to `_prepIngest` on this datastore.
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            See `ingest` for details of transfer modes.
        record_validation_info : `bool`, optional
            If `True`, the default, the datastore can record validation
            information associated with the file. If `False` the datastore
            will not attempt to track any information such as checksums
            or file sizes. This can be useful if such information is tracked
            in an external system or if the file is to be compressed in place.
            It is up to the datastore whether this parameter is relevant.

        Raises
        ------
        FileNotFoundError
            Raised if one of the given files does not exist.
        FileExistsError
            Raised if transfer is not `None` but the (internal) location the
            file would be moved to is already occupied.

        Notes
        -----
        This method (along with `_prepIngest`) should be implemented by
        subclasses to provide ingest support instead of implementing `ingest`
        directly.
        """
        raise NotImplementedError(f"Datastore {self} does not support direct file-based ingest.")

    def ingest(
        self, *datasets: FileDataset, transfer: Optional[str] = None, record_validation_info: bool = True
    ) -> None:
        """Ingest one or more files into the datastore.

        Parameters
        ----------
        datasets : `FileDataset`
            Each positional argument is a struct containing information about
            a file to be ingested, including its path (either absolute or
            relative to the datastore root, if applicable), a complete
            `DatasetRef` (with ``dataset_id not None``), and optionally a
            formatter class or its fully-qualified string name. If a formatter
            is not provided, the one the datastore would use for ``put`` on
            that dataset is assumed.
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            If `None` (default), the file must already be in a location
            appropriate for the datastore (e.g. within its root directory),
            and will not be modified. Other choices include "move", "copy",
            "link", "symlink", "relsymlink", and "hardlink". "link" is a
            special transfer mode that will first try to make a hardlink and
            if that fails a symlink will be used instead. "relsymlink" creates
            a relative symlink rather than an absolute path.
            Most datastores do not support all transfer modes.
            "auto" is a special option that will let the
            data store choose the most natural option for itself.
        record_validation_info : `bool`, optional
            If `True`, the default, the datastore can record validation
            information associated with the file. If `False` the datastore
            will not attempt to track any information such as checksums
            or file sizes. This can be useful if such information is tracked
            in an external system or if the file is to be compressed in place.
            It is up to the datastore whether this parameter is relevant.

        Raises
        ------
        NotImplementedError
            Raised if the datastore does not support the given transfer mode
            (including the case where ingest is not supported at all).
        DatasetTypeNotSupportedError
            Raised if one or more files to be ingested have a dataset type
            that is not supported by the datastore.
        FileNotFoundError
            Raised if one of the given files does not exist.
        FileExistsError
            Raised if transfer is not `None` but the (internal) location the
            file would be moved to is already occupied.

        Notes
        -----
        Subclasses should implement `_prepIngest` and `_finishIngest` instead
        of implementing `ingest` directly. Datastores that hold and
        delegate to child datastores may want to call those methods as well.

        Subclasses are encouraged to document their supported transfer modes
        in their class documentation.
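
        Examples
        --------
        A sketch of ingesting a single file by copying it into the datastore
        (the path and ``ref`` are placeholders)::

            datastore.ingest(FileDataset(path="data.fits", refs=[ref]), transfer="copy")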

        """
        # Allow a datastore to select a default transfer mode.
        transfer = self._overrideTransferMode(*datasets, transfer=transfer)
        prepData = self._prepIngest(*datasets, transfer=transfer)
        refs = {ref.id: ref for dataset in datasets for ref in dataset.refs}
        if None in refs:
            # Find the files for the error message. There may be multiple
            # bad refs so look for all of them.
            unresolved_paths = {}
            for dataset in datasets:
                unresolved = []
                for ref in dataset.refs:
                    if ref.id is None:
                        unresolved.append(ref)
                if unresolved:
                    unresolved_paths[dataset.path] = unresolved
            raise RuntimeError(
                "Attempt to ingest unresolved DatasetRef from: "
                + ",".join(
                    f"{path}: ({[str(r) for r in bad_refs]})" for path, bad_refs in unresolved_paths.items()
                )
            )
        if refs.keys() != prepData.refs.keys():
            unsupported = refs.keys() - prepData.refs.keys()
            # Group unsupported refs by DatasetType for an informative
            # but still concise error message.
            byDatasetType = defaultdict(list)
            for datasetId in unsupported:
                ref = refs[datasetId]
                byDatasetType[ref.datasetType].append(ref)
            raise DatasetTypeNotSupportedError(
                "DatasetType(s) not supported in ingest: "
                + ", ".join(f"{k.name} ({len(v)} dataset(s))" for k, v in byDatasetType.items())
            )
        self._finishIngest(prepData, transfer=transfer, record_validation_info=record_validation_info)

    def transfer_from(
        self,
        source_datastore: Datastore,
        refs: Iterable[DatasetRef],
        local_refs: Optional[Iterable[DatasetRef]] = None,
        transfer: str = "auto",
        artifact_existence: Optional[Dict[ResourcePath, bool]] = None,
    ) -> None:
        """Transfer dataset artifacts from another datastore to this one.

        Parameters
        ----------
        source_datastore : `Datastore`
            The datastore from which to transfer artifacts. That datastore
            must be compatible with this datastore receiving the artifacts.
        refs : iterable of `DatasetRef`
            The datasets to transfer from the source datastore.
        local_refs : iterable of `DatasetRef`, optional
            The dataset refs associated with the registry associated with
            this datastore. Can be `None` if the source and target datastores
            are using UUIDs.
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            Choices include "move", "copy",
            "link", "symlink", "relsymlink", and "hardlink". "link" is a
            special transfer mode that will first try to make a hardlink and
            if that fails a symlink will be used instead. "relsymlink" creates
            a relative symlink rather than an absolute path.
            Most datastores do not support all transfer modes.
            "auto" (the default) is a special option that will let the
            data store choose the most natural option for itself.
            If the source location and transfer location are identical the
            transfer mode will be ignored.
        artifact_existence : `dict` [`lsst.resources.ResourcePath`, `bool`]
            Optional mapping of datastore artifact to existence. Updated by
            this method with details of all artifacts tested. Can be `None`
            if the caller is not interested.

        Raises
        ------
        TypeError
            Raised if the two datastores are not compatible.
        """
        if type(self) is not type(source_datastore):
            raise TypeError(
                f"Datastore mismatch between this datastore ({type(self)}) and the "
                f"source datastore ({type(source_datastore)})."
            )

        raise NotImplementedError(f"Datastore {type(self)} must implement a transfer_from method.")

    def getManyURIs(
        self,
        refs: Iterable[DatasetRef],
        predict: bool = False,
        allow_missing: bool = False,
    ) -> Dict[DatasetRef, DatasetRefURIs]:
        """Return URIs associated with many datasets.

        Parameters
        ----------
        refs : iterable of `DatasetIdRef`
            References to the required datasets.
        predict : `bool`, optional
            If the datastore does not know about a dataset, should it
            return a predicted URI or not?
        allow_missing : `bool`
            If `False`, and `predict` is `False`, will raise if a `DatasetRef`
            does not exist.

        Returns
        -------
        URIs : `dict` of [`DatasetRef`, `DatasetRefURIs`]
            A dict of primary and component URIs, indexed by the passed-in
            refs.

        Raises
        ------
        FileNotFoundError
            A URI has been requested for a dataset that does not exist and
            guessing is not allowed.

        Notes
        -----
        In file-based datastores, `getManyURIs` does not check that the files
        really exist; it assumes that if the datastore is aware of a file then
        it actually exists.
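
        Examples
        --------
        A sketch; missing datasets are simply absent from the result when
        ``allow_missing`` is `True`::

            uris = datastore.getManyURIs(refs, allow_missing=True)
            for ref, ref_uris in uris.items():
                print(ref, ref_uris.primaryURI)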

        """
        uris: Dict[DatasetRef, DatasetRefURIs] = {}
        missing_refs = []
        for ref in refs:
            try:
                uris[ref] = self.getURIs(ref, predict=predict)
            except FileNotFoundError:
                missing_refs.append(ref)
        if missing_refs and not allow_missing:
            raise FileNotFoundError(
                "Missing {} refs from datastore out of {} and predict=False.".format(
                    num_missing := len(missing_refs), num_missing + len(uris)
                )
            )
        return uris

    @abstractmethod
    def getURIs(self, datasetRef: DatasetRef, predict: bool = False) -> DatasetRefURIs:
        """Return URIs associated with dataset.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required dataset.
        predict : `bool`, optional
            If the datastore does not know about the dataset, should it
            return a predicted URI or not?

        Returns
        -------
        uris : `DatasetRefURIs`
            The URI to the primary artifact associated with this dataset (if
            the dataset was disassembled within the datastore this may be
            `None`), and the URIs to any components associated with the
            dataset artifact (can be empty if there are no components).
        """
        raise NotImplementedError()

    @abstractmethod
    def getURI(self, datasetRef: DatasetRef, predict: bool = False) -> ResourcePath:
        """URI to the Dataset.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required Dataset.
        predict : `bool`
            If `True` attempt to predict the URI for a dataset if it does
            not exist in datastore.

        Returns
        -------
        uri : `lsst.resources.ResourcePath`
            URI pointing to the Dataset within the datastore. If the
            Dataset does not exist in the datastore, the URI may be a guess.
            If the datastore does not have entities that relate well
            to the concept of a URI the returned URI will be
            descriptive. The returned URI is not guaranteed to be obtainable.

        Raises
        ------
        FileNotFoundError
            A URI has been requested for a dataset that does not exist and
            guessing is not allowed.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def retrieveArtifacts(
        self,
        refs: Iterable[DatasetRef],
        destination: ResourcePath,
        transfer: str = "auto",
        preserve_path: bool = True,
        overwrite: bool = False,
    ) -> List[ResourcePath]:
        """Retrieve the artifacts associated with the supplied refs.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            The datasets for which artifacts are to be retrieved.
            A single ref can result in multiple artifacts. The refs must
            be resolved.
        destination : `lsst.resources.ResourcePath`
            Location to write the artifacts.
        transfer : `str`, optional
            Method to use to transfer the artifacts. Must be one of the
            options supported by
            `lsst.resources.ResourcePath.transfer_from()`.
            "move" is not allowed.
        preserve_path : `bool`, optional
            If `True` the full path of the artifact within the datastore
            is preserved. If `False` the final file component of the path
            is used.
        overwrite : `bool`, optional
            If `True` allow transfers to overwrite existing files at the
            destination.

        Returns
        -------
        targets : `list` of `lsst.resources.ResourcePath`
            URIs of file artifacts in destination location. Order is not
            preserved.

        Notes
        -----
        For non-file datastores the artifacts written to the destination
        may not match the representation inside the datastore. For example
        a hierarchical data structure in a NoSQL database may well be stored
        as a JSON file.
        """
        raise NotImplementedError()

    @abstractmethod
    def remove(self, datasetRef: DatasetRef) -> None:
        """Indicate to the Datastore that a Dataset can be removed.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required Dataset.

        Raises
        ------
        FileNotFoundError
            When Dataset does not exist.

        Notes
        -----
        Some Datastores may implement this method as a silent no-op to
        disable Dataset deletion through standard interfaces.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def forget(self, refs: Iterable[DatasetRef]) -> None:
        """Indicate to the Datastore that it should remove all records of the
        given datasets, without actually deleting them.

        Parameters
        ----------
        refs : `Iterable` [ `DatasetRef` ]
            References to the datasets being forgotten.

        Notes
        -----
        Asking a datastore to forget a `DatasetRef` it does not hold should be
        a silent no-op, not an error.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def trash(self, ref: Union[DatasetRef, Iterable[DatasetRef]], ignore_errors: bool = True) -> None:
        """Indicate to the Datastore that a Dataset can be moved to the trash.

        Parameters
        ----------
        ref : `DatasetRef` or iterable thereof
            Reference(s) to the required Dataset.
        ignore_errors : `bool`, optional
            Determine whether errors should be ignored. When multiple
            refs are being trashed there will be no per-ref check.

        Raises
        ------
        FileNotFoundError
            When Dataset does not exist and errors are not ignored. Only
            checked if a single ref is supplied (and not in a list).

        Notes
        -----
        Some Datastores may implement this method as a silent no-op to
        disable Dataset deletion through standard interfaces.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def emptyTrash(self, ignore_errors: bool = True) -> None:
        """Remove all datasets from the trash.

        Parameters
        ----------
        ignore_errors : `bool`, optional
            Determine whether errors should be ignored.

        Notes
        -----
        Some Datastores may implement this method as a silent no-op to
        disable Dataset deletion through standard interfaces.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def transfer(self, inputDatastore: Datastore, datasetRef: DatasetRef) -> None:
        """Transfer a dataset from another datastore to this datastore.

        Parameters
        ----------
        inputDatastore : `Datastore`
            The external `Datastore` from which to retrieve the Dataset.
        datasetRef : `DatasetRef`
            Reference to the required Dataset.
        """
        raise NotImplementedError("Must be implemented by subclass")

    def export(
        self, refs: Iterable[DatasetRef], *, directory: Optional[str] = None, transfer: Optional[str] = None
    ) -> Iterable[FileDataset]:
        """Export datasets for transfer to another data repository.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            Dataset references to be exported.
        directory : `str`, optional
            Path to a directory that should contain files corresponding to
            output datasets. Ignored if ``transfer`` is `None`.
        transfer : `str`, optional
            Mode that should be used to move datasets out of the repository.
            Valid options are the same as those of the ``transfer`` argument
            to ``ingest``, and datastores may similarly signal that a transfer
            mode is not supported by raising `NotImplementedError`.

        Returns
        -------
        datasets : iterable of `FileDataset`
            Structs containing information about the exported datasets, in the
            same order as ``refs``.

        Raises
        ------
        NotImplementedError
            Raised if the given transfer mode is not supported.
        """
        raise NotImplementedError(f"Transfer mode {transfer} not supported.")

    @abstractmethod
    def validateConfiguration(
        self, entities: Iterable[Union[DatasetRef, DatasetType, StorageClass]], logFailures: bool = False
    ) -> None:
        """Validate some of the configuration for this datastore.

        Parameters
        ----------
        entities : iterable of `DatasetRef`, `DatasetType`, or `StorageClass`
            Entities to test against this configuration. Can be differing
            types.
        logFailures : `bool`, optional
            If `True`, output a log message for every validation error
            detected.

        Raises
        ------
        DatastoreValidationError
            Raised if there is a validation problem with a configuration.

        Notes
        -----
        Which parts of the configuration are validated is at the discretion
        of each Datastore implementation.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def validateKey(self, lookupKey: LookupKey, entity: Union[DatasetRef, DatasetType, StorageClass]) -> None:
        """Validate a specific look up key with supplied entity.

        Parameters
        ----------
        lookupKey : `LookupKey`
            Key to use to retrieve information from the datastore
            configuration.
        entity : `DatasetRef`, `DatasetType`, or `StorageClass`
            Entity to compare with configuration retrieved using the
            specified lookup key.

        Raises
        ------
        DatastoreValidationError
            Raised if there is a problem with the combination of entity
            and lookup key.

        Notes
        -----
        Bypasses the normal selection priorities by allowing a key that
        would normally not be selected to be validated.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def getLookupKeys(self) -> Set[LookupKey]:
        """Return all the lookup keys relevant to this datastore.

        Returns
        -------
        keys : `set` of `LookupKey`
            The keys stored internally for looking up information based
            on `DatasetType` name or `StorageClass`.
        """
        raise NotImplementedError("Must be implemented by subclass")

    def needs_expanded_data_ids(
        self,
        transfer: Optional[str],
        entity: Optional[Union[DatasetRef, DatasetType, StorageClass]] = None,
    ) -> bool:
        """Test whether this datastore needs expanded data IDs to ingest.

        Parameters
        ----------
        transfer : `str` or `None`
            Transfer mode for ingest.
        entity : `DatasetRef`, `DatasetType`, or `StorageClass`, optional
            Object representing what will be ingested. If not provided (or not
            specific enough), `True` may be returned even if expanded data
            IDs aren't necessary.

        Returns
        -------
        needed : `bool`
            If `True`, expanded data IDs may be needed. `False` only if
            expansion definitely isn't necessary.
        """
        return True

    @abstractmethod
    def import_records(
        self,
        data: Mapping[str, DatastoreRecordData],
    ) -> None:
        """Import datastore location and record data from an in-memory data
        structure.

        Parameters
        ----------
        data : `Mapping` [ `str`, `DatastoreRecordData` ]
            Datastore records indexed by datastore name. May contain data for
            other `Datastore` instances (generally because they are chained to
            this one), which should be ignored.

        Notes
        -----
        Implementations should generally not check that any external resources
        (e.g. files) referred to by these records actually exist, for
        performance reasons; we expect higher-level code to guarantee that
        they do.

        Implementations are responsible for calling
        `DatastoreRegistryBridge.insert` on all datasets in ``data.locations``
        where the key is in `names`, as well as loading any opaque table data.
        """
        raise NotImplementedError()

    @abstractmethod
    def export_records(
        self,
        refs: Iterable[DatasetIdRef],
    ) -> Mapping[str, DatastoreRecordData]:
        """Export datastore records and locations to an in-memory data
        structure.

        Parameters
        ----------
        refs : `Iterable` [ `DatasetIdRef` ]
            Datasets to save. This may include datasets not known to this
            datastore, which should be ignored.

        Returns
        -------
        data : `Mapping` [ `str`, `DatastoreRecordData` ]
            Exported datastore records indexed by datastore name.
        """
        raise NotImplementedError()