Coverage for python/lsst/daf/butler/core/datastore.py: 61%

250 statements  

coverage.py v7.3.2, created at 2023-10-25 15:14 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22"""Support for generic data stores.""" 

23 

24from __future__ import annotations 

25 

26__all__ = ("DatastoreConfig", "Datastore", "DatastoreValidationError", "DatasetRefURIs", "NullDatastore") 

27 

28import contextlib 

29import dataclasses 

30import logging 

31import time 

32from abc import ABCMeta, abstractmethod 

33from collections import abc, defaultdict 

34from collections.abc import Callable, Iterable, Iterator, Mapping 

35from typing import TYPE_CHECKING, Any, ClassVar 

36 

37from lsst.utils import doImportType 

38 

39from .config import Config, ConfigSubset 

40from .constraints import Constraints 

41from .exceptions import DatasetTypeNotSupportedError, ValidationError 

42from .fileDataset import FileDataset 

43from .storageClass import StorageClassFactory 

44 

45if TYPE_CHECKING: 

46 from lsst.resources import ResourcePath, ResourcePathExpression 

47 

48 from ..registry.interfaces import DatasetIdRef, DatastoreRegistryBridgeManager 

49 from .configSupport import LookupKey 

50 from .datasets import DatasetRef, DatasetType 

51 from .datastoreRecordData import DatastoreRecordData 

52 from .storageClass import StorageClass 

53 

54_LOG = logging.getLogger(__name__) 

55 

56 

57class DatastoreConfig(ConfigSubset): 

58 """Configuration for Datastores.""" 

59 

60 component = "datastore" 

61 requiredKeys = ("cls",) 

62 defaultConfigFile = "datastore.yaml" 

63 

64 

65class DatastoreValidationError(ValidationError): 

66 """There is a problem with the Datastore configuration.""" 

67 

68 pass 

69 

70 

71@dataclasses.dataclass(frozen=True) 

72class Event: 

73 """Representation of an event that can be rolled back.""" 

74 

75 __slots__ = {"name", "undoFunc", "args", "kwargs"} 

76 name: str 

77 undoFunc: Callable 

78 args: tuple 

79 kwargs: dict 

80 

81 

82class IngestPrepData: 

83 """A helper base class for `Datastore` ingest implementations. 

84 

85 Datastore implementations will generally need a custom implementation of 

86 this class. 

87 

88 Should be accessed as ``Datastore.IngestPrepData`` instead of via direct 

89 import. 

90 

91 Parameters 

92 ---------- 

93 refs : iterable of `DatasetRef` 

94 References for the datasets that can be ingested by this datastore. 

95 """ 

96 

97 def __init__(self, refs: Iterable[DatasetRef]): 

98 self.refs = {ref.id: ref for ref in refs} 

99 

100 

101class DatastoreTransaction: 

102 """Keeps a log of `Datastore` activity and allows rollback.

103 

104 Parameters 

105 ---------- 

106 parent : `DatastoreTransaction`, optional 

107 The parent transaction (if any).

108 """ 

109 

110 Event: ClassVar[type] = Event 

111 

112 parent: DatastoreTransaction | None 

113 """The parent transaction. (`DatastoreTransaction`, optional)""" 

114 

115 def __init__(self, parent: DatastoreTransaction | None = None): 

116 self.parent = parent 

117 self._log: list[Event] = [] 

118 

119 def registerUndo(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> None: 

120 """Register event with undo function. 

121 

122 Parameters 

123 ---------- 

124 name : `str` 

125 Name of the event. 

126 undoFunc : func 

127 Function to undo this event. 

128 args : `tuple` 

129 Positional arguments to `undoFunc`. 

130 **kwargs 

131 Keyword arguments to `undoFunc`. 

132 """ 

133 self._log.append(self.Event(name, undoFunc, args, kwargs)) 

134 

135 @contextlib.contextmanager 

136 def undoWith(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> Iterator[None]: 

137 """Register undo function if nested operation succeeds. 

138 

139 Calls `registerUndo`. 

140 

141 This can be used to wrap individual undo-able statements within a 

142 DatastoreTransaction block. Multiple statements that can fail 

143 separately should not be part of the same `undoWith` block. 

144 

145 All arguments are forwarded directly to `registerUndo`. 

146 """ 

147 try: 

148 yield None 

149 except BaseException: 

150 raise 

151 else: 

152 self.registerUndo(name, undoFunc, *args, **kwargs) 

153 

154 def rollback(self) -> None: 

155 """Roll back all events in this transaction.""" 

156 log = logging.getLogger(__name__) 

157 while self._log: 

158 ev = self._log.pop() 

159 try: 

160 log.debug( 

161 "Rolling back transaction: %s: %s(%s,%s)", 

162 ev.name, 

163 ev.undoFunc, 

164 ",".join(str(a) for a in ev.args), 

165 ",".join(f"{k}={v}" for k, v in ev.kwargs.items()), 

166 ) 

167 except Exception: 

168 # In case we had a problem in stringification of arguments 

169 log.warning("Rolling back transaction: %s", ev.name) 

170 try: 

171 ev.undoFunc(*ev.args, **ev.kwargs) 

172 except BaseException as e: 

173 # Deliberately swallow error that may occur in unrolling 

174 log.warning("Exception: %s caught while unrolling: %s", e, ev.name) 

175 pass 

176 

177 def commit(self) -> None: 

178 """Commit this transaction.""" 

179 if self.parent is None: 

180 # Just forget about the events, they have already happened. 

181 return 

182 else: 

183 # We may still want to keep the events from this transaction as part of

184 # the parent. 

185 self.parent._log.extend(self._log) 

186 

187 
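A minimal usage sketch (not part of this module) of how `registerUndo`/`undoWith`, `rollback`, and `commit` fit together; the file-copy operation and the "in_dir"/"out_dir" paths are hypothetical, and only the `DatastoreTransaction` API defined above is assumed.

import shutil

txn = DatastoreTransaction()
try:
    # Register the undo action only if the wrapped statement succeeds.
    with txn.undoWith("copy tree", shutil.rmtree, "out_dir", ignore_errors=True):
        shutil.copytree("in_dir", "out_dir")
    # ... further undo-able steps ...
except BaseException:
    txn.rollback()  # runs shutil.rmtree("out_dir", ignore_errors=True)
    raise
else:
    txn.commit()  # no parent transaction, so the events are simply dropped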

188@dataclasses.dataclass 

189class DatasetRefURIs(abc.Sequence): 

190 """Represents the primary and component ResourcePath(s) associated with a 

191 DatasetRef. 

192 

193 This is used in places where its members used to be represented as a tuple 

194 `(primaryURI, componentURIs)`. To maintain backward compatibility this 

195 inherits from Sequence and so instances can be treated as a two-item 

196 tuple. 

197 """ 

198 

199 def __init__( 

200 self, 

201 primaryURI: ResourcePath | None = None, 

202 componentURIs: dict[str, ResourcePath] | None = None, 

203 ): 

204 self.primaryURI = primaryURI 

205 """The URI to the primary artifact associated with this dataset. If the 

206 dataset was disassembled within the datastore this may be `None`. 

207 """ 

208 

209 self.componentURIs = componentURIs or {} 

210 """The URIs to any components associated with the dataset artifact 

211 indexed by component name. This can be empty if there are no 

212 components. 

213 """ 

214 

215 def __getitem__(self, index: Any) -> Any: 

216 """Get primaryURI and componentURIs by index. 

217 

218 Provides support for tuple-like access. 

219 """ 

220 if index == 0: 

221 return self.primaryURI 

222 elif index == 1: 

223 return self.componentURIs 

224 raise IndexError("list index out of range") 

225 

226 def __len__(self) -> int: 

227 """Get the number of data members. 

228 

229 Provides support for tuple-like access. 

230 """ 

231 return 2 

232 

233 def __repr__(self) -> str: 

234 return f"DatasetRefURIs({repr(self.primaryURI)}, {repr(self.componentURIs)})" 

235 
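A short sketch (not part of this module) of the tuple-compatible behaviour described above; the component name and URI are made up.

from lsst.resources import ResourcePath

uris = DatasetRefURIs(componentURIs={"wcs": ResourcePath("file:///repo/calexp.wcs.fits")})

# Attribute access and two-item unpacking are equivalent views of the same data.
primary, components = uris
assert primary is uris.primaryURI and primary is None
assert components is uris.componentURIs
assert len(uris) == 2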

236 

237class Datastore(metaclass=ABCMeta): 

238 """Datastore interface. 

239 

240 Parameters 

241 ---------- 

242 config : `DatastoreConfig` or `str` 

243 Load configuration either from an existing config instance or by 

244 referring to a configuration file. 

245 bridgeManager : `DatastoreRegistryBridgeManager` 

246 Object that manages the interface between `Registry` and datastores. 

247 butlerRoot : `str`, optional 

248 New datastore root to use to override the configuration value. 

249 """ 

250 

251 defaultConfigFile: ClassVar[str | None] = None 

252 """Path to configuration defaults. Accessed within the ``config`` resource 

253 or relative to a search path. Can be None if no defaults specified. 

254 """ 

255 

256 containerKey: ClassVar[str | None] = None 

257 """Name of the key containing a list of subconfigurations that also 

258 need to be merged with defaults and will likely use different Python 

259 datastore classes (but all using DatastoreConfig). Assumed to be a 

260 list of configurations that can be represented in a DatastoreConfig 

261 and containing a "cls" definition. None indicates that no containers 

262 are expected in this Datastore.""" 

263 

264 isEphemeral: bool = False 

265 """Indicate whether this Datastore is ephemeral or not. An ephemeral 

266 datastore is one where the contents of the datastore will not exist 

267 across process restarts. This value can change per-instance.""" 

268 

269 config: DatastoreConfig 

270 """Configuration used to create Datastore.""" 

271 

272 name: str 

273 """Label associated with this Datastore.""" 

274 

275 storageClassFactory: StorageClassFactory 

276 """Factory for creating storage class instances from name.""" 

277 

278 constraints: Constraints 

279 """Constraints to apply when putting datasets into the datastore.""" 

280 

281 # MyPy does not like for this to be annotated as any kind of type, because 

282 # it can't do static checking on type variables that can change at runtime. 

283 IngestPrepData: ClassVar[Any] = IngestPrepData 

284 """Helper base class for ingest implementations. 

285 """ 

286 

287 @classmethod 

288 @abstractmethod 

289 def setConfigRoot(cls, root: str, config: Config, full: Config, overwrite: bool = True) -> None: 

290 """Set filesystem-dependent config options for this datastore. 

291 

292 The options will be appropriate for a new empty repository with the 

293 given root. 

294 

295 Parameters 

296 ---------- 

297 root : `str` 

298 Filesystem path to the root of the data repository. 

299 config : `Config` 

300 A `Config` to update. Only the subset understood by 

301 this component will be updated. Will not expand 

302 defaults. 

303 full : `Config` 

304 A complete config with all defaults expanded that can be 

305 converted to a `DatastoreConfig`. Read-only and will not be 

306 modified by this method. 

307 Repository-specific options that should not be obtained 

308 from defaults when Butler instances are constructed 

309 should be copied from ``full`` to ``config``. 

310 overwrite : `bool`, optional 

311 If `False`, do not modify a value in ``config`` if the value 

312 already exists. Default is always to overwrite with the provided 

313 ``root``. 

314 

315 Notes 

316 ----- 

317 If a keyword is explicitly defined in the supplied ``config`` it 

318 will not be overridden by this method if ``overwrite`` is `False`. 

319 This allows explicit values set in external configs to be retained. 

320 """ 

321 raise NotImplementedError() 

322 

323 @staticmethod 

324 def fromConfig( 

325 config: Config, 

326 bridgeManager: DatastoreRegistryBridgeManager, 

327 butlerRoot: ResourcePathExpression | None = None, 

328 ) -> Datastore: 

329 """Create datastore from type specified in config file. 

330 

331 Parameters 

332 ---------- 

333 config : `Config` or `~lsst.resources.ResourcePathExpression` 

334 Configuration instance. 

335 bridgeManager : `DatastoreRegistryBridgeManager` 

336 Object that manages the interface between `Registry` and 

337 datastores. 

338 butlerRoot : `str`, optional 

339 Butler root directory. 

340 """ 

341 cls = doImportType(config["datastore", "cls"]) 

342 if not issubclass(cls, Datastore): 

343 raise TypeError(f"Imported child class {config['datastore', 'cls']} is not a Datastore") 

344 return cls(config=config, bridgeManager=bridgeManager, butlerRoot=butlerRoot) 

345 
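A sketch (not from this file) of the construction path that `fromConfig` implements. The ``datastore.cls`` key is the one required by `DatastoreConfig` above; the datastore class path and the ``bridge_manager`` object are illustrative assumptions.

from lsst.daf.butler import Config

config = Config(
    {"datastore": {"cls": "lsst.daf.butler.datastores.inMemoryDatastore.InMemoryDatastore"}}
)

# fromConfig imports the named class, checks that it is a Datastore subclass,
# and delegates construction to it.
datastore = Datastore.fromConfig(config, bridgeManager=bridge_manager)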

346 def __init__( 

347 self, 

348 config: Config | ResourcePathExpression, 

349 bridgeManager: DatastoreRegistryBridgeManager, 

350 butlerRoot: ResourcePathExpression | None = None, 

351 ): 

352 self.config = DatastoreConfig(config) 

353 self.name = "ABCDataStore" 

354 self._transaction: DatastoreTransaction | None = None 

355 

356 # All Datastores need storage classes and constraints 

357 self.storageClassFactory = StorageClassFactory() 

358 

359 # And read the constraints list 

360 constraintsConfig = self.config.get("constraints") 

361 self.constraints = Constraints(constraintsConfig, universe=bridgeManager.universe) 

362 

363 def __str__(self) -> str: 

364 return self.name 

365 

366 def __repr__(self) -> str: 

367 return self.name 

368 

369 @property 

370 def names(self) -> tuple[str, ...]: 

371 """Names associated with this datastore returned as a tuple.

372 

373 Can be different to ``name`` for a chaining datastore. 

374 """ 

375 # Default implementation returns solely the name itself 

376 return (self.name,) 

377 

378 @property 

379 def roots(self) -> dict[str, ResourcePath | None]: 

380 """Return the root URIs for each named datastore. 

381 

382 Mapping from datastore name to root URI. The URI can be `None` 

383 if a datastore has no concept of a root URI. 

384 (`dict` [`str`, `ResourcePath` | `None`]) 

385 """ 

386 return {self.name: None} 

387 

388 @contextlib.contextmanager 

389 def transaction(self) -> Iterator[DatastoreTransaction]: 

390 """Context manager supporting `Datastore` transactions. 

391 

392 Transactions can be nested, and are to be used in combination with 

393 `Registry.transaction`. 

394 """ 

395 self._transaction = DatastoreTransaction(self._transaction) 

396 try: 

397 yield self._transaction 

398 except BaseException: 

399 self._transaction.rollback() 

400 raise 

401 else: 

402 self._transaction.commit() 

403 self._transaction = self._transaction.parent 

404 
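A sketch of nested use of the `transaction` context manager, assuming ``datastore`` is a concrete implementation and the in-memory datasets and refs already exist.

with datastore.transaction():
    datastore.put(dataset_a, ref_a)
    with datastore.transaction():
        # If this put fails, the inner transaction rolls back first; the
        # exception then propagates and rolls back the outer one as well.
        datastore.put(dataset_b, ref_b)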

405 @abstractmethod 

406 def knows(self, ref: DatasetRef) -> bool: 

407 """Check if the dataset is known to the datastore. 

408 

409 Does not check for existence of any artifact. 

410 

411 Parameters 

412 ---------- 

413 ref : `DatasetRef` 

414 Reference to the required dataset. 

415 

416 Returns 

417 ------- 

418 exists : `bool` 

419 `True` if the dataset is known to the datastore. 

420 """ 

421 raise NotImplementedError() 

422 

423 def knows_these(self, refs: Iterable[DatasetRef]) -> dict[DatasetRef, bool]: 

424 """Check which of the given datasets are known to this datastore. 

425 

426 This is like ``mexists()`` but does not check that the files exist.

427 

428 Parameters 

429 ---------- 

430 refs : iterable of `DatasetRef`

431 The datasets to check. 

432 

433 Returns 

434 ------- 

435 exists : `dict`[`DatasetRef`, `bool`] 

436 Mapping of dataset to boolean indicating whether the dataset 

437 is known to the datastore. 

438 """ 

439 # Non-optimized default calls knows() repeatedly. 

440 return {ref: self.knows(ref) for ref in refs} 

441 

442 def mexists( 

443 self, refs: Iterable[DatasetRef], artifact_existence: dict[ResourcePath, bool] | None = None 

444 ) -> dict[DatasetRef, bool]: 

445 """Check the existence of multiple datasets at once. 

446 

447 Parameters 

448 ---------- 

449 refs : iterable of `DatasetRef` 

450 The datasets to be checked. 

451 artifact_existence : `dict` [`lsst.resources.ResourcePath`, `bool`] 

452 Optional mapping of datastore artifact to existence. Updated by 

453 this method with details of all artifacts tested. Can be `None` 

454 if the caller is not interested. 

455 

456 Returns 

457 ------- 

458 existence : `dict` of [`DatasetRef`, `bool`] 

459 Mapping from dataset to boolean indicating existence. 

460 """ 

461 existence: dict[DatasetRef, bool] = {} 

462 # Non-optimized default. 

463 for ref in refs: 

464 existence[ref] = self.exists(ref) 

465 return existence 

466 
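A usage sketch of the bulk-existence helpers above, assuming ``datastore`` and an iterable of resolved `DatasetRef` objects called ``refs`` already exist.

known = datastore.knows_these(refs)   # datastore records only, no artifact check
existing = datastore.mexists(refs)    # also checks that the artifacts exist

missing = [ref for ref, exists in existing.items() if not exists]
print(f"{len(missing)} datasets have no artifacts in {datastore.name}")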

467 @abstractmethod 

468 def exists(self, datasetRef: DatasetRef) -> bool: 

469 """Check if the dataset exists in the datastore. 

470 

471 Parameters 

472 ---------- 

473 datasetRef : `DatasetRef` 

474 Reference to the required dataset. 

475 

476 Returns 

477 ------- 

478 exists : `bool` 

479 `True` if the entity exists in the `Datastore`. 

480 """ 

481 raise NotImplementedError("Must be implemented by subclass") 

482 

483 @abstractmethod 

484 def get( 

485 self, 

486 datasetRef: DatasetRef, 

487 parameters: Mapping[str, Any] | None = None, 

488 storageClass: StorageClass | str | None = None, 

489 ) -> Any: 

490 """Load an `InMemoryDataset` from the store. 

491 

492 Parameters 

493 ---------- 

494 datasetRef : `DatasetRef` 

495 Reference to the required Dataset. 

496 parameters : `dict` 

497 `StorageClass`-specific parameters that specify a slice of the 

498 Dataset to be loaded. 

499 storageClass : `StorageClass` or `str`, optional 

500 The storage class to be used to override the Python type 

501 returned by this method. By default the returned type matches 

502 the dataset type definition for this dataset. Specifying a 

503 read `StorageClass` can force a different type to be returned. 

504 This type must be compatible with the original type. 

505 

506 Returns 

507 ------- 

508 inMemoryDataset : `object` 

509 Requested Dataset or slice thereof as an InMemoryDataset. 

510 """ 

511 raise NotImplementedError("Must be implemented by subclass") 

512 

513 @abstractmethod 

514 def put(self, inMemoryDataset: Any, datasetRef: DatasetRef) -> None: 

515 """Write an `InMemoryDataset` with a given `DatasetRef` to the store.

516 

517 Parameters 

518 ---------- 

519 inMemoryDataset : `object` 

520 The Dataset to store. 

521 datasetRef : `DatasetRef` 

522 Reference to the associated Dataset. 

523 """ 

524 raise NotImplementedError("Must be implemented by subclass") 

525 

526 def _overrideTransferMode(self, *datasets: FileDataset, transfer: str | None = None) -> str | None: 

527 """Allow ingest transfer mode to be defaulted based on datasets. 

528 

529 Parameters 

530 ---------- 

531 datasets : `FileDataset` 

532 Each positional argument is a struct containing information about 

533 a file to be ingested, including its path (either absolute or 

534 relative to the datastore root, if applicable), a complete 

535 `DatasetRef` (with ``dataset_id not None``), and optionally a 

536 formatter class or its fully-qualified string name. If a formatter 

537 is not provided, this method should populate that attribute with 

538 the formatter the datastore would use for `put`. Subclasses are 

539 also permitted to modify the path attribute (typically to put it 

540 in what the datastore considers its standard form). 

541 transfer : `str`, optional 

542 How (and whether) the dataset should be added to the datastore. 

543 See `ingest` for details of transfer modes. 

544 

545 Returns 

546 ------- 

547 newTransfer : `str` 

548 Transfer mode to use. Will be identical to the supplied transfer 

549 mode unless "auto" is used. 

550 """ 

551 if transfer != "auto": 

552 return transfer 

553 raise RuntimeError(f"{transfer} is not allowed without specialization.") 

554 

555 def _prepIngest(self, *datasets: FileDataset, transfer: str | None = None) -> IngestPrepData: 

556 """Process datasets to identify which ones can be ingested. 

557 

558 Parameters 

559 ---------- 

560 datasets : `FileDataset` 

561 Each positional argument is a struct containing information about 

562 a file to be ingested, including its path (either absolute or 

563 relative to the datastore root, if applicable), a complete 

564 `DatasetRef` (with ``dataset_id not None``), and optionally a 

565 formatter class or its fully-qualified string name. If a formatter 

566 is not provided, this method should populate that attribute with 

567 the formatter the datastore would use for `put`. Subclasses are 

568 also permitted to modify the path attribute (typically to put it 

569 in what the datastore considers its standard form). 

570 transfer : `str`, optional 

571 How (and whether) the dataset should be added to the datastore. 

572 See `ingest` for details of transfer modes. 

573 

574 Returns 

575 ------- 

576 data : `IngestPrepData` 

577 An instance of a subclass of `IngestPrepData`, used to pass 

578 arbitrary data from `_prepIngest` to `_finishIngest`. This should 

579 include only the datasets this datastore can actually ingest; 

580 others should be silently ignored (`Datastore.ingest` will inspect 

581 `IngestPrepData.refs` and raise `DatasetTypeNotSupportedError` if 

582 necessary). 

583 

584 Raises 

585 ------ 

586 NotImplementedError 

587 Raised if the datastore does not support the given transfer mode 

588 (including the case where ingest is not supported at all). 

589 FileNotFoundError 

590 Raised if one of the given files does not exist. 

591 FileExistsError 

592 Raised if transfer is not `None` but the (internal) location the 

593 file would be moved to is already occupied. 

594 

595 Notes 

596 ----- 

597 This method (along with `_finishIngest`) should be implemented by 

598 subclasses to provide ingest support instead of implementing `ingest` 

599 directly. 

600 

601 `_prepIngest` should not modify the data repository or given files in 

602 any way; all changes should be deferred to `_finishIngest`. 

603 

604 When possible, exceptions should be raised in `_prepIngest` instead of 

605 `_finishIngest`. `NotImplementedError` exceptions that indicate that 

606 the transfer mode is not supported must be raised by `_prepIngest` 

607 instead of `_finishIngest`. 

608 """ 

609 raise NotImplementedError(f"Datastore {self} does not support direct file-based ingest.") 

610 

611 def _finishIngest( 

612 self, prepData: IngestPrepData, *, transfer: str | None = None, record_validation_info: bool = True 

613 ) -> None: 

614 """Complete an ingest operation. 

615 

616 Parameters 

617 ---------- 

618 data : `IngestPrepData` 

619 An instance of a subclass of `IngestPrepData`. Guaranteed to be 

620 the direct result of a call to `_prepIngest` on this datastore. 

621 transfer : `str`, optional 

622 How (and whether) the dataset should be added to the datastore. 

623 See `ingest` for details of transfer modes. 

624 record_validation_info : `bool`, optional 

625 If `True`, the default, the datastore can record validation 

626 information associated with the file. If `False` the datastore 

627 will not attempt to track any information such as checksums 

628 or file sizes. This can be useful if such information is tracked 

629 in an external system or if the file is to be compressed in place. 

630 It is up to the datastore whether this parameter is relevant. 

631 

632 Raises 

633 ------ 

634 FileNotFoundError 

635 Raised if one of the given files does not exist. 

636 FileExistsError 

637 Raised if transfer is not `None` but the (internal) location the 

638 file would be moved to is already occupied. 

639 

640 Notes 

641 ----- 

642 This method (along with `_prepIngest`) should be implemented by 

643 subclasses to provide ingest support instead of implementing `ingest` 

644 directly. 

645 """ 

646 raise NotImplementedError(f"Datastore {self} does not support direct file-based ingest.") 

647 

648 def ingest( 

649 self, *datasets: FileDataset, transfer: str | None = None, record_validation_info: bool = True 

650 ) -> None: 

651 """Ingest one or more files into the datastore. 

652 

653 Parameters 

654 ---------- 

655 datasets : `FileDataset` 

656 Each positional argument is a struct containing information about 

657 a file to be ingested, including its path (either absolute or 

658 relative to the datastore root, if applicable), a complete 

659 `DatasetRef` (with ``dataset_id not None``), and optionally a 

660 formatter class or its fully-qualified string name. If a formatter 

661 is not provided, the one the datastore would use for ``put`` on 

662 that dataset is assumed. 

663 transfer : `str`, optional 

664 How (and whether) the dataset should be added to the datastore. 

665 If `None` (default), the file must already be in a location 

666 appropriate for the datastore (e.g. within its root directory), 

667 and will not be modified. Other choices include "move", "copy", 

668 "link", "symlink", "relsymlink", and "hardlink". "link" is a 

669 special transfer mode that will first try to make a hardlink and 

670 if that fails a symlink will be used instead. "relsymlink" creates 

671 a relative symlink rather than use an absolute path. 

672 Most datastores do not support all transfer modes. 

673 "auto" is a special option that will let the 

674 data store choose the most natural option for itself. 

675 record_validation_info : `bool`, optional 

676 If `True`, the default, the datastore can record validation 

677 information associated with the file. If `False` the datastore 

678 will not attempt to track any information such as checksums 

679 or file sizes. This can be useful if such information is tracked 

680 in an external system or if the file is to be compressed in place. 

681 It is up to the datastore whether this parameter is relevant. 

682 

683 Raises 

684 ------ 

685 NotImplementedError 

686 Raised if the datastore does not support the given transfer mode 

687 (including the case where ingest is not supported at all). 

688 DatasetTypeNotSupportedError 

689 Raised if one or more files to be ingested have a dataset type that 

690 is not supported by the datastore. 

691 FileNotFoundError 

692 Raised if one of the given files does not exist. 

693 FileExistsError 

694 Raised if transfer is not `None` but the (internal) location the 

695 file would be moved to is already occupied. 

696 

697 Notes 

698 ----- 

699 Subclasses should implement `_prepIngest` and `_finishIngest` instead 

700 of implementing `ingest` directly. Datastores that hold and 

701 delegate to child datastores may want to call those methods as well. 

702 

703 Subclasses are encouraged to document their supported transfer modes 

704 in their class documentation. 

705 """ 

706 # Allow a datastore to select a default transfer mode 

707 transfer = self._overrideTransferMode(*datasets, transfer=transfer) 

708 prepData = self._prepIngest(*datasets, transfer=transfer) 

709 refs = {ref.id: ref for dataset in datasets for ref in dataset.refs} 

710 if refs.keys() != prepData.refs.keys(): 

711 unsupported = refs.keys() - prepData.refs.keys() 

712 # Group unsupported refs by DatasetType for an informative 

713 # but still concise error message. 

714 byDatasetType = defaultdict(list) 

715 for datasetId in unsupported: 

716 ref = refs[datasetId] 

717 byDatasetType[ref.datasetType].append(ref) 

718 raise DatasetTypeNotSupportedError( 

719 "DatasetType(s) not supported in ingest: " 

720 + ", ".join(f"{k.name} ({len(v)} dataset(s))" for k, v in byDatasetType.items()) 

721 ) 

722 self._finishIngest(prepData, transfer=transfer, record_validation_info=record_validation_info) 

723 
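A minimal sketch of the subclass hook pattern described in the Notes above: `_prepIngest` filters to the refs the datastore accepts and `_finishIngest` does the actual work. The transfer-mode handling and the ``Constraints.isAcceptable`` call are illustrative assumptions, not the behaviour of any concrete datastore; the other abstract methods are omitted, and the names used come from this module.

class SketchDatastore(Datastore):
    """Illustrative subclass; the remaining abstract methods are omitted."""

    def _prepIngest(self, *datasets: FileDataset, transfer: str | None = None) -> IngestPrepData:
        if transfer not in (None, "copy"):
            raise NotImplementedError(f"Transfer mode {transfer!r} is not supported.")
        # Keep only acceptable refs; ingest() raises DatasetTypeNotSupportedError
        # for anything left out of the returned IngestPrepData.
        accepted = [
            ref for dataset in datasets for ref in dataset.refs if self.constraints.isAcceptable(ref)
        ]
        return IngestPrepData(accepted)

    def _finishIngest(
        self, prepData: IngestPrepData, *, transfer: str | None = None, record_validation_info: bool = True
    ) -> None:
        for ref in prepData.refs.values():
            ...  # transfer the file artifact and record its location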

724 def transfer_from( 

725 self, 

726 source_datastore: Datastore, 

727 refs: Iterable[DatasetRef], 

728 transfer: str = "auto", 

729 artifact_existence: dict[ResourcePath, bool] | None = None, 

730 ) -> tuple[set[DatasetRef], set[DatasetRef]]: 

731 """Transfer dataset artifacts from another datastore to this one. 

732 

733 Parameters 

734 ---------- 

735 source_datastore : `Datastore` 

736 The datastore from which to transfer artifacts. That datastore 

737 must be compatible with this datastore receiving the artifacts. 

738 refs : iterable of `DatasetRef` 

739 The datasets to transfer from the source datastore. 

740 transfer : `str`, optional 

741 How (and whether) the dataset should be added to the datastore. 

742 Choices include "move", "copy", 

743 "link", "symlink", "relsymlink", and "hardlink". "link" is a 

744 special transfer mode that will first try to make a hardlink and 

745 if that fails a symlink will be used instead. "relsymlink" creates 

746 a relative symlink rather than use an absolute path. 

747 Most datastores do not support all transfer modes. 

748 "auto" (the default) is a special option that will let the 

749 data store choose the most natural option for itself. 

750 If the source location and transfer location are identical the 

751 transfer mode will be ignored. 

752 artifact_existence : `dict` [`lsst.resources.ResourcePath`, `bool`] 

753 Optional mapping of datastore artifact to existence. Updated by 

754 this method with details of all artifacts tested. Can be `None` 

755 if the caller is not interested. 

756 

757 Returns 

758 ------- 

759 accepted : `set` [`DatasetRef`] 

760 The datasets that were transferred. 

761 rejected : `set` [`DatasetRef`] 

762 The datasets that were rejected due to a constraints violation. 

763 

764 Raises 

765 ------ 

766 TypeError 

767 Raised if the two datastores are not compatible. 

768 """ 

769 if type(self) is not type(source_datastore): 

770 raise TypeError( 

771 f"Datastore mismatch between this datastore ({type(self)}) and the " 

772 f"source datastore ({type(source_datastore)})." 

773 ) 

774 

775 raise NotImplementedError(f"Datastore {type(self)} must implement a transfer_from method.") 

776 

777 def getManyURIs( 

778 self, 

779 refs: Iterable[DatasetRef], 

780 predict: bool = False, 

781 allow_missing: bool = False, 

782 ) -> dict[DatasetRef, DatasetRefURIs]: 

783 """Return URIs associated with many datasets. 

784 

785 Parameters 

786 ---------- 

787 refs : iterable of `DatasetIdRef` 

788 References to the required datasets. 

789 predict : `bool`, optional 

790 If `True`, allow URIs to be returned of datasets that have not 

791 been written. 

792 allow_missing : `bool` 

793 If `False`, and ``predict`` is `False`, will raise if a 

794 `DatasetRef` does not exist. 

795 

796 Returns 

797 ------- 

798 URIs : `dict` of [`DatasetRef`, `DatasetRefURIs`]

799 A dict of primary and component URIs, indexed by the passed-in 

800 refs. 

801 

802 Raises 

803 ------ 

804 FileNotFoundError 

805 A URI has been requested for a dataset that does not exist and 

806 guessing is not allowed. 

807 

808 Notes 

809 ----- 

810 In file-based datastores, getManyURIs does not check that the files

811 are really there: it assumes that if the datastore is aware of a file

812 then that file actually exists.

813 """ 

814 uris: dict[DatasetRef, DatasetRefURIs] = {} 

815 missing_refs = [] 

816 for ref in refs: 

817 try: 

818 uris[ref] = self.getURIs(ref, predict=predict) 

819 except FileNotFoundError: 

820 missing_refs.append(ref) 

821 if missing_refs and not allow_missing: 

822 raise FileNotFoundError( 

823 "Missing {} refs from datastore out of {} and predict=False.".format( 

824 num_missing := len(missing_refs), num_missing + len(uris) 

825 ) 

826 ) 

827 return uris 

828 
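A usage sketch of `getManyURIs`, assuming ``datastore`` and ``refs`` exist; with ``allow_missing=True`` the result simply omits refs whose artifacts are unknown.

uri_map = datastore.getManyURIs(refs, predict=False, allow_missing=True)
for ref, ref_uris in uri_map.items():
    if ref_uris.primaryURI is not None:
        print(ref, ref_uris.primaryURI)
    else:
        print(ref, "disassembled into components:", list(ref_uris.componentURIs))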

829 @abstractmethod 

830 def getURIs(self, datasetRef: DatasetRef, predict: bool = False) -> DatasetRefURIs: 

831 """Return URIs associated with dataset. 

832 

833 Parameters 

834 ---------- 

835 datasetRef : `DatasetRef`

836 Reference to the required dataset. 

837 predict : `bool`, optional 

838 If the datastore does not know about the dataset, should it 

839 return a predicted URI or not? 

840 

841 Returns 

842 ------- 

843 uris : `DatasetRefURIs` 

844 The URI to the primary artifact associated with this dataset (if 

845 the dataset was disassembled within the datastore this may be 

846 `None`), and the URIs to any components associated with the dataset 

847 artifact. (can be empty if there are no components). 

848 """ 

849 raise NotImplementedError() 

850 

851 @abstractmethod 

852 def getURI(self, datasetRef: DatasetRef, predict: bool = False) -> ResourcePath: 

853 """URI to the Dataset. 

854 

855 Parameters 

856 ---------- 

857 datasetRef : `DatasetRef` 

858 Reference to the required Dataset. 

859 predict : `bool` 

860 If `True` attempt to predict the URI for a dataset if it does 

861 not exist in datastore. 

862 

863 Returns 

864 ------- 

865 uri : `lsst.resources.ResourcePath`

866 URI pointing to the Dataset within the datastore. If the

867 Dataset does not exist in the datastore, the URI may be a guess. 

868 If the datastore does not have entities that relate well 

869 to the concept of a URI the returned URI string will be 

870 descriptive. The returned URI is not guaranteed to be obtainable. 

871 

872 Raises 

873 ------ 

874 FileNotFoundError 

875 A URI has been requested for a dataset that does not exist and 

876 guessing is not allowed. 

877 """ 

878 raise NotImplementedError("Must be implemented by subclass") 

879 

880 @abstractmethod 

881 def retrieveArtifacts( 

882 self, 

883 refs: Iterable[DatasetRef], 

884 destination: ResourcePath, 

885 transfer: str = "auto", 

886 preserve_path: bool = True, 

887 overwrite: bool = False, 

888 ) -> list[ResourcePath]: 

889 """Retrieve the artifacts associated with the supplied refs. 

890 

891 Parameters 

892 ---------- 

893 refs : iterable of `DatasetRef` 

894 The datasets for which artifacts are to be retrieved. 

895 A single ref can result in multiple artifacts. The refs must 

896 be resolved. 

897 destination : `lsst.resources.ResourcePath` 

898 Location to write the artifacts. 

899 transfer : `str`, optional 

900 Method to use to transfer the artifacts. Must be one of the options 

901 supported by `lsst.resources.ResourcePath.transfer_from()`. 

902 "move" is not allowed. 

903 preserve_path : `bool`, optional 

904 If `True` the full path of the artifact within the datastore 

905 is preserved. If `False` the final file component of the path 

906 is used. 

907 overwrite : `bool`, optional 

908 If `True` allow transfers to overwrite existing files at the 

909 destination. 

910 

911 Returns 

912 ------- 

913 targets : `list` of `lsst.resources.ResourcePath` 

914 URIs of file artifacts in destination location. Order is not 

915 preserved. 

916 

917 Notes 

918 ----- 

919 For non-file datastores the artifacts written to the destination 

920 may not match the representation inside the datastore. For example 

921 a hierarchical data structure in a NoSQL database may well be stored

922 as a JSON file. 

923 """ 

924 raise NotImplementedError() 

925 
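A usage sketch of `retrieveArtifacts` for a concrete datastore, assuming ``datastore`` and ``refs`` exist; the destination directory and the availability of the "copy" transfer mode are assumptions.

from lsst.resources import ResourcePath

# Copy the artifacts out of the datastore, keeping their internal paths.
destination = ResourcePath("/tmp/export/", forceDirectory=True)
targets = datastore.retrieveArtifacts(refs, destination, transfer="copy", preserve_path=True)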

926 @abstractmethod 

927 def remove(self, datasetRef: DatasetRef) -> None: 

928 """Indicate to the Datastore that a Dataset can be removed. 

929 

930 Parameters 

931 ---------- 

932 datasetRef : `DatasetRef` 

933 Reference to the required Dataset. 

934 

935 Raises 

936 ------ 

937 FileNotFoundError 

938 When Dataset does not exist. 

939 

940 Notes 

941 ----- 

942 Some Datastores may implement this method as a silent no-op to 

943 disable Dataset deletion through standard interfaces. 

944 """ 

945 raise NotImplementedError("Must be implemented by subclass") 

946 

947 @abstractmethod 

948 def forget(self, refs: Iterable[DatasetRef]) -> None: 

949 """Indicate to the Datastore that it should remove all records of the 

950 given datasets, without actually deleting them. 

951 

952 Parameters 

953 ---------- 

954 refs : `~collections.abc.Iterable` [ `DatasetRef` ] 

955 References to the datasets being forgotten. 

956 

957 Notes 

958 ----- 

959 Asking a datastore to forget a `DatasetRef` it does not hold should be 

960 a silent no-op, not an error. 

961 """ 

962 raise NotImplementedError("Must be implemented by subclass") 

963 

964 @abstractmethod 

965 def trash(self, ref: DatasetRef | Iterable[DatasetRef], ignore_errors: bool = True) -> None: 

966 """Indicate to the Datastore that a Dataset can be moved to the trash. 

967 

968 Parameters 

969 ---------- 

970 ref : `DatasetRef` or iterable thereof 

971 Reference(s) to the required Dataset. 

972 ignore_errors : `bool`, optional 

973 Determine whether errors should be ignored. When multiple 

974 refs are being trashed there will be no per-ref check. 

975 

976 Raises 

977 ------ 

978 FileNotFoundError 

979 When Dataset does not exist and errors are not ignored. Only 

980 checked if a single ref is supplied (and not in a list). 

981 

982 Notes 

983 ----- 

984 Some Datastores may implement this method as a silent no-op to 

985 disable Dataset deletion through standard interfaces. 

986 """ 

987 raise NotImplementedError("Must be implemented by subclass") 

988 

989 @abstractmethod 

990 def emptyTrash(self, ignore_errors: bool = True) -> None: 

991 """Remove all datasets from the trash. 

992 

993 Parameters 

994 ---------- 

995 ignore_errors : `bool`, optional 

996 Determine whether errors should be ignored. 

997 

998 Notes 

999 ----- 

1000 Some Datastores may implement this method as a silent no-op to 

1001 disable Dataset deletion through standard interfaces. 

1002 """ 

1003 raise NotImplementedError("Must be implemented by subclass") 

1004 

1005 @abstractmethod 

1006 def transfer(self, inputDatastore: Datastore, datasetRef: DatasetRef) -> None: 

1007 """Transfer a dataset from another datastore to this datastore. 

1008 

1009 Parameters 

1010 ---------- 

1011 inputDatastore : `Datastore` 

1012 The external `Datastore` from which to retrieve the Dataset. 

1013 datasetRef : `DatasetRef` 

1014 Reference to the required Dataset. 

1015 """ 

1016 raise NotImplementedError("Must be implemented by subclass") 

1017 

1018 def export( 

1019 self, 

1020 refs: Iterable[DatasetRef], 

1021 *, 

1022 directory: ResourcePathExpression | None = None, 

1023 transfer: str | None = "auto", 

1024 ) -> Iterable[FileDataset]: 

1025 """Export datasets for transfer to another data repository. 

1026 

1027 Parameters 

1028 ---------- 

1029 refs : iterable of `DatasetRef` 

1030 Dataset references to be exported. 

1031 directory : `str`, optional 

1032 Path to a directory that should contain files corresponding to 

1033 output datasets. Ignored if ``transfer`` is explicitly `None`. 

1034 transfer : `str`, optional 

1035 Mode that should be used to move datasets out of the repository. 

1036 Valid options are the same as those of the ``transfer`` argument 

1037 to ``ingest``, and datastores may similarly signal that a transfer 

1038 mode is not supported by raising `NotImplementedError`. If "auto" 

1039 is given and no ``directory`` is specified, `None` will be 

1040 implied. 

1041 

1042 Returns 

1043 ------- 

1044 datasets : iterable of `FileDataset`

1045 Structs containing information about the exported datasets, in the 

1046 same order as ``refs``. 

1047 

1048 Raises 

1049 ------ 

1050 NotImplementedError 

1051 Raised if the given transfer mode is not supported. 

1052 """ 

1053 raise NotImplementedError(f"Transfer mode {transfer} not supported.") 

1054 

1055 @abstractmethod 

1056 def validateConfiguration( 

1057 self, entities: Iterable[DatasetRef | DatasetType | StorageClass], logFailures: bool = False 

1058 ) -> None: 

1059 """Validate some of the configuration for this datastore. 

1060 

1061 Parameters 

1062 ---------- 

1063 entities : iterable of `DatasetRef`, `DatasetType`, or `StorageClass` 

1064 Entities to test against this configuration. Can be differing 

1065 types. 

1066 logFailures : `bool`, optional 

1067 If `True`, output a log message for every validation error 

1068 detected. 

1069 

1070 Raises 

1071 ------ 

1072 DatastoreValidationError 

1073 Raised if there is a validation problem with a configuration. 

1074 

1075 Notes 

1076 ----- 

1077 Which parts of the configuration are validated is at the discretion 

1078 of each Datastore implementation. 

1079 """ 

1080 raise NotImplementedError("Must be implemented by subclass") 

1081 

1082 @abstractmethod 

1083 def validateKey(self, lookupKey: LookupKey, entity: DatasetRef | DatasetType | StorageClass) -> None: 

1084 """Validate a specific look up key with supplied entity. 

1085 

1086 Parameters 

1087 ---------- 

1088 lookupKey : `LookupKey` 

1089 Key to use to retrieve information from the datastore 

1090 configuration. 

1091 entity : `DatasetRef`, `DatasetType`, or `StorageClass` 

1092 Entity to compare with configuration retrieved using the 

1093 specified lookup key. 

1094 

1095 Raises 

1096 ------ 

1097 DatastoreValidationError 

1098 Raised if there is a problem with the combination of entity 

1099 and lookup key. 

1100 

1101 Notes 

1102 ----- 

1103 Bypasses the normal selection priorities by allowing a key that 

1104 would normally not be selected to be validated. 

1105 """ 

1106 raise NotImplementedError("Must be implemented by subclass") 

1107 

1108 @abstractmethod 

1109 def getLookupKeys(self) -> set[LookupKey]: 

1110 """Return all the lookup keys relevant to this datastore. 

1111 

1112 Returns 

1113 ------- 

1114 keys : `set` of `LookupKey` 

1115 The keys stored internally for looking up information based 

1116 on `DatasetType` name or `StorageClass`. 

1117 """ 

1118 raise NotImplementedError("Must be implemented by subclass") 

1119 

1120 def needs_expanded_data_ids( 

1121 self, 

1122 transfer: str | None, 

1123 entity: DatasetRef | DatasetType | StorageClass | None = None, 

1124 ) -> bool: 

1125 """Test whether this datastore needs expanded data IDs to ingest. 

1126 

1127 Parameters 

1128 ---------- 

1129 transfer : `str` or `None` 

1130 Transfer mode for ingest. 

1131 entity : `DatasetRef`, `DatasetType`, or `StorageClass`, optional

1132 Object representing what will be ingested. If not provided (or not 

1133 specific enough), `True` may be returned even if expanded data 

1134 IDs aren't necessary. 

1135 

1136 Returns 

1137 ------- 

1138 needed : `bool` 

1139 If `True`, expanded data IDs may be needed. `False` only if 

1140 expansion definitely isn't necessary. 

1141 """ 

1142 return True 

1143 

1144 @abstractmethod 

1145 def import_records( 

1146 self, 

1147 data: Mapping[str, DatastoreRecordData], 

1148 ) -> None: 

1149 """Import datastore location and record data from an in-memory data 

1150 structure. 

1151 

1152 Parameters 

1153 ---------- 

1154 data : `~collections.abc.Mapping` [ `str`, `DatastoreRecordData` ] 

1155 Datastore records indexed by datastore name. May contain data for 

1156 other `Datastore` instances (generally because they are chained to 

1157 this one), which should be ignored. 

1158 

1159 Notes 

1160 ----- 

1161 Implementations should generally not check that any external resources 

1162 (e.g. files) referred to by these records actually exist, for 

1163 performance reasons; we expect higher-level code to guarantee that they 

1164 do. 

1165 

1166 Implementations are responsible for calling 

1167 `DatastoreRegistryBridge.insert` on all datasets in ``data.locations`` 

1168 where the key is in `names`, as well as loading any opaque table data. 

1169 

1170 Implementations may assume that datasets are either fully present or 

1171 not at all (single-component exports are not permitted). 

1172 """ 

1173 raise NotImplementedError() 

1174 

1175 @abstractmethod 

1176 def export_records( 

1177 self, 

1178 refs: Iterable[DatasetIdRef], 

1179 ) -> Mapping[str, DatastoreRecordData]: 

1180 """Export datastore records and locations to an in-memory data 

1181 structure. 

1182 

1183 Parameters 

1184 ---------- 

1185 refs : `~collections.abc.Iterable` [ `DatasetIdRef` ] 

1186 Datasets to save. This may include datasets not known to this 

1187 datastore, which should be ignored. May not include component 

1188 datasets. 

1189 

1190 Returns 

1191 ------- 

1192 data : `~collections.abc.Mapping` [ `str`, `DatastoreRecordData` ] 

1193 Exported datastore records indexed by datastore name. 

1194 """ 

1195 raise NotImplementedError() 

1196 

1197 def set_retrieve_dataset_type_method(self, method: Callable[[str], DatasetType | None] | None) -> None: 

1198 """Specify a method that can be used by datastore to retrieve 

1199 registry-defined dataset type. 

1200 

1201 Parameters 

1202 ---------- 

1203 method : `~collections.abc.Callable` | `None` 

1204 Method that takes the name of a dataset type and returns a

1205 corresponding `DatasetType` instance as defined in Registry. If the

1206 dataset type name is not known to the registry, `None` is returned.

1207 

1208 Notes 

1209 ----- 

1210 This method is only needed for a Datastore supporting a "trusted" mode 

1211 when it does not have access to datastore records and needs to

1212 guess the dataset location based on its stored dataset type.

1213 """ 

1214 pass 

1215 

1216 

1217class NullDatastore(Datastore): 

1218 """A datastore that implements the `Datastore` API but always fails when 

1219 it accepts any request. 

1220 """ 

1221 

1222 @classmethod 

1223 def setConfigRoot(cls, root: str, config: Config, full: Config, overwrite: bool = True) -> None: 

1224 # Nothing to do. This is not a real Datastore. 

1225 pass 

1226 

1227 def __init__( 

1228 self, 

1229 config: Config | ResourcePathExpression | None, 

1230 bridgeManager: DatastoreRegistryBridgeManager | None, 

1231 butlerRoot: ResourcePathExpression | None = None, 

1232 ): 

1233 # Name ourselves with the timestamp at which the datastore

1234 # was created. 

1235 self.name = f"{type(self).__name__}@{time.time()}" 

1236 _LOG.debug("Creating datastore %s", self.name) 

1237 

1238 return 

1239 
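A small sketch of where `NullDatastore` is useful: as a stand-in when only registry-level operations are needed. The ``some_ref`` object is hypothetical; read and write calls raise, while the existence checks below simply report `False`.

null = NullDatastore(config=None, bridgeManager=None)

assert not null.knows(some_ref)
assert not null.exists(some_ref)
try:
    null.get(some_ref)
except FileNotFoundError:
    pass  # the null datastore never holds any artifacts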

1240 def knows(self, ref: DatasetRef) -> bool: 

1241 return False 

1242 

1243 def exists(self, datasetRef: DatasetRef) -> bool: 

1244 return False 

1245 

1246 def get( 

1247 self, 

1248 datasetRef: DatasetRef, 

1249 parameters: Mapping[str, Any] | None = None, 

1250 storageClass: StorageClass | str | None = None, 

1251 ) -> Any: 

1252 raise FileNotFoundError("This is a no-op datastore that can not access a real datastore") 

1253 

1254 def put(self, inMemoryDataset: Any, datasetRef: DatasetRef) -> None: 

1255 raise NotImplementedError("This is a no-op datastore that can not access a real datastore") 

1256 

1257 def ingest( 

1258 self, *datasets: FileDataset, transfer: str | None = None, record_validation_info: bool = True 

1259 ) -> None: 

1260 raise NotImplementedError("This is a no-op datastore that can not access a real datastore") 

1261 

1262 def transfer_from( 

1263 self, 

1264 source_datastore: Datastore, 

1265 refs: Iterable[DatasetRef], 

1266 transfer: str = "auto", 

1267 artifact_existence: dict[ResourcePath, bool] | None = None, 

1268 ) -> tuple[set[DatasetRef], set[DatasetRef]]: 

1269 raise NotImplementedError("This is a no-op datastore that can not access a real datastore") 

1270 

1271 def getURIs(self, datasetRef: DatasetRef, predict: bool = False) -> DatasetRefURIs: 

1272 raise FileNotFoundError("This is a no-op datastore that can not access a real datastore") 

1273 

1274 def getURI(self, datasetRef: DatasetRef, predict: bool = False) -> ResourcePath: 

1275 raise FileNotFoundError("This is a no-op datastore that can not access a real datastore") 

1276 

1277 def retrieveArtifacts( 

1278 self, 

1279 refs: Iterable[DatasetRef], 

1280 destination: ResourcePath, 

1281 transfer: str = "auto", 

1282 preserve_path: bool = True, 

1283 overwrite: bool = False, 

1284 ) -> list[ResourcePath]: 

1285 raise NotImplementedError("This is a no-op datastore that can not access a real datastore") 

1286 

1287 def remove(self, datasetRef: DatasetRef) -> None: 

1288 raise NotImplementedError("This is a no-op datastore that can not access a real datastore") 

1289 

1290 def forget(self, refs: Iterable[DatasetRef]) -> None: 

1291 raise NotImplementedError("This is a no-op datastore that can not access a real datastore") 

1292 

1293 def trash(self, ref: DatasetRef | Iterable[DatasetRef], ignore_errors: bool = True) -> None: 

1294 raise NotImplementedError("This is a no-op datastore that can not access a real datastore") 

1295 

1296 def emptyTrash(self, ignore_errors: bool = True) -> None: 

1297 raise NotImplementedError("This is a no-op datastore that can not access a real datastore") 

1298 

1299 def transfer(self, inputDatastore: Datastore, datasetRef: DatasetRef) -> None: 

1300 raise NotImplementedError("This is a no-op datastore that can not access a real datastore") 

1301 

1302 def export( 

1303 self, 

1304 refs: Iterable[DatasetRef], 

1305 *, 

1306 directory: ResourcePathExpression | None = None, 

1307 transfer: str | None = "auto", 

1308 ) -> Iterable[FileDataset]: 

1309 raise NotImplementedError("This is a no-op datastore that can not access a real datastore") 

1310 

1311 def validateConfiguration( 

1312 self, entities: Iterable[DatasetRef | DatasetType | StorageClass], logFailures: bool = False 

1313 ) -> None: 

1314 # No configuration so always validates. 

1315 pass 

1316 

1317 def validateKey(self, lookupKey: LookupKey, entity: DatasetRef | DatasetType | StorageClass) -> None: 

1318 pass 

1319 

1320 def getLookupKeys(self) -> set[LookupKey]: 

1321 raise NotImplementedError("This is a no-op datastore that can not access a real datastore") 

1322 

1323 def import_records( 

1324 self, 

1325 data: Mapping[str, DatastoreRecordData], 

1326 ) -> None: 

1327 raise NotImplementedError("This is a no-op datastore that can not access a real datastore") 

1328 

1329 def export_records( 

1330 self, 

1331 refs: Iterable[DatasetIdRef], 

1332 ) -> Mapping[str, DatastoreRecordData]: 

1333 raise NotImplementedError("This is a no-op datastore that can not access a real datastore")