# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Support for generic data stores."""

from __future__ import annotations

__all__ = (
    "DatasetRefURIs",
    "Datastore",
    "DatastoreConfig",
    "DatastoreOpaqueTable",
    "DatastoreTransaction",
    "DatastoreValidationError",
    "NullDatastore",
)

import contextlib
import dataclasses
import logging
import time
from abc import ABCMeta, abstractmethod
from collections import abc, defaultdict
from collections.abc import Callable, Collection, Iterable, Iterator, Mapping
from typing import TYPE_CHECKING, Any, ClassVar

from lsst.utils import doImportType

from .._config import Config, ConfigSubset
from .._exceptions import DatasetTypeNotSupportedError, ValidationError
from .._file_dataset import FileDataset
from .._storage_class import StorageClassFactory
from ._transfer import FileTransferMap, FileTransferSource
from .constraints import Constraints

if TYPE_CHECKING:
    from lsst.resources import ResourcePath, ResourcePathExpression

    from .. import ddl
    from .._config_support import LookupKey
    from .._dataset_provenance import DatasetProvenance
    from .._dataset_ref import DatasetId, DatasetRef
    from .._dataset_type import DatasetType
    from .._storage_class import StorageClass
    from ..datastores.file_datastore.get import DatasetLocationInformation
    from ..datastores.file_datastore.retrieve_artifacts import ArtifactIndexInfo
    from ..registry.interfaces import DatasetIdRef, DatastoreRegistryBridgeManager
    from .record_data import DatastoreRecordData
    from .stored_file_info import StoredDatastoreItemInfo

_LOG = logging.getLogger(__name__)


class DatastoreConfig(ConfigSubset):
    """Configuration for Datastores."""

    component = "datastore"
    requiredKeys = ("cls",)
    defaultConfigFile = "datastore.yaml"


class DatastoreValidationError(ValidationError):
    """There is a problem with the Datastore configuration."""

    pass


@dataclasses.dataclass(frozen=True)
class Event:
    """Representation of an event that can be rolled back."""

    __slots__ = {"name", "undoFunc", "args", "kwargs"}
    name: str
    undoFunc: Callable
    args: tuple
    kwargs: dict


@dataclasses.dataclass(frozen=True)
class DatastoreOpaqueTable:
    """Definition of the opaque table which stores datastore records.

    Table definition contains `.ddl.TableSpec` for a table and a class
    of a record which must be a subclass of `StoredDatastoreItemInfo`.
    """

    __slots__ = {"table_spec", "record_class"}
    table_spec: ddl.TableSpec
    record_class: type[StoredDatastoreItemInfo]
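
# A minimal illustrative sketch (not part of this module) of how a concrete
# datastore might declare its opaque table, pairing a table specification with
# its record class. ``MyStoredItemInfo`` is a hypothetical subclass of
# ``StoredDatastoreItemInfo``; the exact ``ddl.TableSpec`` constructor
# arguments are elided because they depend on the ``ddl`` module.
#
#     opaque = DatastoreOpaqueTable(
#         table_spec=ddl.TableSpec(fields=[...]),  # field specs elided
#         record_class=MyStoredItemInfo,
#     )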


class IngestPrepData:
    """A helper base class for `Datastore` ingest implementations.

    Datastore implementations will generally need a custom implementation of
    this class.

    Should be accessed as ``Datastore.IngestPrepData`` instead of via direct
    import.

    Parameters
    ----------
    refs : `~collections.abc.Iterable` of `DatasetRef`
        References for the datasets that can be ingested by this datastore.
    """

    def __init__(self, refs: Iterable[DatasetRef]):
        self.refs = {ref.id: ref for ref in refs}


class DatastoreTransaction:
    """Keeps a log of `Datastore` activity and allows rollback.

    Parameters
    ----------
    parent : `DatastoreTransaction`, optional
        The parent transaction (if any).

    Notes
    -----
    This transaction object must be thread safe.
    """

    Event: ClassVar[type] = Event

    parent: DatastoreTransaction | None
    """The parent transaction. (`DatastoreTransaction`, optional)"""

    def __init__(self, parent: DatastoreTransaction | None = None):
        self.parent = parent
        self._log: list[Event] = []

    def registerUndo(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> None:
        """Register event with undo function.

        Parameters
        ----------
        name : `str`
            Name of the event.
        undoFunc : `~collections.abc.Callable`
            Function to undo this event.
        *args : `tuple`
            Positional arguments to ``undoFunc``.
        **kwargs
            Keyword arguments to ``undoFunc``.
        """
        self._log.append(self.Event(name, undoFunc, args, kwargs))

    @contextlib.contextmanager
    def undoWith(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> Iterator[None]:
        """Register undo function if nested operation succeeds.

        Calls `registerUndo`.

        This can be used to wrap individual undo-able statements within a
        DatastoreTransaction block. Multiple statements that can fail
        separately should not be part of the same `undoWith` block.

        All arguments are forwarded directly to `registerUndo`.

        Parameters
        ----------
        name : `str`
            The name to associate with this event.
        undoFunc : `~collections.abc.Callable`
            Function to undo this event.
        *args : `tuple`
            Positional arguments for ``undoFunc``.
        **kwargs : `typing.Any`
            Keyword arguments for ``undoFunc``.
        """
        try:
            yield None
        except BaseException:
            raise
        else:
            self.registerUndo(name, undoFunc, *args, **kwargs)
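
    # Illustrative sketch (assumed helpers, not part of this module): register
    # an undo action only if the wrapped operation succeeds, so that a later
    # ``rollback()`` can reverse it. ``write_file`` and ``delete_file`` are
    # hypothetical callables.
    #
    #     txn = DatastoreTransaction()
    #     with txn.undoWith("write", delete_file, "a.fits"):
    #         write_file("a.fits")
    #     txn.rollback()  # calls delete_file("a.fits")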

    def rollback(self) -> None:
        """Roll back all events in this transaction."""
        log = logging.getLogger(__name__)
        while self._log:
            ev = self._log.pop()
            try:
                log.debug(
                    "Rolling back transaction: %s: %s(%s,%s)",
                    ev.name,
                    ev.undoFunc,
                    ",".join(str(a) for a in ev.args),
                    ",".join(f"{k}={v}" for k, v in ev.kwargs.items()),
                )
            except Exception:
                # In case we had a problem in stringification of arguments
                log.warning("Rolling back transaction: %s", ev.name)
            try:
                ev.undoFunc(*ev.args, **ev.kwargs)
            except BaseException as e:
                # Deliberately swallow error that may occur in unrolling
                log.warning("Exception: %s caught while unrolling: %s", e, ev.name)
                pass

    def commit(self) -> None:
        """Commit this transaction."""
        if self.parent is None:
            # Just forget about the events, they have already happened.
            return
        else:
            # We may still want to undo the events from this transaction
            # as part of the parent.
            self.parent._log.extend(self._log)


@dataclasses.dataclass
class DatasetRefURIs(abc.Sequence):
    """Represents the primary and component ResourcePath(s) associated with a
    DatasetRef.

    This is used in places where its members used to be represented as a tuple
    (``primaryURI``, ``componentURIs``). To maintain backward compatibility
    this inherits from Sequence and so instances can be treated as a two-item
    tuple.

    Parameters
    ----------
    primaryURI : `lsst.resources.ResourcePath` or `None`, optional
        The URI to the primary artifact associated with this dataset. If the
        dataset was disassembled within the datastore this may be `None`.
    componentURIs : `dict` [`str`, `~lsst.resources.ResourcePath`] or `None`
        The URIs to any components associated with the dataset artifact
        indexed by component name. This can be empty if there are no
        components.
    """

    def __init__(
        self,
        primaryURI: ResourcePath | None = None,
        componentURIs: dict[str, ResourcePath] | None = None,
    ):
        self.primaryURI = primaryURI
        self.componentURIs = componentURIs or {}

    def __getitem__(self, index: Any) -> Any:
        """Get primaryURI and componentURIs by index.

        Provides support for tuple-like access.
        """
        if index == 0:
            return self.primaryURI
        elif index == 1:
            return self.componentURIs
        raise IndexError("list index out of range")

    def __len__(self) -> int:
        """Get the number of data members.

        Provides support for tuple-like access.
        """
        return 2

    def __repr__(self) -> str:
        return f"DatasetRefURIs({repr(self.primaryURI)}, {repr(self.componentURIs)})"

    def iter_all(self) -> Iterator[ResourcePath]:
        """Iterate over all URIs without regard to whether they are primary
        or component.
        """
        if self.primaryURI is not None:
            yield self.primaryURI
        yield from self.componentURIs.values()
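
    # Illustrative sketch: because this class behaves like a two-item tuple,
    # callers can unpack it directly. ``uris`` is a hypothetical instance.
    #
    #     primary, components = uris
    #     assert primary is uris.primaryURI
    #     assert components is uris.componentURIs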


class Datastore(FileTransferSource, metaclass=ABCMeta):
    """Datastore interface.

    Parameters
    ----------
    config : `DatastoreConfig` or `str`
        Load configuration either from an existing config instance or by
        referring to a configuration file.
    bridgeManager : `DatastoreRegistryBridgeManager`
        Object that manages the interface between `Registry` and datastores.

    See Also
    --------
    lsst.daf.butler.Butler
    """

    defaultConfigFile: ClassVar[str | None] = None
    """Path to configuration defaults. Accessed within the ``config`` resource
    or relative to a search path. Can be None if no defaults specified.
    """

    containerKey: ClassVar[str | None] = None
    """Name of the key containing a list of subconfigurations that also
    need to be merged with defaults and will likely use different Python
    datastore classes (but all using DatastoreConfig). Assumed to be a
    list of configurations that can be represented in a DatastoreConfig
    and containing a "cls" definition. None indicates that no containers
    are expected in this Datastore."""

    isEphemeral: bool = False
    """Indicate whether this Datastore is ephemeral or not. An ephemeral
    datastore is one where the contents of the datastore will not exist
    across process restarts. This value can change per-instance."""

    config: DatastoreConfig
    """Configuration used to create Datastore."""

    name: str
    """Label associated with this Datastore."""

    storageClassFactory: StorageClassFactory
    """Factory for creating storage class instances from name."""

    constraints: Constraints
    """Constraints to apply when putting datasets into the datastore."""

    # MyPy does not like for this to be annotated as any kind of type, because
    # it can't do static checking on type variables that can change at runtime.
    IngestPrepData: ClassVar[Any] = IngestPrepData
    """Helper base class for ingest implementations.
    """

    @classmethod
    @abstractmethod
    def setConfigRoot(cls, root: str, config: Config, full: Config, overwrite: bool = True) -> None:
        """Set filesystem-dependent config options for this datastore.

        The options will be appropriate for a new empty repository with the
        given root.

        Parameters
        ----------
        root : `str`
            Filesystem path to the root of the data repository.
        config : `Config`
            A `Config` to update. Only the subset understood by
            this component will be updated. Will not expand
            defaults.
        full : `Config`
            A complete config with all defaults expanded that can be
            converted to a `DatastoreConfig`. Read-only and will not be
            modified by this method.
            Repository-specific options that should not be obtained
            from defaults when Butler instances are constructed
            should be copied from ``full`` to ``config``.
        overwrite : `bool`, optional
            If `False`, do not modify a value in ``config`` if the value
            already exists. Default is always to overwrite with the provided
            ``root``.

        Notes
        -----
        If a keyword is explicitly defined in the supplied ``config`` it
        will not be overridden by this method if ``overwrite`` is `False`.
        This allows explicit values set in external configs to be retained.
        """
        raise NotImplementedError()

    @staticmethod
    def fromConfig(
        config: Config,
        bridgeManager: DatastoreRegistryBridgeManager,
        butlerRoot: ResourcePathExpression | None = None,
    ) -> Datastore:
        """Create datastore from type specified in config file.

        Parameters
        ----------
        config : `Config` or `~lsst.resources.ResourcePathExpression`
            Configuration instance.
        bridgeManager : `DatastoreRegistryBridgeManager`
            Object that manages the interface between `Registry` and
            datastores.
        butlerRoot : `str`, optional
            Butler root directory.
        """
        config = DatastoreConfig(config)
        cls = doImportType(config["cls"])
        if not issubclass(cls, Datastore):
            raise TypeError(f"Imported child class {config['cls']} is not a Datastore")
        return cls._create_from_config(config=config, bridgeManager=bridgeManager, butlerRoot=butlerRoot)
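
    # Illustrative sketch: ``fromConfig`` imports the class named by the
    # ``cls`` key and delegates construction to it. The class path and
    # ``bridge_manager`` below are assumptions for illustration only.
    #
    #     config = Config(
    #         {"datastore": {"cls": "lsst.daf.butler.datastores.fileDatastore.FileDatastore",
    #                        "root": "<butlerRoot>"}}
    #     )
    #     datastore = Datastore.fromConfig(config, bridge_manager)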

    def __init__(
        self,
        config: DatastoreConfig,
        bridgeManager: DatastoreRegistryBridgeManager,
    ):
        self.config = config
        self.name = "ABCDataStore"
        self._transaction: DatastoreTransaction | None = None

        # All Datastores need storage classes and constraints
        self.storageClassFactory = StorageClassFactory()

        # And read the constraints list
        constraintsConfig = self.config.get("constraints")
        self.constraints = Constraints(constraintsConfig, universe=bridgeManager.universe)

    @classmethod
    @abstractmethod
    def _create_from_config(
        cls,
        config: DatastoreConfig,
        bridgeManager: DatastoreRegistryBridgeManager,
        butlerRoot: ResourcePathExpression | None,
    ) -> Datastore:
        """`Datastore`.``fromConfig`` calls this to instantiate Datastore
        subclasses. This is the primary constructor for the individual
        Datastore subclasses.
        """
        raise NotImplementedError()

    @abstractmethod
    def clone(self, bridgeManager: DatastoreRegistryBridgeManager) -> Datastore:
        """Make an independent copy of this Datastore with a different
        `DatastoreRegistryBridgeManager` instance.

        Parameters
        ----------
        bridgeManager : `DatastoreRegistryBridgeManager`
            New `DatastoreRegistryBridgeManager` object to use when
            instantiating managers.

        Returns
        -------
        datastore : `Datastore`
            New `Datastore` instance with the same configuration as the
            existing instance.
        """
        raise NotImplementedError()

    def __str__(self) -> str:
        return self.name

    def __repr__(self) -> str:
        return self.name

    @property
    def names(self) -> tuple[str, ...]:
        """Names associated with this datastore returned as a tuple.

        Can be different from ``name`` for a chaining datastore.
        """
        # Default implementation returns solely the name itself
        return (self.name,)

    @property
    def roots(self) -> dict[str, ResourcePath | None]:
        """Return the root URIs for each named datastore.

        Mapping from datastore name to root URI. The URI can be `None`
        if a datastore has no concept of a root URI.
        (`dict` [`str`, `lsst.resources.ResourcePath` | `None`])
        """
        return {self.name: None}

    @contextlib.contextmanager
    def transaction(self) -> Iterator[DatastoreTransaction]:
        """Context manager supporting `Datastore` transactions.

        Transactions can be nested, and are to be used in combination with
        `Registry.transaction`.
        """
        self._transaction = DatastoreTransaction(self._transaction)
        try:
            yield self._transaction
        except BaseException:
            self._transaction.rollback()
            raise
        else:
            self._transaction.commit()
        self._transaction = self._transaction.parent
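
    # Illustrative sketch: nested transactions roll back automatically when an
    # exception escapes the ``with`` block. ``datastore``, ``obj``, and
    # ``ref`` are hypothetical.
    #
    #     with datastore.transaction():
    #         datastore.put(obj, ref)
    #         ...  # if this raises, the put is rolled back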

    def _set_trust_mode(self, mode: bool) -> None:
        """Set the trust mode for this datastore.

        Parameters
        ----------
        mode : `bool`
            If `True`, get requests will be attempted even if the datastore
            does not know about the dataset.

        Notes
        -----
        This is a private method to indicate that trust mode might be a
        transitory property that we do not want to make fully public. For now
        only a `~lsst.daf.butler.datastores.FileDatastore` understands this
        concept. By default this method does nothing.
        """
        return

    @abstractmethod
    def knows(self, ref: DatasetRef) -> bool:
        """Check if the dataset is known to the datastore.

        Does not check for existence of any artifact.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required dataset.

        Returns
        -------
        exists : `bool`
            `True` if the dataset is known to the datastore.
        """
        raise NotImplementedError()

    def knows_these(self, refs: Iterable[DatasetRef]) -> dict[DatasetRef, bool]:
        """Check which of the given datasets are known to this datastore.

        This is like ``mexists()`` but does not check that the file exists.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` [ `DatasetRef` ]
            The datasets to check.

        Returns
        -------
        exists : `dict` [`DatasetRef`, `bool`]
            Mapping of dataset to boolean indicating whether the dataset
            is known to the datastore.
        """
        # Non-optimized default calls knows() repeatedly.
        return {ref: self.knows(ref) for ref in refs}

    def mexists(
        self, refs: Iterable[DatasetRef], artifact_existence: dict[ResourcePath, bool] | None = None
    ) -> dict[DatasetRef, bool]:
        """Check the existence of multiple datasets at once.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetRef`
            The datasets to be checked.
        artifact_existence : `dict` [`lsst.resources.ResourcePath`, `bool`]
            Optional mapping of datastore artifact to existence. Updated by
            this method with details of all artifacts tested. Can be `None`
            if the caller is not interested.

        Returns
        -------
        existence : `dict` [`DatasetRef`, `bool`]
            Mapping from dataset to boolean indicating existence.
        """
        existence: dict[DatasetRef, bool] = {}
        # Non-optimized default.
        for ref in refs:
            existence[ref] = self.exists(ref)
        return existence
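
    # Illustrative sketch: ``knows_these`` consults only datastore records,
    # while ``mexists`` also checks artifacts; the ``artifact_existence``
    # cache can be shared across calls. ``datastore`` and ``refs`` are
    # hypothetical.
    #
    #     known = datastore.knows_these(refs)  # records only
    #     cache = {}  # dict[ResourcePath, bool]
    #     exists = datastore.mexists(refs, artifact_existence=cache)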

    @abstractmethod
    def exists(self, datasetRef: DatasetRef) -> bool:
        """Check if the dataset exists in the datastore.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required dataset.

        Returns
        -------
        exists : `bool`
            `True` if the entity exists in the `Datastore`.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def get(
        self,
        datasetRef: DatasetRef,
        parameters: Mapping[str, Any] | None = None,
        storageClass: StorageClass | str | None = None,
    ) -> Any:
        """Load an in-memory dataset from the store.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required Dataset.
        parameters : `dict`
            `StorageClass`-specific parameters that specify a slice of the
            Dataset to be loaded.
        storageClass : `StorageClass` or `str`, optional
            The storage class to be used to override the Python type
            returned by this method. By default the returned type matches
            the dataset type definition for this dataset. Specifying a
            read `StorageClass` can force a different type to be returned.
            This type must be compatible with the original type.

        Returns
        -------
        inMemoryDataset : `object`
            Requested Dataset or slice thereof as an in-memory dataset.
        """
        raise NotImplementedError("Must be implemented by subclass")

    def prepare_get_for_external_client(self, ref: DatasetRef) -> list[DatasetLocationInformation] | None:
        """Retrieve data that can be used to execute a ``get()``.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required dataset.

        Returns
        -------
        payload : `list` [ `DatasetLocationInformation` ] | `None`
            Information needed to perform a get() operation. Returns `None` if
            the dataset is not known to this datastore.
        """
        raise NotImplementedError()

    @abstractmethod
    def put(
        self, inMemoryDataset: Any, datasetRef: DatasetRef, provenance: DatasetProvenance | None = None
    ) -> None:
        """Write an in-memory dataset with a given `DatasetRef` to the store.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Dataset to store.
        datasetRef : `DatasetRef`
            Reference to the associated Dataset.
        provenance : `DatasetProvenance` or `None`, optional
            Any provenance that should be attached to the serialized dataset.
            Not supported by all serialization mechanisms.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def put_new(self, in_memory_dataset: Any, ref: DatasetRef) -> Mapping[str, DatasetRef]:
        """Write an in-memory dataset with a given `DatasetRef` to the store.

        Parameters
        ----------
        in_memory_dataset : `object`
            The Dataset to store.
        ref : `DatasetRef`
            Reference to the associated Dataset.

        Returns
        -------
        datastore_refs : `~collections.abc.Mapping` [`str`, `DatasetRef`]
            Mapping of a datastore name to dataset reference stored in that
            datastore, reference will include datastore records. Only
            non-ephemeral datastores will appear in this mapping.
        """
        raise NotImplementedError("Must be implemented by subclass")

    def _overrideTransferMode(self, *datasets: FileDataset, transfer: str | None = None) -> str | None:
        """Allow ingest transfer mode to be defaulted based on datasets.

        Parameters
        ----------
        *datasets : `FileDataset`
            Each positional argument is a struct containing information about
            a file to be ingested, including its path (either absolute or
            relative to the datastore root, if applicable), a complete
            `DatasetRef` (with ``dataset_id not None``), and optionally a
            formatter class or its fully-qualified string name. If a formatter
            is not provided, this method should populate that attribute with
            the formatter the datastore would use for `put`. Subclasses are
            also permitted to modify the path attribute (typically to put it
            in what the datastore considers its standard form).
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            See `ingest` for details of transfer modes.

        Returns
        -------
        newTransfer : `str`
            Transfer mode to use. Will be identical to the supplied transfer
            mode unless "auto" is used.
        """
        if transfer != "auto":
            return transfer
        raise RuntimeError(f"{transfer} is not allowed without specialization.")

    def _prepIngest(self, *datasets: FileDataset, transfer: str | None = None) -> IngestPrepData:
        """Process datasets to identify which ones can be ingested.

        Parameters
        ----------
        *datasets : `FileDataset`
            Each positional argument is a struct containing information about
            a file to be ingested, including its path (either absolute or
            relative to the datastore root, if applicable), a complete
            `DatasetRef` (with ``dataset_id not None``), and optionally a
            formatter class or its fully-qualified string name. If a formatter
            is not provided, this method should populate that attribute with
            the formatter the datastore would use for `put`. Subclasses are
            also permitted to modify the path attribute (typically to put it
            in what the datastore considers its standard form).
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            See `ingest` for details of transfer modes.

        Returns
        -------
        data : `IngestPrepData`
            An instance of a subclass of `IngestPrepData`, used to pass
            arbitrary data from `_prepIngest` to `_finishIngest`. This should
            include only the datasets this datastore can actually ingest;
            others should be silently ignored (`Datastore.ingest` will inspect
            `IngestPrepData.refs` and raise `DatasetTypeNotSupportedError` if
            necessary).

        Raises
        ------
        NotImplementedError
            Raised if the datastore does not support the given transfer mode
            (including the case where ingest is not supported at all).
        FileNotFoundError
            Raised if one of the given files does not exist.
        FileExistsError
            Raised if transfer is not `None` but the (internal) location the
            file would be moved to is already occupied.

        Notes
        -----
        This method (along with `_finishIngest`) should be implemented by
        subclasses to provide ingest support instead of implementing `ingest`
        directly.

        `_prepIngest` should not modify the data repository or given files in
        any way; all changes should be deferred to `_finishIngest`.

        When possible, exceptions should be raised in `_prepIngest` instead of
        `_finishIngest`. `NotImplementedError` exceptions that indicate that
        the transfer mode is not supported must be raised by `_prepIngest`
        instead of `_finishIngest`.
        """
        raise NotImplementedError(f"Datastore {self} does not support direct file-based ingest.")

    def _finishIngest(
        self, prepData: IngestPrepData, *, transfer: str | None = None, record_validation_info: bool = True
    ) -> None:
        """Complete an ingest operation.

        Parameters
        ----------
        prepData : `IngestPrepData`
            An instance of a subclass of `IngestPrepData`. Guaranteed to be
            the direct result of a call to `_prepIngest` on this datastore.
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            See `ingest` for details of transfer modes.
        record_validation_info : `bool`, optional
            If `True`, the default, the datastore can record validation
            information associated with the file. If `False` the datastore
            will not attempt to track any information such as checksums
            or file sizes. This can be useful if such information is tracked
            in an external system or if the file is to be compressed in place.
            It is up to the datastore whether this parameter is relevant.

        Raises
        ------
        FileNotFoundError
            Raised if one of the given files does not exist.
        FileExistsError
            Raised if transfer is not `None` but the (internal) location the
            file would be moved to is already occupied.

        Notes
        -----
        This method (along with `_prepIngest`) should be implemented by
        subclasses to provide ingest support instead of implementing `ingest`
        directly.
        """
        raise NotImplementedError(f"Datastore {self} does not support direct file-based ingest.")

    def ingest(
        self, *datasets: FileDataset, transfer: str | None = None, record_validation_info: bool = True
    ) -> None:
        """Ingest one or more files into the datastore.

        Parameters
        ----------
        *datasets : `FileDataset`
            Each positional argument is a struct containing information about
            a file to be ingested, including its path (either absolute or
            relative to the datastore root, if applicable), a complete
            `DatasetRef` (with ``dataset_id not None``), and optionally a
            formatter class or its fully-qualified string name. If a formatter
            is not provided, the one the datastore would use for ``put`` on
            that dataset is assumed.
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            If `None` (default), the file must already be in a location
            appropriate for the datastore (e.g. within its root directory),
            and will not be modified. Other choices include "move", "copy",
            "link", "symlink", "relsymlink", and "hardlink". "link" is a
            special transfer mode that will first try to make a hardlink and
            if that fails a symlink will be used instead. "relsymlink" creates
            a relative symlink rather than use an absolute path.
            Most datastores do not support all transfer modes.
            "auto" is a special option that will let the
            data store choose the most natural option for itself.
        record_validation_info : `bool`, optional
            If `True`, the default, the datastore can record validation
            information associated with the file. If `False` the datastore
            will not attempt to track any information such as checksums
            or file sizes. This can be useful if such information is tracked
            in an external system or if the file is to be compressed in place.
            It is up to the datastore whether this parameter is relevant.

        Returns
        -------
        None

        Raises
        ------
        NotImplementedError
            Raised if the datastore does not support the given transfer mode
            (including the case where ingest is not supported at all).
        DatasetTypeNotSupportedError
            Raised if one or more files to be ingested have a dataset type that
            is not supported by the datastore.
        FileNotFoundError
            Raised if one of the given files does not exist.
        FileExistsError
            Raised if transfer is not `None` but the (internal) location the
            file would be moved to is already occupied.

        Notes
        -----
        Subclasses should implement `_prepIngest` and `_finishIngest` instead
        of implementing `ingest` directly. Datastores that hold and
        delegate to child datastores may want to call those methods as well.

        Subclasses are encouraged to document their supported transfer modes
        in their class documentation.
        """
        # Allow a datastore to select a default transfer mode
        transfer = self._overrideTransferMode(*datasets, transfer=transfer)
        prepData = self._prepIngest(*datasets, transfer=transfer)
        refs = {ref.id: ref for dataset in datasets for ref in dataset.refs}
        if refs.keys() != prepData.refs.keys():
            unsupported = refs.keys() - prepData.refs.keys()
            # Group unsupported refs by DatasetType for an informative
            # but still concise error message.
            byDatasetType = defaultdict(list)
            for datasetId in unsupported:
                ref = refs[datasetId]
                byDatasetType[ref.datasetType].append(ref)
            raise DatasetTypeNotSupportedError(
                "DatasetType(s) not supported in ingest: "
                + ", ".join(f"{k.name} ({len(v)} dataset(s))" for k, v in byDatasetType.items())
            )
        self._finishIngest(prepData, transfer=transfer, record_validation_info=record_validation_info)
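
    # Illustrative sketch of the two-phase split that ``ingest`` relies on: a
    # hypothetical subclass validates everything up front in ``_prepIngest``
    # and performs the (destructive) work only in ``_finishIngest``.
    # ``_can_ingest`` is an assumed helper.
    #
    #     class MyDatastore(Datastore):
    #         def _prepIngest(self, *datasets, transfer=None):
    #             supported = [d for d in datasets if self._can_ingest(d)]
    #             return self.IngestPrepData(ref for d in supported for ref in d.refs)
    #
    #         def _finishIngest(self, prepData, *, transfer=None,
    #                           record_validation_info=True):
    #             ...  # move/copy the files and record them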

    def transfer_from(
        self,
        source_records: FileTransferMap,
        refs: Collection[DatasetRef],
        transfer: str = "auto",
        artifact_existence: dict[ResourcePath, bool] | None = None,
        dry_run: bool = False,
    ) -> tuple[set[DatasetRef], set[DatasetRef]]:
        """Transfer dataset artifacts from another datastore to this one.

        Parameters
        ----------
        source_records : `FileTransferMap`
            The artifacts to be transferred into this datastore.
        refs : `~collections.abc.Collection` of `DatasetRef`
            The datasets to transfer from the source datastore.
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            Choices include "move", "copy",
            "link", "symlink", "relsymlink", and "hardlink". "link" is a
            special transfer mode that will first try to make a hardlink and
            if that fails a symlink will be used instead. "relsymlink" creates
            a relative symlink rather than use an absolute path.
            Most datastores do not support all transfer modes.
            "auto" (the default) is a special option that will let the
            data store choose the most natural option for itself.
            If the source location and transfer location are identical the
            transfer mode will be ignored.
        artifact_existence : `dict` [`lsst.resources.ResourcePath`, `bool`]
            Optional mapping of datastore artifact to existence. Updated by
            this method with details of all artifacts tested. Can be `None`
            if the caller is not interested.
        dry_run : `bool`, optional
            Process the supplied source refs without updating the target
            datastore.

        Returns
        -------
        accepted : `set` [`DatasetRef`]
            The datasets that were transferred.
        rejected : `set` [`DatasetRef`]
            The datasets that were rejected due to a constraints violation.

        Raises
        ------
        TypeError
            Raised if the two datastores are not compatible.
        """
        raise NotImplementedError(f"Datastore {type(self)} does not implement a transfer_from method.")

    def getManyURIs(
        self,
        refs: Iterable[DatasetRef],
        predict: bool = False,
        allow_missing: bool = False,
    ) -> dict[DatasetRef, DatasetRefURIs]:
        """Return URIs associated with many datasets.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the required datasets.
        predict : `bool`, optional
            If `True`, allow URIs to be returned of datasets that have not
            been written.
        allow_missing : `bool`
            If `False`, and ``predict`` is `False`, will raise if a
            `DatasetRef` does not exist.

        Returns
        -------
        URIs : `dict` [`DatasetRef`, `DatasetRefURIs`]
            A dict of primary and component URIs, indexed by the passed-in
            refs.

        Raises
        ------
        FileNotFoundError
            A URI has been requested for a dataset that does not exist and
            guessing is not allowed.

        Notes
        -----
        In file-based datastores, getManyURIs does not check that the files
        really exist; it assumes that if the datastore is aware of a file
        then it actually exists.
        """
        uris: dict[DatasetRef, DatasetRefURIs] = {}
        missing_refs = []
        for ref in refs:
            try:
                uris[ref] = self.getURIs(ref, predict=predict)
            except FileNotFoundError:
                missing_refs.append(ref)
        if missing_refs and not allow_missing:
            num_missing = len(missing_refs)
            raise FileNotFoundError(
                f"Missing {num_missing} refs from datastore out of "
                f"{num_missing + len(uris)} and predict=False."
            )
        return uris
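
    # Illustrative sketch: fetch URIs for several refs at once, tolerating
    # datasets that have no artifacts yet. ``datastore`` and ``refs`` are
    # hypothetical.
    #
    #     uri_map = datastore.getManyURIs(refs, allow_missing=True)
    #     for ref, uris in uri_map.items():
    #         for uri in uris.iter_all():
    #             print(ref, uri)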

    @abstractmethod
    def getURIs(self, datasetRef: DatasetRef, predict: bool = False) -> DatasetRefURIs:
        """Return URIs associated with dataset.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required dataset.
        predict : `bool`, optional
            If the datastore does not know about the dataset, controls whether
            it should return a predicted URI or not.

        Returns
        -------
        uris : `DatasetRefURIs`
            The URI to the primary artifact associated with this dataset (if
            the dataset was disassembled within the datastore this may be
            `None`), and the URIs to any components associated with the
            dataset artifact (can be empty if there are no components).
        """
        raise NotImplementedError()

    @abstractmethod
    def getURI(self, datasetRef: DatasetRef, predict: bool = False) -> ResourcePath:
        """URI to the Dataset.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required Dataset.
        predict : `bool`
            If `True` attempt to predict the URI for a dataset if it does
            not exist in datastore.

        Returns
        -------
        uri : `lsst.resources.ResourcePath`
            URI pointing to the Dataset within the datastore. If the
            Dataset does not exist in the datastore, the URI may be a guess.
            If the datastore does not have entities that relate well
            to the concept of a URI the returned URI will be
            descriptive. The returned URI is not guaranteed to be obtainable.

        Raises
        ------
        FileNotFoundError
            A URI has been requested for a dataset that does not exist and
            guessing is not allowed.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def retrieveArtifacts(
        self,
        refs: Iterable[DatasetRef],
        destination: ResourcePath,
        transfer: str = "auto",
        preserve_path: bool = True,
        overwrite: bool = False,
        write_index: bool = True,
        add_prefix: bool = False,
    ) -> dict[ResourcePath, ArtifactIndexInfo]:
        """Retrieve the artifacts associated with the supplied refs.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetRef`
            The datasets for which artifacts are to be retrieved.
            A single ref can result in multiple artifacts. The refs must
            be resolved.
        destination : `lsst.resources.ResourcePath`
            Location to write the artifacts.
        transfer : `str`, optional
            Method to use to transfer the artifacts. Must be one of the options
            supported by `lsst.resources.ResourcePath.transfer_from`.
            "move" is not allowed.
        preserve_path : `bool`, optional
            If `True` the full path of the artifact within the datastore
            is preserved. If `False` the final file component of the path
            is used.
        overwrite : `bool`, optional
            If `True` allow transfers to overwrite existing files at the
            destination.
        write_index : `bool`, optional
            If `True` write a file at the top level containing a serialization
            of a `ZipIndex` for the downloaded datasets.
        add_prefix : `bool`, optional
            If `True` and if ``preserve_path`` is `False`, apply a prefix to
            the filenames corresponding to some part of the dataset ref ID.
            This can be used to guarantee uniqueness.

        Returns
        -------
        artifact_map : `dict` [ `lsst.resources.ResourcePath`, \
                `ArtifactIndexInfo` ]
            Mapping of retrieved file to associated index information.

        Notes
        -----
        For non-file datastores the artifacts written to the destination
        may not match the representation inside the datastore. For example
        a hierarchical data structure in a NoSQL database may well be stored
        as a JSON file.
        """
        raise NotImplementedError()
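
    # Illustrative sketch: copy all artifacts for some refs into a local
    # directory, keeping datastore-relative paths. ``datastore`` and ``refs``
    # are hypothetical.
    #
    #     from lsst.resources import ResourcePath
    #
    #     dest = ResourcePath("transfer_dir/", forceDirectory=True)
    #     artifact_map = datastore.retrieveArtifacts(refs, dest, transfer="copy")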

    @abstractmethod
    def ingest_zip(self, zip_path: ResourcePath, transfer: str | None, *, dry_run: bool = False) -> None:
        """Ingest an indexed Zip file and contents.

        The Zip file must have an index file as created by `retrieveArtifacts`.

        Parameters
        ----------
        zip_path : `lsst.resources.ResourcePath`
            Path to the Zip file.
        transfer : `str`
            Method to use for transferring the Zip file into the datastore.
        dry_run : `bool`, optional
            If `True` the ingest will be processed without any modifications
            made to the target datastore and as if the target datastore did not
            have any of the datasets.
        """
        raise NotImplementedError()

    @abstractmethod
    def remove(self, datasetRef: DatasetRef) -> None:
        """Indicate to the Datastore that a Dataset can be removed.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required Dataset.

        Raises
        ------
        FileNotFoundError
            When Dataset does not exist.

        Notes
        -----
        Some Datastores may implement this method as a silent no-op to
        disable Dataset deletion through standard interfaces.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def forget(self, refs: Iterable[DatasetRef]) -> None:
        """Indicate to the Datastore that it should remove all records of the
        given datasets, without actually deleting them.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` [ `DatasetRef` ]
            References to the datasets being forgotten.

        Notes
        -----
        Asking a datastore to forget a `DatasetRef` it does not hold should be
        a silent no-op, not an error.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def trash(self, ref: DatasetRef | Iterable[DatasetRef], ignore_errors: bool = True) -> None:
        """Indicate to the Datastore that a Dataset can be moved to the trash.

        Parameters
        ----------
        ref : `DatasetRef` or iterable thereof
            Reference(s) to the required Dataset.
        ignore_errors : `bool`, optional
            Determine whether errors should be ignored. When multiple
            refs are being trashed there will be no per-ref check.

        Returns
        -------
        None

        Raises
        ------
        FileNotFoundError
            When Dataset does not exist and errors are not ignored. Only
            checked if a single ref is supplied (and not in a list).

        Notes
        -----
        Some Datastores may implement this method as a silent no-op to
        disable Dataset deletion through standard interfaces.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def emptyTrash(
        self, ignore_errors: bool = True, refs: Collection[DatasetRef] | None = None, dry_run: bool = False
    ) -> set[ResourcePath]:
        """Remove all datasets from the trash.

        Parameters
        ----------
        ignore_errors : `bool`, optional
            Determine whether errors should be ignored.
        refs : `collections.abc.Collection` [ `DatasetRef` ] or `None`
            Explicit list of datasets that can be removed from trash. If listed
            datasets are not already stored in the trash table they will be
            ignored. If `None` every entry in the trash table will be
            processed.
        dry_run : `bool`, optional
            If `True`, the trash table will be queried and results reported
            but no artifacts will be removed.

        Returns
        -------
        removed : `set` [ `lsst.resources.ResourcePath` ]
            List of artifacts that were removed. Can return nothing if
            artifacts cannot be represented by URIs.

        Notes
        -----
        Some Datastores may implement this method as a silent no-op to
        disable Dataset deletion through standard interfaces.
        """
        raise NotImplementedError("Must be implemented by subclass")
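
    # Illustrative sketch of the two-step deletion lifecycle: ``trash`` marks
    # datasets for removal and ``emptyTrash`` deletes the artifacts.
    # ``datastore`` and ``refs`` are hypothetical.
    #
    #     datastore.trash(refs)
    #     removed_uris = datastore.emptyTrash(ignore_errors=False)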

    @abstractmethod
    def transfer(self, inputDatastore: Datastore, datasetRef: DatasetRef) -> None:
        """Transfer a dataset from another datastore to this datastore.

        Parameters
        ----------
        inputDatastore : `Datastore`
            The external `Datastore` from which to retrieve the Dataset.
        datasetRef : `DatasetRef`
            Reference to the required Dataset.
        """
        raise NotImplementedError("Must be implemented by subclass")

    def export(
        self,
        refs: Iterable[DatasetRef],
        *,
        directory: ResourcePathExpression | None = None,
        transfer: str | None = "auto",
    ) -> Iterable[FileDataset]:
        """Export datasets for transfer to another data repository.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetRef`
            Dataset references to be exported.
        directory : `str`, optional
            Path to a directory that should contain files corresponding to
            output datasets. Ignored if ``transfer`` is explicitly `None`.
        transfer : `str`, optional
            Mode that should be used to move datasets out of the repository.
            Valid options are the same as those of the ``transfer`` argument
            to ``ingest``, and datastores may similarly signal that a transfer
            mode is not supported by raising `NotImplementedError`. If "auto"
            is given and no ``directory`` is specified, `None` will be
            implied.

        Returns
        -------
        dataset : `~collections.abc.Iterable` of `FileDataset`
            Structs containing information about the exported datasets, in the
            same order as ``refs``.

        Raises
        ------
        NotImplementedError
            Raised if the given transfer mode is not supported.
        """
        raise NotImplementedError(f"Transfer mode {transfer} not supported.")

    @abstractmethod
    def validateConfiguration(
        self, entities: Iterable[DatasetRef | DatasetType | StorageClass], logFailures: bool = False
    ) -> None:
        """Validate some of the configuration for this datastore.

        Parameters
        ----------
        entities : `~collections.abc.Iterable` [`DatasetRef` | `DatasetType` |\
                `StorageClass`]
            Entities to test against this configuration. Can be differing
            types.
        logFailures : `bool`, optional
            If `True`, output a log message for every validation error
            detected.

        Raises
        ------
        DatastoreValidationError
            Raised if there is a validation problem with a configuration.

        Notes
        -----
        Which parts of the configuration are validated is at the discretion
        of each Datastore implementation.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def validateKey(self, lookupKey: LookupKey, entity: DatasetRef | DatasetType | StorageClass) -> None:
        """Validate a specific look up key with supplied entity.

        Parameters
        ----------
        lookupKey : `LookupKey`
            Key to use to retrieve information from the datastore
            configuration.
        entity : `DatasetRef`, `DatasetType`, or `StorageClass`
            Entity to compare with configuration retrieved using the
            specified lookup key.

        Returns
        -------
        None

        Raises
        ------
        DatastoreValidationError
            Raised if there is a problem with the combination of entity
            and lookup key.

        Notes
        -----
        Bypasses the normal selection priorities by allowing a key that
        would normally not be selected to be validated.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def getLookupKeys(self) -> set[LookupKey]:
        """Return all the lookup keys relevant to this datastore.

        Returns
        -------
        keys : `set` of `LookupKey`
            The keys stored internally for looking up information based
            on `DatasetType` name or `StorageClass`.
        """
        raise NotImplementedError("Must be implemented by subclass")

    def needs_expanded_data_ids(
        self,
        transfer: str | None,
        entity: DatasetRef | DatasetType | StorageClass | None = None,
    ) -> bool:
        """Test whether this datastore needs expanded data IDs to ingest.

        Parameters
        ----------
        transfer : `str` or `None`
            Transfer mode for ingest.
        entity : `DatasetRef` or `DatasetType` or `StorageClass` or `None`, \
                optional
            Object representing what will be ingested. If not provided (or not
            specific enough), `True` may be returned even if expanded data
            IDs aren't necessary.

        Returns
        -------
        needed : `bool`
            If `True`, expanded data IDs may be needed. `False` only if
            expansion definitely isn't necessary.
        """
        return True

    @abstractmethod
    def import_records(
        self,
        data: Mapping[str, DatastoreRecordData],
    ) -> None:
        """Import datastore location and record data from an in-memory data
        structure.

        Parameters
        ----------
        data : `~collections.abc.Mapping` [ `str`, `DatastoreRecordData` ]
            Datastore records indexed by datastore name. May contain data for
            other `Datastore` instances (generally because they are chained to
            this one), which should be ignored.

        Notes
        -----
        Implementations should generally not check that any external resources
        (e.g. files) referred to by these records actually exist, for
        performance reasons; we expect higher-level code to guarantee that they
        do.

        Implementations are responsible for calling
        `DatastoreRegistryBridge.insert` on all datasets in ``data.locations``
        where the key is in `names`, as well as loading any opaque table data.

        Implementations may assume that datasets are either fully present or
        not at all (single-component exports are not permitted).
        """
        raise NotImplementedError()

    @abstractmethod
    def export_records(
        self,
        refs: Iterable[DatasetIdRef],
    ) -> Mapping[str, DatastoreRecordData]:
        """Export datastore records and locations to an in-memory data
        structure.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` [ `DatasetIdRef` ]
            Datasets to save. This may include datasets not known to this
            datastore, which should be ignored. May not include component
            datasets.

        Returns
        -------
        data : `~collections.abc.Mapping` [ `str`, `DatastoreRecordData` ]
            Exported datastore records indexed by datastore name.
        """
        raise NotImplementedError()
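
    # Illustrative sketch: move record metadata between two datastores without
    # touching any files. ``source``, ``target``, and ``refs`` are
    # hypothetical; the mapping round-trips through ``DatastoreRecordData``.
    #
    #     records = source.export_records(refs)
    #     target.import_records(records)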

    def export_predicted_records(self, refs: Iterable[DatasetRef]) -> dict[str, DatastoreRecordData]:
        """Export predicted datastore records and locations to an in-memory
        data structure.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` [ `DatasetRef` ]
            Datastore records that would be used if the given refs were to
            exist in this datastore. No attempt is made to determine if these
            datasets actually exist.

        Returns
        -------
        data : `~collections.abc.Mapping` [ `str`, `DatastoreRecordData` ]
            Exported datastore records indexed by datastore name.
        """
        raise NotImplementedError()

    def set_retrieve_dataset_type_method(self, method: Callable[[str], DatasetType | None] | None) -> None:
        """Specify a method that can be used by datastore to retrieve
        registry-defined dataset type.

        Parameters
        ----------
        method : `~collections.abc.Callable` | `None`
            Method that takes a name of the dataset type and returns a
            corresponding `DatasetType` instance as defined in Registry. If
            dataset type name is not known to registry `None` is returned.

        Notes
        -----
        This method is only needed for a Datastore supporting a "trusted" mode
        when it does not have access to datastore records and needs to
        guess dataset location based on its stored dataset type.
        """
        pass

    @abstractmethod
    def get_opaque_table_definitions(self) -> Mapping[str, DatastoreOpaqueTable]:
        """Make definitions of the opaque tables used by this Datastore.

        Returns
        -------
        tables : `~collections.abc.Mapping` [ `str`, `DatastoreOpaqueTable` ]
            Mapping of opaque table names to their definitions. This can be an
            empty mapping if Datastore does not use opaque tables to keep
            datastore records.
        """
        raise NotImplementedError()

    def get_file_info_for_transfer(self, dataset_ids: Iterable[DatasetId]) -> FileTransferMap:
        raise NotImplementedError(f"Transferring files is not supported by datastore {self}")

    def locate_missing_files_for_transfer(
        self, refs: Iterable[DatasetRef], artifact_existence: dict[ResourcePath, bool]
    ) -> FileTransferMap:
        return {}


class NullDatastore(Datastore):
    """A datastore that implements the `Datastore` API but always fails when
    it receives any request.

    Parameters
    ----------
    config : `Config` or `~lsst.resources.ResourcePathExpression` or `None`
        Ignored.
    bridgeManager : `DatastoreRegistryBridgeManager` or `None`
        Ignored.
    butlerRoot : `~lsst.resources.ResourcePathExpression` or `None`
        Ignored.
    """

    @classmethod
    def _create_from_config(
        cls,
        config: Config,
        bridgeManager: DatastoreRegistryBridgeManager,
        butlerRoot: ResourcePathExpression | None = None,
    ) -> NullDatastore:
        return NullDatastore(config, bridgeManager, butlerRoot)

    def clone(self, bridgeManager: DatastoreRegistryBridgeManager) -> Datastore:
        return self

    @classmethod
    def setConfigRoot(cls, root: str, config: Config, full: Config, overwrite: bool = True) -> None:
        # Nothing to do. This is not a real Datastore.
        pass

    def __init__(
        self,
        config: Config | ResourcePathExpression | None,
        bridgeManager: DatastoreRegistryBridgeManager | None,
        butlerRoot: ResourcePathExpression | None = None,
    ):
        # Name ourselves with the timestamp the datastore
        # was created.
        self.name = f"{type(self).__name__}@{time.time()}"
        _LOG.debug("Creating datastore %s", self.name)
        self._transaction: DatastoreTransaction | None = None
        return

    def knows(self, ref: DatasetRef) -> bool:
        return False

    def exists(self, datasetRef: DatasetRef) -> bool:
        return False

    def get(
        self,
        datasetRef: DatasetRef,
        parameters: Mapping[str, Any] | None = None,
        storageClass: StorageClass | str | None = None,
    ) -> Any:
        raise FileNotFoundError("This is a no-op datastore that can not access a real datastore")

    def put(
        self, inMemoryDataset: Any, datasetRef: DatasetRef, provenance: DatasetProvenance | None = None
    ) -> None:
        raise NotImplementedError("This is a no-op datastore that can not access a real datastore")

    def put_new(self, in_memory_dataset: Any, ref: DatasetRef) -> Mapping[str, DatasetRef]:
        raise NotImplementedError("This is a no-op datastore that can not access a real datastore")

    def ingest(
        self, *datasets: FileDataset, transfer: str | None = None, record_validation_info: bool = True
    ) -> None:
        raise NotImplementedError("This is a no-op datastore that can not access a real datastore")

    def transfer_from(
        self,
        source_records: FileTransferMap,
        refs: Iterable[DatasetRef],
        transfer: str = "auto",
        artifact_existence: dict[ResourcePath, bool] | None = None,
        dry_run: bool = False,
    ) -> tuple[set[DatasetRef], set[DatasetRef]]:
        raise NotImplementedError("This is a no-op datastore that can not access a real datastore")

    def getURIs(self, datasetRef: DatasetRef, predict: bool = False) -> DatasetRefURIs:
        raise FileNotFoundError("This is a no-op datastore that can not access a real datastore")

    def getURI(self, datasetRef: DatasetRef, predict: bool = False) -> ResourcePath:
        raise FileNotFoundError("This is a no-op datastore that can not access a real datastore")

    def ingest_zip(self, zip_path: ResourcePath, transfer: str | None, *, dry_run: bool = False) -> None:
        raise NotImplementedError("Can only ingest a Zip into a real datastore.")

    def retrieveArtifacts(
        self,
        refs: Iterable[DatasetRef],
        destination: ResourcePath,
        transfer: str = "auto",
        preserve_path: bool = True,
        overwrite: bool = False,
        write_index: bool = True,
        add_prefix: bool = False,
    ) -> dict[ResourcePath, ArtifactIndexInfo]:
        raise NotImplementedError("This is a no-op datastore that can not access a real datastore")

    def remove(self, datasetRef: DatasetRef) -> None:
        raise NotImplementedError("This is a no-op datastore that can not access a real datastore")

    def forget(self, refs: Iterable[DatasetRef]) -> None:
        raise NotImplementedError("This is a no-op datastore that can not access a real datastore")

    def trash(self, ref: DatasetRef | Iterable[DatasetRef], ignore_errors: bool = True) -> None:
        raise NotImplementedError("This is a no-op datastore that can not access a real datastore")

    def emptyTrash(
        self, ignore_errors: bool = True, refs: Collection[DatasetRef] | None = None, dry_run: bool = False
    ) -> set[ResourcePath]:
        raise NotImplementedError("This is a no-op datastore that can not access a real datastore")

    def transfer(self, inputDatastore: Datastore, datasetRef: DatasetRef) -> None:
        raise NotImplementedError("This is a no-op datastore that can not access a real datastore")

    def export(
        self,
        refs: Iterable[DatasetRef],
        *,
        directory: ResourcePathExpression | None = None,
        transfer: str | None = "auto",
    ) -> Iterable[FileDataset]:
        raise NotImplementedError("This is a no-op datastore that can not access a real datastore")

    def validateConfiguration(
        self, entities: Iterable[DatasetRef | DatasetType | StorageClass], logFailures: bool = False
    ) -> None:
        # No configuration so always validates.
        pass

    def validateKey(self, lookupKey: LookupKey, entity: DatasetRef | DatasetType | StorageClass) -> None:
        pass

    def getLookupKeys(self) -> set[LookupKey]:
        raise NotImplementedError("This is a no-op datastore that can not access a real datastore")

    def import_records(
        self,
        data: Mapping[str, DatastoreRecordData],
    ) -> None:
        raise NotImplementedError("This is a no-op datastore that can not access a real datastore")

    def export_records(
        self,
        refs: Iterable[DatasetIdRef],
    ) -> Mapping[str, DatastoreRecordData]:
        raise NotImplementedError("This is a no-op datastore that can not access a real datastore")

    def get_opaque_table_definitions(self) -> Mapping[str, DatastoreOpaqueTable]:
        return {}