Coverage for python/lsst/daf/butler/core/datastore.py: 46%
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Support for generic data stores."""

from __future__ import annotations

__all__ = ("DatastoreConfig", "Datastore", "DatastoreValidationError", "DatastoreRecordData")

import contextlib
import dataclasses
import logging
from abc import ABCMeta, abstractmethod
from collections import defaultdict
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    ClassVar,
    Dict,
    Iterable,
    Iterator,
    List,
    Mapping,
    Optional,
    Set,
    Tuple,
    Type,
    Union,
)

from lsst.utils import doImportType

from .config import Config, ConfigSubset
from .constraints import Constraints
from .exceptions import DatasetTypeNotSupportedError, ValidationError
from .fileDataset import FileDataset
from .storageClass import StorageClassFactory

if TYPE_CHECKING:
    from lsst.resources import ResourcePath, ResourcePathExpression

    from ..registry.interfaces import DatasetIdRef, DatastoreRegistryBridgeManager
    from .configSupport import LookupKey
    from .datasets import DatasetRef, DatasetType
    from .storageClass import StorageClass


class DatastoreConfig(ConfigSubset):
    """Configuration for Datastores."""

    component = "datastore"
    requiredKeys = ("cls",)
    defaultConfigFile = "datastore.yaml"


class DatastoreValidationError(ValidationError):
    """There is a problem with the Datastore configuration."""

    pass


@dataclasses.dataclass
class DatastoreRecordData:
    """A struct that represents a tabular data export from one or more
    datastores.
    """

    locations: Dict[str, List[DatasetIdRef]] = dataclasses.field(default_factory=lambda: defaultdict(list))
    """Mapping from datastore name to the datasets in that datastore.
    """

    records: Dict[str, List[Dict[str, Any]]] = dataclasses.field(default_factory=lambda: defaultdict(list))
    """Opaque table data that backs one or more datastores, grouped by
    opaque table name.
    """


@dataclasses.dataclass(frozen=True)
class Event:
    __slots__ = {"name", "undoFunc", "args", "kwargs"}
    name: str
    undoFunc: Callable
    args: tuple
    kwargs: dict


class IngestPrepData:
    """A helper base class for `Datastore` ingest implementations.

    Datastore implementations will generally need a custom implementation of
    this class.

    Should be accessed as ``Datastore.IngestPrepData`` instead of via direct
    import.

    Parameters
    ----------
    refs : iterable of `DatasetRef`
        References for the datasets that can be ingested by this datastore.
    """

    def __init__(self, refs: Iterable[DatasetRef]):
        self.refs = {ref.id: ref for ref in refs}


class DatastoreTransaction:
    """Keeps a log of `Datastore` activity and allows rollback.

    Parameters
    ----------
    parent : `DatastoreTransaction`, optional
        The parent transaction (if any).
    """

    Event: ClassVar[Type] = Event

    parent: Optional["DatastoreTransaction"]
    """The parent transaction. (`DatastoreTransaction`, optional)"""

    def __init__(self, parent: Optional[DatastoreTransaction] = None):
        self.parent = parent
        self._log: List[Event] = []

    def registerUndo(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> None:
        """Register event with undo function.

        Parameters
        ----------
        name : `str`
            Name of the event.
        undoFunc : func
            Function to undo this event.
        *args : `tuple`
            Positional arguments to `undoFunc`.
        **kwargs
            Keyword arguments to `undoFunc`.
        """
        self._log.append(self.Event(name, undoFunc, args, kwargs))

    @contextlib.contextmanager
    def undoWith(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> Iterator[None]:
        """Register undo function if nested operation succeeds.

        Calls `registerUndo`.

        This can be used to wrap individual undo-able statements within a
        DatastoreTransaction block. Multiple statements that can fail
        separately should not be part of the same `undoWith` block.

        All arguments are forwarded directly to `registerUndo`.
        """
        try:
            yield None
        except BaseException:
            raise
        else:
            self.registerUndo(name, undoFunc, *args, **kwargs)

    def rollback(self) -> None:
        """Roll back all events in this transaction."""
        log = logging.getLogger(__name__)
        while self._log:
            ev = self._log.pop()
            try:
                log.debug(
                    "Rolling back transaction: %s: %s(%s,%s)",
                    ev.name,
                    ev.undoFunc,
                    ",".join(str(a) for a in ev.args),
                    ",".join(f"{k}={v}" for k, v in ev.kwargs.items()),
                )
            except Exception:
                # In case we had a problem in stringification of arguments.
                log.warning("Rolling back transaction: %s", ev.name)
            try:
                ev.undoFunc(*ev.args, **ev.kwargs)
            except BaseException as e:
                # Deliberately swallow any error that occurs while unrolling.
                log.warning("Exception: %s caught while unrolling: %s", e, ev.name)
                pass

    def commit(self) -> None:
        """Commit this transaction."""
        if self.parent is None:
            # Just forget about the events; they have already happened.
            return
        else:
            # We may still need to roll back the events from this transaction
            # as part of the parent, so pass them along.
            self.parent._log.extend(self._log)
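

# Illustrative sketch (an assumption, not from the original source): how undo
# actions can be registered with a DatastoreTransaction and rolled back in
# LIFO order when something goes wrong.
def _example_transaction_rollback() -> None:
    created: List[str] = []

    def _undo_create(artifact: str) -> None:
        created.remove(artifact)

    txn = DatastoreTransaction()
    created.append("artifact-1")
    txn.registerUndo("create", _undo_create, "artifact-1")
    # undoWith only registers the undo action if the wrapped statement
    # succeeds, so a failure inside the block leaves nothing extra to undo.
    with txn.undoWith("create", _undo_create, "artifact-2"):
        created.append("artifact-2")
    txn.rollback()  # Undoes both creations, most recent first.
    assert created == []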


class Datastore(metaclass=ABCMeta):
    """Datastore interface.

    Parameters
    ----------
    config : `DatastoreConfig` or `str`
        Load configuration either from an existing config instance or by
        referring to a configuration file.
    bridgeManager : `DatastoreRegistryBridgeManager`
        Object that manages the interface between `Registry` and datastores.
    butlerRoot : `str`, optional
        New datastore root to use to override the configuration value.
    """

    defaultConfigFile: ClassVar[Optional[str]] = None
    """Path to configuration defaults. Accessed within the ``config`` resource
    or relative to a search path. Can be `None` if no defaults are specified.
    """

    containerKey: ClassVar[Optional[str]] = None
    """Name of the key containing a list of subconfigurations that also
    need to be merged with defaults and will likely use different Python
    datastore classes (but all using DatastoreConfig). Assumed to be a
    list of configurations that can be represented in a DatastoreConfig
    and containing a "cls" definition. `None` indicates that no containers
    are expected in this Datastore."""

    isEphemeral: bool = False
    """Indicate whether this Datastore is ephemeral or not. An ephemeral
    datastore is one where the contents of the datastore will not exist
    across process restarts. This value can change per-instance."""

    config: DatastoreConfig
    """Configuration used to create Datastore."""

    name: str
    """Label associated with this Datastore."""

    storageClassFactory: StorageClassFactory
    """Factory for creating storage class instances from name."""

    constraints: Constraints
    """Constraints to apply when putting datasets into the datastore."""

    # MyPy does not like this to be annotated as any kind of type, because it
    # can't do static checking on type variables that can change at runtime.
    IngestPrepData: ClassVar[Any] = IngestPrepData
    """Helper base class for ingest implementations.
    """

    @classmethod
    @abstractmethod
    def setConfigRoot(cls, root: str, config: Config, full: Config, overwrite: bool = True) -> None:
        """Set filesystem-dependent config options for this datastore.

        The options will be appropriate for a new empty repository with the
        given root.

        Parameters
        ----------
        root : `str`
            Filesystem path to the root of the data repository.
        config : `Config`
            A `Config` to update. Only the subset understood by
            this component will be updated. Will not expand
            defaults.
        full : `Config`
            A complete config with all defaults expanded that can be
            converted to a `DatastoreConfig`. Read-only and will not be
            modified by this method.
            Repository-specific options that should not be obtained
            from defaults when Butler instances are constructed
            should be copied from ``full`` to ``config``.
        overwrite : `bool`, optional
            If `False`, do not modify a value in ``config`` if the value
            already exists. Default is always to overwrite with the provided
            ``root``.

        Notes
        -----
        If a keyword is explicitly defined in the supplied ``config`` it
        will not be overridden by this method if ``overwrite`` is `False`.
        This allows explicit values set in external configs to be retained.
        """
        raise NotImplementedError()

    @staticmethod
    def fromConfig(
        config: Config,
        bridgeManager: DatastoreRegistryBridgeManager,
        butlerRoot: Optional[ResourcePathExpression] = None,
    ) -> "Datastore":
        """Create datastore from type specified in config file.

        Parameters
        ----------
        config : `Config`
            Configuration instance.
        bridgeManager : `DatastoreRegistryBridgeManager`
            Object that manages the interface between `Registry` and
            datastores.
        butlerRoot : `str`, optional
            Butler root directory.
        """
        cls = doImportType(config["datastore", "cls"])
        if not issubclass(cls, Datastore):
            raise TypeError(f"Imported child class {config['datastore', 'cls']} is not a Datastore")
        return cls(config=config, bridgeManager=bridgeManager, butlerRoot=butlerRoot)
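
    # Illustrative usage sketch (an assumption, not part of the original
    # source). ``fromConfig`` only requires that the configuration provide a
    # ``datastore.cls`` entry naming a Datastore subclass; a real repository
    # configuration (e.g. ``datastore.yaml``) carries many more keys, and the
    # class path below is just an example.
    #
    #     config = Config(
    #         {"datastore": {"cls": "lsst.daf.butler.datastores.inMemoryDatastore.InMemoryDatastore"}}
    #     )
    #     datastore = Datastore.fromConfig(config, bridgeManager=bridgeManager)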

    def __init__(
        self,
        config: Union[Config, str],
        bridgeManager: DatastoreRegistryBridgeManager,
        butlerRoot: Optional[ResourcePathExpression] = None,
    ):
        self.config = DatastoreConfig(config)
        self.name = "ABCDataStore"
        self._transaction: Optional[DatastoreTransaction] = None

        # All Datastores need storage classes and constraints.
        self.storageClassFactory = StorageClassFactory()

        # And read the constraints list.
        constraintsConfig = self.config.get("constraints")
        self.constraints = Constraints(constraintsConfig, universe=bridgeManager.universe)

    def __str__(self) -> str:
        return self.name

    def __repr__(self) -> str:
        return self.name

    @property
    def names(self) -> Tuple[str, ...]:
        """Names associated with this datastore returned as a tuple.

        Can be different from ``name`` for a chaining datastore.
        """
        # Default implementation returns solely the name itself.
        return (self.name,)

    @contextlib.contextmanager
    def transaction(self) -> Iterator[DatastoreTransaction]:
        """Context manager supporting `Datastore` transactions.

        Transactions can be nested, and are to be used in combination with
        `Registry.transaction`.
        """
        self._transaction = DatastoreTransaction(self._transaction)
        try:
            yield self._transaction
        except BaseException:
            self._transaction.rollback()
            raise
        else:
            self._transaction.commit()
        self._transaction = self._transaction.parent
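
    # Illustrative usage sketch (an assumption, not from the original source):
    #
    #     with datastore.transaction():
    #         datastore.put(inMemoryDataset, ref)
    #
    # If the block raises, the nested DatastoreTransaction is rolled back
    # before the exception propagates; otherwise it is committed into any
    # enclosing transaction.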

    @abstractmethod
    def knows(self, ref: DatasetRef) -> bool:
        """Check if the dataset is known to the datastore.

        Does not check for existence of any artifact.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required dataset.

        Returns
        -------
        exists : `bool`
            `True` if the dataset is known to the datastore.
        """
        raise NotImplementedError()

    def mexists(
        self, refs: Iterable[DatasetRef], artifact_existence: Optional[Dict[ResourcePath, bool]] = None
    ) -> Dict[DatasetRef, bool]:
        """Check the existence of multiple datasets at once.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            The datasets to be checked.
        artifact_existence : `dict` [`lsst.resources.ResourcePath`, `bool`]
            Optional mapping of datastore artifact to existence. Updated by
            this method with details of all artifacts tested. Can be `None`
            if the caller is not interested.

        Returns
        -------
        existence : `dict` [`DatasetRef`, `bool`]
            Mapping from dataset to boolean indicating existence.
        """
        existence: Dict[DatasetRef, bool] = {}
        # Non-optimized default.
        for ref in refs:
            existence[ref] = self.exists(ref)
        return existence

    @abstractmethod
    def exists(self, datasetRef: DatasetRef) -> bool:
        """Check if the dataset exists in the datastore.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required dataset.

        Returns
        -------
        exists : `bool`
            `True` if the entity exists in the `Datastore`.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def get(self, datasetRef: DatasetRef, parameters: Optional[Mapping[str, Any]] = None) -> Any:
        """Load an `InMemoryDataset` from the store.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required Dataset.
        parameters : `dict`, optional
            `StorageClass`-specific parameters that specify a slice of the
            Dataset to be loaded.

        Returns
        -------
        inMemoryDataset : `object`
            Requested Dataset or slice thereof as an InMemoryDataset.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def put(self, inMemoryDataset: Any, datasetRef: DatasetRef) -> None:
        """Write an `InMemoryDataset` with a given `DatasetRef` to the store.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Dataset to store.
        datasetRef : `DatasetRef`
            Reference to the associated Dataset.
        """
        raise NotImplementedError("Must be implemented by subclass")

    def _overrideTransferMode(self, *datasets: FileDataset, transfer: Optional[str] = None) -> Optional[str]:
        """Allow ingest transfer mode to be defaulted based on datasets.

        Parameters
        ----------
        datasets : `FileDataset`
            Each positional argument is a struct containing information about
            a file to be ingested, including its path (either absolute or
            relative to the datastore root, if applicable), a complete
            `DatasetRef` (with ``dataset_id`` not `None`), and optionally a
            formatter class or its fully-qualified string name. If a formatter
            is not provided, this method should populate that attribute with
            the formatter the datastore would use for `put`. Subclasses are
            also permitted to modify the path attribute (typically to put it
            in what the datastore considers its standard form).
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            See `ingest` for details of transfer modes.

        Returns
        -------
        newTransfer : `str`
            Transfer mode to use. Will be identical to the supplied transfer
            mode unless "auto" is used.
        """
        if transfer != "auto":
            return transfer
        raise RuntimeError(f"{transfer} is not allowed without specialization.")

    def _prepIngest(self, *datasets: FileDataset, transfer: Optional[str] = None) -> IngestPrepData:
        """Process datasets to identify which ones can be ingested.

        Parameters
        ----------
        datasets : `FileDataset`
            Each positional argument is a struct containing information about
            a file to be ingested, including its path (either absolute or
            relative to the datastore root, if applicable), a complete
            `DatasetRef` (with ``dataset_id`` not `None`), and optionally a
            formatter class or its fully-qualified string name. If a formatter
            is not provided, this method should populate that attribute with
            the formatter the datastore would use for `put`. Subclasses are
            also permitted to modify the path attribute (typically to put it
            in what the datastore considers its standard form).
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            See `ingest` for details of transfer modes.

        Returns
        -------
        data : `IngestPrepData`
            An instance of a subclass of `IngestPrepData`, used to pass
            arbitrary data from `_prepIngest` to `_finishIngest`. This should
            include only the datasets this datastore can actually ingest;
            others should be silently ignored (`Datastore.ingest` will inspect
            `IngestPrepData.refs` and raise `DatasetTypeNotSupportedError` if
            necessary).

        Raises
        ------
        NotImplementedError
            Raised if the datastore does not support the given transfer mode
            (including the case where ingest is not supported at all).
        FileNotFoundError
            Raised if one of the given files does not exist.
        FileExistsError
            Raised if transfer is not `None` but the (internal) location the
            file would be moved to is already occupied.

        Notes
        -----
        This method (along with `_finishIngest`) should be implemented by
        subclasses to provide ingest support instead of implementing `ingest`
        directly.

        `_prepIngest` should not modify the data repository or given files in
        any way; all changes should be deferred to `_finishIngest`.

        When possible, exceptions should be raised in `_prepIngest` instead of
        `_finishIngest`. `NotImplementedError` exceptions that indicate that
        the transfer mode is not supported must be raised by `_prepIngest`
        instead of `_finishIngest`.
        """
        raise NotImplementedError(f"Datastore {self} does not support direct file-based ingest.")

    def _finishIngest(
        self, prepData: IngestPrepData, *, transfer: Optional[str] = None, record_validation_info: bool = True
    ) -> None:
        """Complete an ingest operation.

        Parameters
        ----------
        prepData : `IngestPrepData`
            An instance of a subclass of `IngestPrepData`. Guaranteed to be
            the direct result of a call to `_prepIngest` on this datastore.
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            See `ingest` for details of transfer modes.
        record_validation_info : `bool`, optional
            If `True`, the default, the datastore can record validation
            information associated with the file. If `False` the datastore
            will not attempt to track any information such as checksums
            or file sizes. This can be useful if such information is tracked
            in an external system or if the file is to be compressed in place.
            It is up to the datastore whether this parameter is relevant.

        Raises
        ------
        FileNotFoundError
            Raised if one of the given files does not exist.
        FileExistsError
            Raised if transfer is not `None` but the (internal) location the
            file would be moved to is already occupied.

        Notes
        -----
        This method (along with `_prepIngest`) should be implemented by
        subclasses to provide ingest support instead of implementing `ingest`
        directly.
        """
        raise NotImplementedError(f"Datastore {self} does not support direct file-based ingest.")
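
    # Minimal subclass sketch (an assumption, not from the original source) of
    # how the two-phase ingest hooks fit together; all names are hypothetical.
    #
    #     class MyDatastore(Datastore):
    #         class IngestPrepData(Datastore.IngestPrepData):
    #             """Could carry extra per-file state between the two phases."""
    #
    #         def _prepIngest(self, *datasets, transfer=None):
    #             # Validate paths and select refs without touching anything.
    #             acceptable = [ref for d in datasets for ref in d.refs]
    #             return self.IngestPrepData(acceptable)
    #
    #         def _finishIngest(self, prepData, *, transfer=None, record_validation_info=True):
    #             # Perform the actual file transfers and record insertion here.
    #             ...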

    def ingest(
        self, *datasets: FileDataset, transfer: Optional[str] = None, record_validation_info: bool = True
    ) -> None:
        """Ingest one or more files into the datastore.

        Parameters
        ----------
        datasets : `FileDataset`
            Each positional argument is a struct containing information about
            a file to be ingested, including its path (either absolute or
            relative to the datastore root, if applicable), a complete
            `DatasetRef` (with ``dataset_id`` not `None`), and optionally a
            formatter class or its fully-qualified string name. If a formatter
            is not provided, the one the datastore would use for ``put`` on
            that dataset is assumed.
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            If `None` (default), the file must already be in a location
            appropriate for the datastore (e.g. within its root directory),
            and will not be modified. Other choices include "move", "copy",
            "link", "symlink", "relsymlink", and "hardlink". "link" is a
            special transfer mode that will first try to make a hardlink and
            if that fails a symlink will be used instead. "relsymlink" creates
            a relative symlink rather than use an absolute path.
            Most datastores do not support all transfer modes.
            "auto" is a special option that will let the
            data store choose the most natural option for itself.
        record_validation_info : `bool`, optional
            If `True`, the default, the datastore can record validation
            information associated with the file. If `False` the datastore
            will not attempt to track any information such as checksums
            or file sizes. This can be useful if such information is tracked
            in an external system or if the file is to be compressed in place.
            It is up to the datastore whether this parameter is relevant.

        Raises
        ------
        NotImplementedError
            Raised if the datastore does not support the given transfer mode
            (including the case where ingest is not supported at all).
        DatasetTypeNotSupportedError
            Raised if one or more files to be ingested have a dataset type
            that is not supported by the datastore.
        FileNotFoundError
            Raised if one of the given files does not exist.
        FileExistsError
            Raised if transfer is not `None` but the (internal) location the
            file would be moved to is already occupied.

        Notes
        -----
        Subclasses should implement `_prepIngest` and `_finishIngest` instead
        of implementing `ingest` directly. Datastores that hold and
        delegate to child datastores may want to call those methods as well.

        Subclasses are encouraged to document their supported transfer modes
        in their class documentation. An illustrative call is sketched after
        this method.
        """
        # Allow a datastore to select a default transfer mode.
        transfer = self._overrideTransferMode(*datasets, transfer=transfer)
        prepData = self._prepIngest(*datasets, transfer=transfer)
        refs = {ref.id: ref for dataset in datasets for ref in dataset.refs}
        if None in refs:
            # Find the files for the error message. There may be multiple
            # bad refs, so look for all of them.
            unresolved_paths = {}
            for dataset in datasets:
                unresolved = []
                for ref in dataset.refs:
                    if ref.id is None:
                        unresolved.append(ref)
                if unresolved:
                    unresolved_paths[dataset.path] = unresolved
            raise RuntimeError(
                "Attempt to ingest unresolved DatasetRef from: "
                + ",".join(
                    f"{path}: ({[str(r) for r in bad_refs]})"
                    for path, bad_refs in unresolved_paths.items()
                )
            )
        if refs.keys() != prepData.refs.keys():
            unsupported = refs.keys() - prepData.refs.keys()
            # Group unsupported refs by DatasetType for an informative
            # but still concise error message.
            byDatasetType = defaultdict(list)
            for datasetId in unsupported:
                ref = refs[datasetId]
                byDatasetType[ref.datasetType].append(ref)
            raise DatasetTypeNotSupportedError(
                "DatasetType(s) not supported in ingest: "
                + ", ".join(f"{k.name} ({len(v)} dataset(s))" for k, v in byDatasetType.items())
            )
        self._finishIngest(prepData, transfer=transfer, record_validation_info=record_validation_info)
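
    # Illustrative usage sketch (an assumption, not from the original source):
    #
    #     datastore.ingest(
    #         FileDataset(path="data/raw_00001.fits", refs=[resolved_ref]),
    #         transfer="copy",
    #     )
    #
    # ``resolved_ref`` must be a DatasetRef with ``id`` already set; "copy"
    # can be replaced by any transfer mode the concrete datastore supports.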

    def transfer_from(
        self,
        source_datastore: Datastore,
        refs: Iterable[DatasetRef],
        local_refs: Optional[Iterable[DatasetRef]] = None,
        transfer: str = "auto",
        artifact_existence: Optional[Dict[ResourcePath, bool]] = None,
    ) -> None:
        """Transfer dataset artifacts from another datastore to this one.

        Parameters
        ----------
        source_datastore : `Datastore`
            The datastore from which to transfer artifacts. That datastore
            must be compatible with this datastore receiving the artifacts.
        refs : iterable of `DatasetRef`
            The datasets to transfer from the source datastore.
        local_refs : iterable of `DatasetRef`, optional
            The dataset refs associated with the registry associated with
            this datastore. Can be `None` if the source and target datastore
            are using UUIDs.
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            Choices include "move", "copy",
            "link", "symlink", "relsymlink", and "hardlink". "link" is a
            special transfer mode that will first try to make a hardlink and
            if that fails a symlink will be used instead. "relsymlink" creates
            a relative symlink rather than use an absolute path.
            Most datastores do not support all transfer modes.
            "auto" (the default) is a special option that will let the
            data store choose the most natural option for itself.
            If the source location and transfer location are identical the
            transfer mode will be ignored.
        artifact_existence : `dict` [`lsst.resources.ResourcePath`, `bool`]
            Optional mapping of datastore artifact to existence. Updated by
            this method with details of all artifacts tested. Can be `None`
            if the caller is not interested.

        Raises
        ------
        TypeError
            Raised if the two datastores are not compatible.
        """
        if type(self) is not type(source_datastore):
            raise TypeError(
                f"Datastore mismatch between this datastore ({type(self)}) and the "
                f"source datastore ({type(source_datastore)})."
            )

        raise NotImplementedError(f"Datastore {type(self)} must implement a transfer_from method.")

    @abstractmethod
    def getURIs(
        self, datasetRef: DatasetRef, predict: bool = False
    ) -> Tuple[Optional[ResourcePath], Dict[str, ResourcePath]]:
        """Return URIs associated with dataset.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required dataset.
        predict : `bool`, optional
            If the datastore does not know about the dataset, should it
            return a predicted URI or not?

        Returns
        -------
        primary : `lsst.resources.ResourcePath`
            The URI to the primary artifact associated with this dataset.
            If the dataset was disassembled within the datastore this
            may be `None`.
        components : `dict`
            URIs to any components associated with the dataset artifact.
            Can be empty if there are no components.
        """
        raise NotImplementedError()

    @abstractmethod
    def getURI(self, datasetRef: DatasetRef, predict: bool = False) -> ResourcePath:
        """Return the URI to the Dataset.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required Dataset.
        predict : `bool`
            If `True` attempt to predict the URI for a dataset if it does
            not exist in the datastore.

        Returns
        -------
        uri : `lsst.resources.ResourcePath`
            URI pointing to the Dataset within the datastore. If the
            Dataset does not exist in the datastore, the URI may be a guess.
            If the datastore does not have entities that relate well
            to the concept of a URI the returned URI will be
            descriptive. The returned URI is not guaranteed to be obtainable.

        Raises
        ------
        FileNotFoundError
            Raised if a URI has been requested for a dataset that does not
            exist and guessing is not allowed.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def retrieveArtifacts(
        self,
        refs: Iterable[DatasetRef],
        destination: ResourcePath,
        transfer: str = "auto",
        preserve_path: bool = True,
        overwrite: bool = False,
    ) -> List[ResourcePath]:
        """Retrieve the artifacts associated with the supplied refs.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            The datasets for which artifacts are to be retrieved.
            A single ref can result in multiple artifacts. The refs must
            be resolved.
        destination : `lsst.resources.ResourcePath`
            Location to write the artifacts.
        transfer : `str`, optional
            Method to use to transfer the artifacts. Must be one of the
            options supported by `lsst.resources.ResourcePath.transfer_from()`.
            "move" is not allowed.
        preserve_path : `bool`, optional
            If `True` the full path of the artifact within the datastore
            is preserved. If `False` the final file component of the path
            is used.
        overwrite : `bool`, optional
            If `True` allow transfers to overwrite existing files at the
            destination.

        Returns
        -------
        targets : `list` of `lsst.resources.ResourcePath`
            URIs of file artifacts in the destination location. Order is not
            preserved.

        Notes
        -----
        For non-file datastores the artifacts written to the destination
        may not match the representation inside the datastore. For example
        a hierarchical data structure in a NoSQL database may well be stored
        as a JSON file.
        """
        raise NotImplementedError()

    @abstractmethod
    def remove(self, datasetRef: DatasetRef) -> None:
        """Indicate to the Datastore that a Dataset can be removed.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required Dataset.

        Raises
        ------
        FileNotFoundError
            When Dataset does not exist.

        Notes
        -----
        Some Datastores may implement this method as a silent no-op to
        disable Dataset deletion through standard interfaces.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def forget(self, refs: Iterable[DatasetRef]) -> None:
        """Indicate to the Datastore that it should remove all records of the
        given datasets, without actually deleting them.

        Parameters
        ----------
        refs : `Iterable` [ `DatasetRef` ]
            References to the datasets being forgotten.

        Notes
        -----
        Asking a datastore to forget a `DatasetRef` it does not hold should be
        a silent no-op, not an error.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def trash(self, ref: Union[DatasetRef, Iterable[DatasetRef]], ignore_errors: bool = True) -> None:
        """Indicate to the Datastore that a Dataset can be moved to the trash.

        Parameters
        ----------
        ref : `DatasetRef` or iterable thereof
            Reference(s) to the required Dataset.
        ignore_errors : `bool`, optional
            Determine whether errors should be ignored. When multiple
            refs are being trashed there will be no per-ref check.

        Raises
        ------
        FileNotFoundError
            When Dataset does not exist and errors are not ignored. Only
            checked if a single ref is supplied (and not in a list).

        Notes
        -----
        Some Datastores may implement this method as a silent no-op to
        disable Dataset deletion through standard interfaces.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def emptyTrash(self, ignore_errors: bool = True) -> None:
        """Remove all datasets from the trash.

        Parameters
        ----------
        ignore_errors : `bool`, optional
            Determine whether errors should be ignored.

        Notes
        -----
        Some Datastores may implement this method as a silent no-op to
        disable Dataset deletion through standard interfaces.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def transfer(self, inputDatastore: Datastore, datasetRef: DatasetRef) -> None:
        """Transfer a dataset from another datastore to this datastore.

        Parameters
        ----------
        inputDatastore : `Datastore`
            The external `Datastore` from which to retrieve the Dataset.
        datasetRef : `DatasetRef`
            Reference to the required Dataset.
        """
        raise NotImplementedError("Must be implemented by subclass")

    def export(
        self, refs: Iterable[DatasetRef], *, directory: Optional[str] = None, transfer: Optional[str] = None
    ) -> Iterable[FileDataset]:
        """Export datasets for transfer to another data repository.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            Dataset references to be exported.
        directory : `str`, optional
            Path to a directory that should contain files corresponding to
            output datasets. Ignored if ``transfer`` is `None`.
        transfer : `str`, optional
            Mode that should be used to move datasets out of the repository.
            Valid options are the same as those of the ``transfer`` argument
            to ``ingest``, and datastores may similarly signal that a transfer
            mode is not supported by raising `NotImplementedError`.

        Returns
        -------
        datasets : iterable of `FileDataset`
            Structs containing information about the exported datasets, in the
            same order as ``refs``.

        Raises
        ------
        NotImplementedError
            Raised if the given transfer mode is not supported.
        """
        raise NotImplementedError(f"Transfer mode {transfer} not supported.")

    @abstractmethod
    def validateConfiguration(
        self, entities: Iterable[Union[DatasetRef, DatasetType, StorageClass]], logFailures: bool = False
    ) -> None:
        """Validate some of the configuration for this datastore.

        Parameters
        ----------
        entities : iterable of `DatasetRef`, `DatasetType`, or `StorageClass`
            Entities to test against this configuration. Can be differing
            types.
        logFailures : `bool`, optional
            If `True`, output a log message for every validation error
            detected.

        Raises
        ------
        DatastoreValidationError
            Raised if there is a validation problem with a configuration.

        Notes
        -----
        Which parts of the configuration are validated is at the discretion
        of each Datastore implementation.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def validateKey(self, lookupKey: LookupKey, entity: Union[DatasetRef, DatasetType, StorageClass]) -> None:
        """Validate a specific look up key with supplied entity.

        Parameters
        ----------
        lookupKey : `LookupKey`
            Key to use to retrieve information from the datastore
            configuration.
        entity : `DatasetRef`, `DatasetType`, or `StorageClass`
            Entity to compare with configuration retrieved using the
            specified lookup key.

        Raises
        ------
        DatastoreValidationError
            Raised if there is a problem with the combination of entity
            and lookup key.

        Notes
        -----
        Bypasses the normal selection priorities by allowing a key that
        would normally not be selected to be validated.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def getLookupKeys(self) -> Set[LookupKey]:
        """Return all the lookup keys relevant to this datastore.

        Returns
        -------
        keys : `set` of `LookupKey`
            The keys stored internally for looking up information based
            on `DatasetType` name or `StorageClass`.
        """
        raise NotImplementedError("Must be implemented by subclass")

    def needs_expanded_data_ids(
        self,
        transfer: Optional[str],
        entity: Optional[Union[DatasetRef, DatasetType, StorageClass]] = None,
    ) -> bool:
        """Test whether this datastore needs expanded data IDs to ingest.

        Parameters
        ----------
        transfer : `str` or `None`
            Transfer mode for ingest.
        entity : `DatasetRef`, `DatasetType`, or `StorageClass`, optional
            Object representing what will be ingested. If not provided (or not
            specific enough), `True` may be returned even if expanded data
            IDs aren't necessary.

        Returns
        -------
        needed : `bool`
            If `True`, expanded data IDs may be needed. `False` only if
            expansion definitely isn't necessary.
        """
        return True

    # TODO: make abstract, implement in all concrete datastores
    def import_records(
        self,
        data: DatastoreRecordData,
    ) -> None:
        """Import datastore location and record data from an in-memory data
        structure.

        Parameters
        ----------
        data : `DatastoreRecordData`
            Data structure to load from. May contain data for other
            `Datastore` instances (generally because they are chained to this
            one), which should be ignored.

        Notes
        -----
        Implementations should generally not check that any external resources
        (e.g. files) referred to by these records actually exist, for
        performance reasons; we expect higher-level code to guarantee that
        they do.

        Implementations are responsible for calling
        `DatastoreRegistryBridge.insert` on all datasets in ``data.locations``
        where the key is in `names`, as well as loading any opaque table data.
        """
        raise NotImplementedError()

    # TODO: make abstract, implement in all concrete datastores
    def export_records(
        self,
        refs: Iterable[DatasetIdRef],
    ) -> DatastoreRecordData:
        """Export datastore records and locations to an in-memory data
        structure.

        Parameters
        ----------
        refs : `Iterable` [ `DatasetIdRef` ]
            Datasets to save. This may include datasets not known to this
            datastore, which should be ignored.

        Returns
        -------
        data : `DatastoreRecordData`
            Populated data structure.
        """
        raise NotImplementedError()
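

# Illustrative sketch (an assumption, not from the original source): concrete
# datastores that implement the two record methods above could use them to
# copy bookkeeping, without touching file artifacts, between compatible
# datastores.
#
#     record_data = source_datastore.export_records(refs)
#     target_datastore.import_records(record_data)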