Coverage for python/lsst/daf/butler/core/datastore.py: 47%
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22"""Support for generic data stores."""
24from __future__ import annotations
26__all__ = ("DatastoreConfig", "Datastore", "DatastoreValidationError", "DatastoreRecordData")
28import contextlib
29import dataclasses
30import logging
31from abc import ABCMeta, abstractmethod
32from collections import defaultdict
33from typing import (
34 TYPE_CHECKING,
35 Any,
36 Callable,
37 ClassVar,
38 Dict,
39 Iterable,
40 Iterator,
41 List,
42 Mapping,
43 Optional,
44 Set,
45 Tuple,
46 Type,
47 Union,
48)
50from lsst.utils import doImportType
52from .config import Config, ConfigSubset
53from .constraints import Constraints
54from .exceptions import DatasetTypeNotSupportedError, ValidationError
55from .fileDataset import FileDataset
56from .storageClass import StorageClassFactory
58if TYPE_CHECKING:
59 from lsst.resources import ResourcePath, ResourcePathExpression
61 from ..registry.interfaces import DatasetIdRef, DatastoreRegistryBridgeManager
62 from .configSupport import LookupKey
63 from .datasets import DatasetRef, DatasetType
64 from .storageClass import StorageClass
65 from .storedFileInfo import StoredDatastoreItemInfo
68class DatastoreConfig(ConfigSubset):
69 """Configuration for Datastores."""
71 component = "datastore"
72 requiredKeys = ("cls",)
73 defaultConfigFile = "datastore.yaml"
76class DatastoreValidationError(ValidationError):
77 """There is a problem with the Datastore configuration."""
79 pass
82@dataclasses.dataclass
83class DatastoreRecordData:
84 """A struct that represents a tabular data export from a single
85 datastore.
86 """
88 refs: List[DatasetIdRef] = dataclasses.field(default_factory=list)
89 """List of DatasetRefs known to this datastore.
90 """
92 records: Dict[str, List[StoredDatastoreItemInfo]] = dataclasses.field(
93 default_factory=lambda: defaultdict(list)
94 )
95 """Opaque table data, grouped by opaque table name.
96 """
99@dataclasses.dataclass(frozen=True)
100class Event:
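 """Record of a single undoable action logged by `DatastoreTransaction`."""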
101 __slots__ = ("name", "undoFunc", "args", "kwargs")
102 name: str
103 undoFunc: Callable
104 args: tuple
105 kwargs: dict
108class IngestPrepData:
109 """A helper base class for `Datastore` ingest implementations.
111 Datastore implementations will generally need a custom implementation of
112 this class.
114 Should be accessed as ``Datastore.IngestPrepData`` instead of via direct
115 import.
117 Parameters
118 ----------
119 refs : iterable of `DatasetRef`
120 References for the datasets that can be ingested by this datastore.
121 """
123 def __init__(self, refs: Iterable[DatasetRef]):
124 self.refs = {ref.id: ref for ref in refs}
127class DatastoreTransaction:
128 """Keeps a log of `Datastore` activity and allow rollback.
130 Parameters
131 ----------
132 parent : `DatastoreTransaction`, optional
133 The parent transaction (if any)
134 """
136 Event: ClassVar[Type] = Event
138 parent: Optional["DatastoreTransaction"]
139 """The parent transaction. (`DatastoreTransaction`, optional)"""
141 def __init__(self, parent: Optional[DatastoreTransaction] = None):
142 self.parent = parent
143 self._log: List[Event] = []
145 def registerUndo(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> None:
146 """Register event with undo function.
148 Parameters
149 ----------
150 name : `str`
151 Name of the event.
152 undoFunc : callable
153 Function to undo this event.
154 *args
155 Positional arguments to `undoFunc`.
156 **kwargs
157 Keyword arguments to `undoFunc`.
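Examples
--------
A hedged sketch (not taken from this package): register a cleanup
callback inside a transaction; ``datastore``, ``temp_path``, and
``write_temporary_file`` are hypothetical stand-ins::
    import os
    with datastore.transaction() as txn:
        write_temporary_file(temp_path)
        # If anything later in the block raises, rollback() calls
        # os.remove(temp_path).
        txn.registerUndo("removeTempFile", os.remove, temp_path)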
158 """
159 self._log.append(self.Event(name, undoFunc, args, kwargs))
161 @contextlib.contextmanager
162 def undoWith(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> Iterator[None]:
163 """Register undo function if nested operation succeeds.
165 Calls `registerUndo`.
167 This can be used to wrap individual undo-able statements within a
168 DatastoreTransaction block. Multiple statements that can fail
169 separately should not be part of the same `undoWith` block.
171 All arguments are forwarded directly to `registerUndo`.
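Examples
--------
A hedged sketch: the undo action is registered only if the wrapped
statement succeeds. ``txn`` is a `DatastoreTransaction`;
``write_artifact``, ``remove_artifact``, and ``location`` are
hypothetical stand-ins::
    with txn.undoWith("writeArtifact", remove_artifact, location):
        write_artifact(location)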
172 """
173 try:
174 yield None
175 except BaseException:
176 raise
177 else:
178 self.registerUndo(name, undoFunc, *args, **kwargs)
180 def rollback(self) -> None:
181 """Roll back all events in this transaction."""
182 log = logging.getLogger(__name__)
183 while self._log:
184 ev = self._log.pop()
185 try:
186 log.debug(
187 "Rolling back transaction: %s: %s(%s,%s)",
188 ev.name,
189 ev.undoFunc,
190 ",".join(str(a) for a in ev.args),
191 ",".join(f"{k}={v}" for k, v in ev.kwargs.items()),
192 )
193 except Exception:
194 # In case we had a problem in stringification of arguments
195 log.warning("Rolling back transaction: %s", ev.name)
196 try:
197 ev.undoFunc(*ev.args, **ev.kwargs)
198 except BaseException as e:
199 # Deliberately swallow error that may occur in unrolling
200 log.warning("Exception: %s caught while unrolling: %s", e, ev.name)
201 pass
203 def commit(self) -> None:
204 """Commit this transaction."""
205 if self.parent is None:
206 # Just forget about the events, they have already happened.
207 return
208 else:
209 # We may still want the events from this transaction as part of
210 # the parent.
211 self.parent._log.extend(self._log)
214class Datastore(metaclass=ABCMeta):
215 """Datastore interface.
217 Parameters
218 ----------
219 config : `DatastoreConfig` or `str`
220 Load configuration either from an existing config instance or by
221 referring to a configuration file.
222 bridgeManager : `DatastoreRegistryBridgeManager`
223 Object that manages the interface between `Registry` and datastores.
224 butlerRoot : `str`, optional
225 New datastore root to use to override the configuration value.
226 """
228 defaultConfigFile: ClassVar[Optional[str]] = None
229 """Path to configuration defaults. Accessed within the ``config`` resource
230 or relative to a search path. Can be None if no defaults specified.
231 """
233 containerKey: ClassVar[Optional[str]] = None
234 """Name of the key containing a list of subconfigurations that also
235 need to be merged with defaults and will likely use different Python
236 datastore classes (but all using DatastoreConfig). Assumed to be a
237 list of configurations that can be represented in a DatastoreConfig
238 and containing a "cls" definition. None indicates that no containers
239 are expected in this Datastore."""
241 isEphemeral: bool = False
242 """Indicate whether this Datastore is ephemeral or not. An ephemeral
243 datastore is one where the contents of the datastore will not exist
244 across process restarts. This value can change per-instance."""
246 config: DatastoreConfig
247 """Configuration used to create Datastore."""
249 name: str
250 """Label associated with this Datastore."""
252 storageClassFactory: StorageClassFactory
253 """Factory for creating storage class instances from name."""
255 constraints: Constraints
256 """Constraints to apply when putting datasets into the datastore."""
258 # MyPy does not like for this to be annotated as any kind of type, because
259 # it can't do static checking on type variables that can change at runtime.
260 IngestPrepData: ClassVar[Any] = IngestPrepData
261 """Helper base class for ingest implementations.
262 """
264 @classmethod
265 @abstractmethod
266 def setConfigRoot(cls, root: str, config: Config, full: Config, overwrite: bool = True) -> None:
267 """Set filesystem-dependent config options for this datastore.
269 The options will be appropriate for a new empty repository with the
270 given root.
272 Parameters
273 ----------
274 root : `str`
275 Filesystem path to the root of the data repository.
276 config : `Config`
277 A `Config` to update. Only the subset understood by
278 this component will be updated. Will not expand
279 defaults.
280 full : `Config`
281 A complete config with all defaults expanded that can be
282 converted to a `DatastoreConfig`. Read-only and will not be
283 modified by this method.
284 Repository-specific options that should not be obtained
285 from defaults when Butler instances are constructed
286 should be copied from ``full`` to ``config``.
287 overwrite : `bool`, optional
288 If `False`, do not modify a value in ``config`` if the value
289 already exists. Default is always to overwrite with the provided
290 ``root``.
292 Notes
293 -----
294 If a keyword is explicitly defined in the supplied ``config`` it
295 will not be overridden by this method if ``overwrite`` is `False`.
296 This allows explicit values set in external configs to be retained.
297 """
298 raise NotImplementedError()
300 @staticmethod
301 def fromConfig(
302 config: Config,
303 bridgeManager: DatastoreRegistryBridgeManager,
304 butlerRoot: Optional[ResourcePathExpression] = None,
305 ) -> "Datastore":
306 """Create datastore from type specified in config file.
308 Parameters
309 ----------
310 config : `Config`
311 Configuration instance.
312 bridgeManager : `DatastoreRegistryBridgeManager`
313 Object that manages the interface between `Registry` and
314 datastores.
315 butlerRoot : `str`, optional
316 Butler root directory.
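Examples
--------
A hedged sketch, assuming ``butler.yaml`` defines a ``datastore.cls``
entry and ``bridge_manager`` has been constructed elsewhere::
    config = Config("butler.yaml")
    datastore = Datastore.fromConfig(config, bridge_manager)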
317 """
318 cls = doImportType(config["datastore", "cls"])
319 if not issubclass(cls, Datastore):
320 raise TypeError(f"Imported child class {config['datastore', 'cls']} is not a Datastore")
321 return cls(config=config, bridgeManager=bridgeManager, butlerRoot=butlerRoot)
323 def __init__(
324 self,
325 config: Union[Config, str],
326 bridgeManager: DatastoreRegistryBridgeManager,
327 butlerRoot: Optional[ResourcePathExpression] = None,
328 ):
329 self.config = DatastoreConfig(config)
330 self.name = "ABCDataStore"
331 self._transaction: Optional[DatastoreTransaction] = None
333 # All Datastores need storage classes and constraints
334 self.storageClassFactory = StorageClassFactory()
336 # And read the constraints list
337 constraintsConfig = self.config.get("constraints")
338 self.constraints = Constraints(constraintsConfig, universe=bridgeManager.universe)
340 def __str__(self) -> str:
341 return self.name
343 def __repr__(self) -> str:
344 return self.name
346 @property
347 def names(self) -> Tuple[str, ...]:
348 """Names associated with this datastore returned as a list.
350 Can be different to ``name`` for a chaining datastore.
351 """
352 # Default implementation returns solely the name itself
353 return (self.name,)
355 @contextlib.contextmanager
356 def transaction(self) -> Iterator[DatastoreTransaction]:
357 """Context manager supporting `Datastore` transactions.
359 Transactions can be nested, and are to be used in combination with
360 `Registry.transaction`.
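Examples
--------
A minimal sketch (``datastore``, ``dataset``, and ``ref`` are
hypothetical); if any statement in the block raises, the whole group
is rolled back::
    with datastore.transaction():
        datastore.put(dataset, ref)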
361 """
362 self._transaction = DatastoreTransaction(self._transaction)
363 try:
364 yield self._transaction
365 except BaseException:
366 self._transaction.rollback()
367 raise
368 else:
369 self._transaction.commit()
370 self._transaction = self._transaction.parent
372 @abstractmethod
373 def knows(self, ref: DatasetRef) -> bool:
374 """Check if the dataset is known to the datastore.
376 Does not check for existence of any artifact.
378 Parameters
379 ----------
380 ref : `DatasetRef`
381 Reference to the required dataset.
383 Returns
384 -------
385 exists : `bool`
386 `True` if the dataset is known to the datastore.
387 """
388 raise NotImplementedError()
390 def mexists(
391 self, refs: Iterable[DatasetRef], artifact_existence: Optional[Dict[ResourcePath, bool]] = None
392 ) -> Dict[DatasetRef, bool]:
393 """Check the existence of multiple datasets at once.
395 Parameters
396 ----------
397 refs : iterable of `DatasetRef`
398 The datasets to be checked.
399 artifact_existence : `dict` [`lsst.resources.ResourcePath`, `bool`]
400 Optional mapping of datastore artifact to existence. Updated by
401 this method with details of all artifacts tested. Can be `None`
402 if the caller is not interested.
404 Returns
405 -------
406 existence : `dict` [`DatasetRef`, `bool`]
407 Mapping from dataset to boolean indicating existence.
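Examples
--------
A hedged sketch, assuming ``datastore`` and ``refs`` already exist::
    existence = datastore.mexists(refs)
    missing = [ref for ref, exists in existence.items() if not exists]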
408 """
409 existence: Dict[DatasetRef, bool] = {}
410 # Non-optimized default.
411 for ref in refs:
412 existence[ref] = self.exists(ref)
413 return existence
415 @abstractmethod
416 def exists(self, datasetRef: DatasetRef) -> bool:
417 """Check if the dataset exists in the datastore.
419 Parameters
420 ----------
421 datasetRef : `DatasetRef`
422 Reference to the required dataset.
424 Returns
425 -------
426 exists : `bool`
427 `True` if the entity exists in the `Datastore`.
428 """
429 raise NotImplementedError("Must be implemented by subclass")
431 @abstractmethod
432 def get(self, datasetRef: DatasetRef, parameters: Optional[Mapping[str, Any]] = None) -> Any:
433 """Load an `InMemoryDataset` from the store.
435 Parameters
436 ----------
437 datasetRef : `DatasetRef`
438 Reference to the required Dataset.
439 parameters : `dict`
440 `StorageClass`-specific parameters that specify a slice of the
441 Dataset to be loaded.
443 Returns
444 -------
445 inMemoryDataset : `object`
446 Requested Dataset or slice thereof as an InMemoryDataset.
447 """
448 raise NotImplementedError("Must be implemented by subclass")
450 @abstractmethod
451 def put(self, inMemoryDataset: Any, datasetRef: DatasetRef) -> None:
452 """Write a `InMemoryDataset` with a given `DatasetRef` to the store.
454 Parameters
455 ----------
456 inMemoryDataset : `object`
457 The Dataset to store.
458 datasetRef : `DatasetRef`
459 Reference to the associated Dataset.
460 """
461 raise NotImplementedError("Must be implemented by subclass")
463 def _overrideTransferMode(self, *datasets: FileDataset, transfer: Optional[str] = None) -> Optional[str]:
464 """Allow ingest transfer mode to be defaulted based on datasets.
466 Parameters
467 ----------
468 datasets : `FileDataset`
469 Each positional argument is a struct containing information about
470 a file to be ingested, including its path (either absolute or
471 relative to the datastore root, if applicable), a complete
472 `DatasetRef` (with ``dataset_id not None``), and optionally a
473 formatter class or its fully-qualified string name. If a formatter
474 is not provided, this method should populate that attribute with
475 the formatter the datastore would use for `put`. Subclasses are
476 also permitted to modify the path attribute (typically to put it
477 in what the datastore considers its standard form).
478 transfer : `str`, optional
479 How (and whether) the dataset should be added to the datastore.
480 See `ingest` for details of transfer modes.
482 Returns
483 -------
484 newTransfer : `str`
485 Transfer mode to use. Will be identical to the supplied transfer
486 mode unless "auto" is used.
487 """
488 if transfer != "auto":
489 return transfer
490 raise RuntimeError(f"{transfer} is not allowed without specialization.")
492 def _prepIngest(self, *datasets: FileDataset, transfer: Optional[str] = None) -> IngestPrepData:
493 """Process datasets to identify which ones can be ingested.
495 Parameters
496 ----------
497 datasets : `FileDataset`
498 Each positional argument is a struct containing information about
499 a file to be ingested, including its path (either absolute or
500 relative to the datastore root, if applicable), a complete
501 `DatasetRef` (with ``dataset_id not None``), and optionally a
502 formatter class or its fully-qualified string name. If a formatter
503 is not provided, this method should populate that attribute with
504 the formatter the datastore would use for `put`. Subclasses are
505 also permitted to modify the path attribute (typically to put it
506 in what the datastore considers its standard form).
507 transfer : `str`, optional
508 How (and whether) the dataset should be added to the datastore.
509 See `ingest` for details of transfer modes.
511 Returns
512 -------
513 data : `IngestPrepData`
514 An instance of a subclass of `IngestPrepData`, used to pass
515 arbitrary data from `_prepIngest` to `_finishIngest`. This should
516 include only the datasets this datastore can actually ingest;
517 others should be silently ignored (`Datastore.ingest` will inspect
518 `IngestPrepData.refs` and raise `DatasetTypeNotSupportedError` if
519 necessary).
521 Raises
522 ------
523 NotImplementedError
524 Raised if the datastore does not support the given transfer mode
525 (including the case where ingest is not supported at all).
526 FileNotFoundError
527 Raised if one of the given files does not exist.
528 FileExistsError
529 Raised if transfer is not `None` but the (internal) location the
530 file would be moved to is already occupied.
532 Notes
533 -----
534 This method (along with `_finishIngest`) should be implemented by
535 subclasses to provide ingest support instead of implementing `ingest`
536 directly.
538 `_prepIngest` should not modify the data repository or given files in
539 any way; all changes should be deferred to `_finishIngest`.
541 When possible, exceptions should be raised in `_prepIngest` instead of
542 `_finishIngest`. `NotImplementedError` exceptions that indicate that
543 the transfer mode is not supported must be raised by `_prepIngest`
544 instead of `_finishIngest`.
545 """
546 raise NotImplementedError(f"Datastore {self} does not support direct file-based ingest.")
548 def _finishIngest(
549 self, prepData: IngestPrepData, *, transfer: Optional[str] = None, record_validation_info: bool = True
550 ) -> None:
551 """Complete an ingest operation.
553 Parameters
554 ----------
555 prepData : `IngestPrepData`
556 An instance of a subclass of `IngestPrepData`. Guaranteed to be
557 the direct result of a call to `_prepIngest` on this datastore.
558 transfer : `str`, optional
559 How (and whether) the dataset should be added to the datastore.
560 See `ingest` for details of transfer modes.
561 record_validation_info : `bool`, optional
562 If `True`, the default, the datastore can record validation
563 information associated with the file. If `False` the datastore
564 will not attempt to track any information such as checksums
565 or file sizes. This can be useful if such information is tracked
566 in an external system or if the file is to be compressed in place.
567 It is up to the datastore whether this parameter is relevant.
569 Raises
570 ------
571 FileNotFoundError
572 Raised if one of the given files does not exist.
573 FileExistsError
574 Raised if transfer is not `None` but the (internal) location the
575 file would be moved to is already occupied.
577 Notes
578 -----
579 This method (along with `_prepIngest`) should be implemented by
580 subclasses to provide ingest support instead of implementing `ingest`
581 directly.
582 """
583 raise NotImplementedError(f"Datastore {self} does not support direct file-based ingest.")
585 def ingest(
586 self, *datasets: FileDataset, transfer: Optional[str] = None, record_validation_info: bool = True
587 ) -> None:
588 """Ingest one or more files into the datastore.
590 Parameters
591 ----------
592 datasets : `FileDataset`
593 Each positional argument is a struct containing information about
594 a file to be ingested, including its path (either absolute or
595 relative to the datastore root, if applicable), a complete
596 `DatasetRef` (with ``dataset_id not None``), and optionally a
597 formatter class or its fully-qualified string name. If a formatter
598 is not provided, the one the datastore would use for ``put`` on
599 that dataset is assumed.
600 transfer : `str`, optional
601 How (and whether) the dataset should be added to the datastore.
602 If `None` (default), the file must already be in a location
603 appropriate for the datastore (e.g. within its root directory),
604 and will not be modified. Other choices include "move", "copy",
605 "link", "symlink", "relsymlink", and "hardlink". "link" is a
606 special transfer mode that will first try to make a hardlink and
607 if that fails a symlink will be used instead. "relsymlink" creates
608 a relative symlink rather than use an absolute path.
609 Most datastores do not support all transfer modes.
610 "auto" is a special option that will let the
611 data store choose the most natural option for itself.
612 record_validation_info : `bool`, optional
613 If `True`, the default, the datastore can record validation
614 information associated with the file. If `False` the datastore
615 will not attempt to track any information such as checksums
616 or file sizes. This can be useful if such information is tracked
617 in an external system or if the file is to be compressed in place.
618 It is up to the datastore whether this parameter is relevant.
620 Raises
621 ------
622 NotImplementedError
623 Raised if the datastore does not support the given transfer mode
624 (including the case where ingest is not supported at all).
625 DatasetTypeNotSupportedError
626 Raised if one or more files to be ingested have a dataset type that
627 is not supported by the datastore.
628 FileNotFoundError
629 Raised if one of the given files does not exist.
630 FileExistsError
631 Raised if transfer is not `None` but the (internal) location the
632 file would be moved to is already occupied.
634 Notes
635 -----
636 Subclasses should implement `_prepIngest` and `_finishIngest` instead
637 of implementing `ingest` directly. Datastores that hold and
638 delegate to child datastores may want to call those methods as well.
640 Subclasses are encouraged to document their supported transfer modes
641 in their class documentation.
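Examples
--------
A hedged sketch of ingesting one external file by copying it into the
datastore; ``datastore``, ``ref`` (a resolved `DatasetRef`), and the
path are hypothetical, and the datastore must support the "copy"
transfer mode::
    dataset = FileDataset(path="/data/external/exposure.fits", refs=ref)
    datastore.ingest(dataset, transfer="copy")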
642 """
643 # Allow a datastore to select a default transfer mode
644 transfer = self._overrideTransferMode(*datasets, transfer=transfer)
645 prepData = self._prepIngest(*datasets, transfer=transfer)
646 refs = {ref.id: ref for dataset in datasets for ref in dataset.refs}
647 if None in refs:
648 # Find the file for the error message. There may be multiple
649 # bad refs so look for all of them.
650 unresolved_paths = {}
651 for dataset in datasets:
652 unresolved = []
653 for ref in dataset.refs:
654 if ref.id is None:
655 unresolved.append(ref)
656 if unresolved:
657 unresolved_paths[dataset.path] = unresolved
658 raise RuntimeError(
659 "Attempt to ingest unresolved DatasetRef from: "
660 + ",".join(f"{p}: ({[str(r) for r in ref]})" for p, ref in unresolved_paths.items())
661 )
662 if refs.keys() != prepData.refs.keys():
663 unsupported = refs.keys() - prepData.refs.keys()
664 # Group unsupported refs by DatasetType for an informative
665 # but still concise error message.
666 byDatasetType = defaultdict(list)
667 for datasetId in unsupported:
668 ref = refs[datasetId]
669 byDatasetType[ref.datasetType].append(ref)
670 raise DatasetTypeNotSupportedError(
671 "DatasetType(s) not supported in ingest: "
672 + ", ".join(f"{k.name} ({len(v)} dataset(s))" for k, v in byDatasetType.items())
673 )
674 self._finishIngest(prepData, transfer=transfer, record_validation_info=record_validation_info)
676 def transfer_from(
677 self,
678 source_datastore: Datastore,
679 refs: Iterable[DatasetRef],
680 local_refs: Optional[Iterable[DatasetRef]] = None,
681 transfer: str = "auto",
682 artifact_existence: Optional[Dict[ResourcePath, bool]] = None,
683 ) -> None:
684 """Transfer dataset artifacts from another datastore to this one.
686 Parameters
687 ----------
688 source_datastore : `Datastore`
689 The datastore from which to transfer artifacts. That datastore
690 must be compatible with this datastore receiving the artifacts.
691 refs : iterable of `DatasetRef`
692 The datasets to transfer from the source datastore.
693 local_refs : iterable of `DatasetRef`, optional
694 The dataset refs known to the registry associated with
695 this datastore. Can be `None` if the source and target datastore
696 are using UUIDs.
697 transfer : `str`, optional
698 How (and whether) the dataset should be added to the datastore.
699 Choices include "move", "copy",
700 "link", "symlink", "relsymlink", and "hardlink". "link" is a
701 special transfer mode that will first try to make a hardlink and
702 if that fails a symlink will be used instead. "relsymlink" creates
703 a relative symlink rather than use an absolute path.
704 Most datastores do not support all transfer modes.
705 "auto" (the default) is a special option that will let the
706 data store choose the most natural option for itself.
707 If the source location and transfer location are identical the
708 transfer mode will be ignored.
709 artifact_existence : `dict` [`lsst.resources.ResourcePath`, `bool`]
710 Optional mapping of datastore artifact to existence. Updated by
711 this method with details of all artifacts tested. Can be `None`
712 if the caller is not interested.
714 Raises
715 ------
716 TypeError
717 Raised if the two datastores are not compatible.
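Examples
--------
A hedged sketch; ``source`` and ``target`` are two compatible concrete
datastores and ``refs`` are the datasets to be transferred::
    target.transfer_from(source, refs, transfer="copy")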
718 """
719 if type(self) is not type(source_datastore):
720 raise TypeError(
721 f"Datastore mismatch between this datastore ({type(self)}) and the "
722 f"source datastore ({type(source_datastore)})."
723 )
725 raise NotImplementedError(f"Datastore {type(self)} must implement a transfer_from method.")
727 @abstractmethod
728 def getURIs(
729 self, datasetRef: DatasetRef, predict: bool = False
730 ) -> Tuple[Optional[ResourcePath], Dict[str, ResourcePath]]:
731 """Return URIs associated with dataset.
733 Parameters
734 ----------
735 datasetRef : `DatasetRef`
736 Reference to the required dataset.
737 predict : `bool`, optional
738 If the datastore does not know about the dataset, should it
739 return a predicted URI or not?
741 Returns
742 -------
743 primary : `lsst.resources.ResourcePath`
744 The URI to the primary artifact associated with this dataset.
745 If the dataset was disassembled within the datastore this
746 may be `None`.
747 components : `dict`
748 URIs to any components associated with the dataset artifact.
749 Can be empty if there are no components.
750 """
751 raise NotImplementedError()
753 @abstractmethod
754 def getURI(self, datasetRef: DatasetRef, predict: bool = False) -> ResourcePath:
755 """URI to the Dataset.
757 Parameters
758 ----------
759 datasetRef : `DatasetRef`
760 Reference to the required Dataset.
761 predict : `bool`
762 If `True` attempt to predict the URI for a dataset if it does
763 not exist in datastore.
765 Returns
766 -------
767 uri : `lsst.resources.ResourcePath`
768 URI pointing to the Dataset within the datastore. If the
769 Dataset does not exist in the datastore, the URI may be a guess.
770 If the datastore does not have entities that relate well
771 to the concept of a URI the returned URI string will be
772 descriptive. The returned URI is not guaranteed to be obtainable.
774 Raises
775 ------
776 FileNotFoundError
777 Raised if a URI has been requested for a dataset that does not
778 exist and guessing is not allowed.
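Examples
--------
A hedged sketch (``datastore`` and ``ref`` are hypothetical); with
``predict=True`` a URI is returned even if the dataset has not yet
been stored::
    uri = datastore.getURI(ref, predict=True)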
779 """
780 raise NotImplementedError("Must be implemented by subclass")
782 @abstractmethod
783 def retrieveArtifacts(
784 self,
785 refs: Iterable[DatasetRef],
786 destination: ResourcePath,
787 transfer: str = "auto",
788 preserve_path: bool = True,
789 overwrite: bool = False,
790 ) -> List[ResourcePath]:
791 """Retrieve the artifacts associated with the supplied refs.
793 Parameters
794 ----------
795 refs : iterable of `DatasetRef`
796 The datasets for which artifacts are to be retrieved.
797 A single ref can result in multiple artifacts. The refs must
798 be resolved.
799 destination : `lsst.resources.ResourcePath`
800 Location to write the artifacts.
801 transfer : `str`, optional
802 Method to use to transfer the artifacts. Must be one of the options
803 supported by `lsst.resources.ResourcePath.transfer_from()`.
804 "move" is not allowed.
805 preserve_path : `bool`, optional
806 If `True` the full path of the artifact within the datastore
807 is preserved. If `False` the final file component of the path
808 is used.
809 overwrite : `bool`, optional
810 If `True` allow transfers to overwrite existing files at the
811 destination.
813 Returns
814 -------
815 targets : `list` of `lsst.resources.ResourcePath`
816 URIs of file artifacts in destination location. Order is not
817 preserved.
819 Notes
820 -----
821 For non-file datastores the artifacts written to the destination
822 may not match the representation inside the datastore. For example
823 a hierarchical data structure in a NoSQL database may well be stored
824 as a JSON file.
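Examples
--------
A hedged sketch of copying artifacts for some refs to a local
directory; ``datastore`` and ``refs`` are assumed to exist::
    from lsst.resources import ResourcePath
    destination = ResourcePath("/tmp/export/")
    targets = datastore.retrieveArtifacts(refs, destination, transfer="copy")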
825 """
826 raise NotImplementedError()
828 @abstractmethod
829 def remove(self, datasetRef: DatasetRef) -> None:
830 """Indicate to the Datastore that a Dataset can be removed.
832 Parameters
833 ----------
834 datasetRef : `DatasetRef`
835 Reference to the required Dataset.
837 Raises
838 ------
839 FileNotFoundError
840 When Dataset does not exist.
842 Notes
843 -----
844 Some Datastores may implement this method as a silent no-op to
845 disable Dataset deletion through standard interfaces.
846 """
847 raise NotImplementedError("Must be implemented by subclass")
849 @abstractmethod
850 def forget(self, refs: Iterable[DatasetRef]) -> None:
851 """Indicate to the Datastore that it should remove all records of the
852 given datasets, without actually deleting them.
854 Parameters
855 ----------
856 refs : `Iterable` [ `DatasetRef` ]
857 References to the datasets being forgotten.
859 Notes
860 -----
861 Asking a datastore to forget a `DatasetRef` it does not hold should be
862 a silent no-op, not an error.
863 """
864 raise NotImplementedError("Must be implemented by subclass")
866 @abstractmethod
867 def trash(self, ref: Union[DatasetRef, Iterable[DatasetRef]], ignore_errors: bool = True) -> None:
868 """Indicate to the Datastore that a Dataset can be moved to the trash.
870 Parameters
871 ----------
872 ref : `DatasetRef` or iterable thereof
873 Reference(s) to the required Dataset.
874 ignore_errors : `bool`, optional
875 Determine whether errors should be ignored. When multiple
876 refs are being trashed there will be no per-ref check.
878 Raises
879 ------
880 FileNotFoundError
881 When Dataset does not exist and errors are not ignored. Only
882 checked if a single ref is supplied (and not in a list).
884 Notes
885 -----
886 Some Datastores may implement this method as a silent no-op to
887 disable Dataset deletion through standard interfaces.
888 """
889 raise NotImplementedError("Must be implemented by subclass")
891 @abstractmethod
892 def emptyTrash(self, ignore_errors: bool = True) -> None:
893 """Remove all datasets from the trash.
895 Parameters
896 ----------
897 ignore_errors : `bool`, optional
898 Determine whether errors should be ignored.
900 Notes
901 -----
902 Some Datastores may implement this method as a silent no-op to
903 disable Dataset deletion through standard interfaces.
904 """
905 raise NotImplementedError("Must be implemented by subclass")
907 @abstractmethod
908 def transfer(self, inputDatastore: Datastore, datasetRef: DatasetRef) -> None:
909 """Transfer a dataset from another datastore to this datastore.
911 Parameters
912 ----------
913 inputDatastore : `Datastore`
914 The external `Datastore` from which to retrieve the Dataset.
915 datasetRef : `DatasetRef`
916 Reference to the required Dataset.
917 """
918 raise NotImplementedError("Must be implemented by subclass")
920 def export(
921 self, refs: Iterable[DatasetRef], *, directory: Optional[str] = None, transfer: Optional[str] = None
922 ) -> Iterable[FileDataset]:
923 """Export datasets for transfer to another data repository.
925 Parameters
926 ----------
927 refs : iterable of `DatasetRef`
928 Dataset references to be exported.
929 directory : `str`, optional
930 Path to a directory that should contain files corresponding to
931 output datasets. Ignored if ``transfer`` is `None`.
932 transfer : `str`, optional
933 Mode that should be used to move datasets out of the repository.
934 Valid options are the same as those of the ``transfer`` argument
935 to ``ingest``, and datastores may similarly signal that a transfer
936 mode is not supported by raising `NotImplementedError`.
938 Returns
939 -------
940 datasets : iterable of `FileDataset`
941 Structs containing information about the exported datasets, in the
942 same order as ``refs``.
944 Raises
945 ------
946 NotImplementedError
947 Raised if the given transfer mode is not supported.
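Examples
--------
A hedged sketch; ``datastore`` and ``refs`` are assumed, and the
returned `FileDataset` structs could then be ingested into another
repository::
    exported = list(datastore.export(refs, directory="/tmp/export", transfer="copy"))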
948 """
949 raise NotImplementedError(f"Transfer mode {transfer} not supported.")
951 @abstractmethod
952 def validateConfiguration(
953 self, entities: Iterable[Union[DatasetRef, DatasetType, StorageClass]], logFailures: bool = False
954 ) -> None:
955 """Validate some of the configuration for this datastore.
957 Parameters
958 ----------
959 entities : iterable of `DatasetRef`, `DatasetType`, or `StorageClass`
960 Entities to test against this configuration. Can be differing
961 types.
962 logFailures : `bool`, optional
963 If `True`, output a log message for every validation error
964 detected.
966 Raises
967 ------
968 DatastoreValidationError
969 Raised if there is a validation problem with a configuration.
971 Notes
972 -----
973 Which parts of the configuration are validated is at the discretion
974 of each Datastore implementation.
975 """
976 raise NotImplementedError("Must be implemented by subclass")
978 @abstractmethod
979 def validateKey(self, lookupKey: LookupKey, entity: Union[DatasetRef, DatasetType, StorageClass]) -> None:
980 """Validate a specific look up key with supplied entity.
982 Parameters
983 ----------
984 lookupKey : `LookupKey`
985 Key to use to retrieve information from the datastore
986 configuration.
987 entity : `DatasetRef`, `DatasetType`, or `StorageClass`
988 Entity to compare with configuration retrieved using the
989 specified lookup key.
991 Raises
992 ------
993 DatastoreValidationError
994 Raised if there is a problem with the combination of entity
995 and lookup key.
997 Notes
998 -----
999 Bypasses the normal selection priorities by allowing a key that
1000 would normally not be selected to be validated.
1001 """
1002 raise NotImplementedError("Must be implemented by subclass")
1004 @abstractmethod
1005 def getLookupKeys(self) -> Set[LookupKey]:
1006 """Return all the lookup keys relevant to this datastore.
1008 Returns
1009 -------
1010 keys : `set` of `LookupKey`
1011 The keys stored internally for looking up information based
1012 on `DatasetType` name or `StorageClass`.
1013 """
1014 raise NotImplementedError("Must be implemented by subclass")
1016 def needs_expanded_data_ids(
1017 self,
1018 transfer: Optional[str],
1019 entity: Optional[Union[DatasetRef, DatasetType, StorageClass]] = None,
1020 ) -> bool:
1021 """Test whether this datastore needs expanded data IDs to ingest.
1023 Parameters
1024 ----------
1025 transfer : `str` or `None`
1026 Transfer mode for ingest.
1027 entity : `DatasetRef`, `DatasetType`, or `StorageClass`, optional
1028 Object representing what will be ingested. If not provided (or not
1029 specific enough), `True` may be returned even if expanded data
1030 IDs aren't necessary.
1032 Returns
1033 -------
1034 needed : `bool`
1035 If `True`, expanded data IDs may be needed. `False` only if
1036 expansion definitely isn't necessary.
1037 """
1038 return True
1040 @abstractmethod
1041 def import_records(
1042 self,
1043 data: Mapping[str, DatastoreRecordData],
1044 ) -> None:
1045 """Import datastore location and record data from an in-memory data
1046 structure.
1048 Parameters
1049 ----------
1050 data : `Mapping` [ `str`, `DatastoreRecordData` ]
1051 Datastore records indexed by datastore name. May contain data for
1052 other `Datastore` instances (generally because they are chained to
1053 this one), which should be ignored.
1055 Notes
1056 -----
1057 Implementations should generally not check that any external resources
1058 (e.g. files) referred to by these records actually exist, for
1059 performance reasons; we expect higher-level code to guarantee that they
1060 do.
1062 Implementations are responsible for calling
1063 `DatastoreRegistryBridge.insert` on all datasets in ``data[name].refs``
1064 for each ``name`` in `names`, as well as loading any opaque table data.
1065 """
1066 raise NotImplementedError()
1068 @abstractmethod
1069 def export_records(
1070 self,
1071 refs: Iterable[DatasetIdRef],
1072 ) -> Mapping[str, DatastoreRecordData]:
1073 """Export datastore records and locations to an in-memory data
1074 structure.
1076 Parameters
1077 ----------
1078 refs : `Iterable` [ `DatasetIdRef` ]
1079 Datasets to save. This may include datasets not known to this
1080 datastore, which should be ignored.
1082 Returns
1083 -------
1084 data : `Mapping` [ `str`, `DatastoreRecordData` ]
1085 Exported datastore records indexed by datastore name.
1086 """
1087 raise NotImplementedError()