Coverage for python/lsst/daf/butler/core/datastore.py: 45%
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Support for generic data stores."""

from __future__ import annotations

__all__ = ("DatastoreConfig", "Datastore", "DatastoreValidationError")

import contextlib
import logging
from abc import ABCMeta, abstractmethod
from collections import defaultdict
from dataclasses import dataclass
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    ClassVar,
    Dict,
    Iterable,
    Iterator,
    List,
    Mapping,
    Optional,
    Set,
    Tuple,
    Type,
    Union,
)

from lsst.utils import doImportType

from .config import Config, ConfigSubset
from .constraints import Constraints
from .exceptions import DatasetTypeNotSupportedError, ValidationError
from .fileDataset import FileDataset
from .storageClass import StorageClassFactory

if TYPE_CHECKING:
    from lsst.resources import ResourcePath, ResourcePathExpression

    from ..registry.interfaces import DatastoreRegistryBridgeManager
    from .configSupport import LookupKey
    from .datasets import DatasetRef, DatasetType
    from .storageClass import StorageClass


class DatastoreConfig(ConfigSubset):
    """Configuration for Datastores."""

    component = "datastore"
    requiredKeys = ("cls",)
    defaultConfigFile = "datastore.yaml"


class DatastoreValidationError(ValidationError):
    """There is a problem with the Datastore configuration."""

    pass


@dataclass(frozen=True)
class Event:
    """A single undoable action recorded in a `DatastoreTransaction` log."""

    __slots__ = {"name", "undoFunc", "args", "kwargs"}
    name: str
    undoFunc: Callable
    args: tuple
    kwargs: dict


class IngestPrepData:
    """A helper base class for `Datastore` ingest implementations.

    Datastore implementations will generally need a custom implementation of
    this class.

    Should be accessed as ``Datastore.IngestPrepData`` instead of via direct
    import.

    Parameters
    ----------
    refs : iterable of `DatasetRef`
        References for the datasets that can be ingested by this datastore.
    """

    def __init__(self, refs: Iterable[DatasetRef]):
        self.refs = {ref.id: ref for ref in refs}


class DatastoreTransaction:
    """Keeps a log of `Datastore` activity and allows rollback.

    Parameters
    ----------
    parent : `DatastoreTransaction`, optional
        The parent transaction (if any).
    """

    Event: ClassVar[Type] = Event

    parent: Optional["DatastoreTransaction"]
    """The parent transaction. (`DatastoreTransaction`, optional)"""

    def __init__(self, parent: Optional[DatastoreTransaction] = None):
        self.parent = parent
        self._log: List[Event] = []

    def registerUndo(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> None:
        """Register event with undo function.

        Parameters
        ----------
        name : `str`
            Name of the event.
        undoFunc : `Callable`
            Function to undo this event.
        *args : `Any`
            Positional arguments to ``undoFunc``.
        **kwargs
            Keyword arguments to ``undoFunc``.
        """
        self._log.append(self.Event(name, undoFunc, args, kwargs))

    @contextlib.contextmanager
    def undoWith(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> Iterator[None]:
        """Register an undo function if the nested operation succeeds.

        Calls `registerUndo`.

        This can be used to wrap individual undo-able statements within a
        `DatastoreTransaction` block. Multiple statements that can fail
        separately should not be part of the same `undoWith` block.

        All arguments are forwarded directly to `registerUndo`.
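
        Examples
        --------
        A minimal sketch, assuming ``txn`` is an active transaction and
        ``write_artifact`` is a hypothetical operation that creates the
        file at ``path``:

        >>> import os
        >>> with txn.undoWith("write", os.remove, path):
        ...     write_artifact(path)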
154 """
155 try:
156 yield None
157 except BaseException:
158 raise
159 else:
160 self.registerUndo(name, undoFunc, *args, **kwargs)

    def rollback(self) -> None:
        """Roll back all events in this transaction."""
        log = logging.getLogger(__name__)
        while self._log:
            ev = self._log.pop()
            try:
                log.debug(
                    "Rolling back transaction: %s: %s(%s,%s)",
                    ev.name,
                    ev.undoFunc,
                    ",".join(str(a) for a in ev.args),
                    ",".join(f"{k}={v}" for k, v in ev.kwargs.items()),
                )
            except Exception:
                # In case we had a problem in stringification of arguments.
                log.warning("Rolling back transaction: %s", ev.name)
            try:
                ev.undoFunc(*ev.args, **ev.kwargs)
            except BaseException as e:
                # Deliberately swallow any error that occurs while unrolling.
                log.warning("Exception: %s caught while unrolling: %s", e, ev.name)

    def commit(self) -> None:
        """Commit this transaction."""
        if self.parent is None:
            # Just forget about the events; they have already happened.
            return
        else:
            # We may still need to roll back the events from this transaction
            # if the parent transaction fails, so hand them to the parent.
            self.parent._log.extend(self._log)


class Datastore(metaclass=ABCMeta):
    """Datastore interface.

    Parameters
    ----------
    config : `DatastoreConfig` or `str`
        Load configuration either from an existing config instance or by
        referring to a configuration file.
    bridgeManager : `DatastoreRegistryBridgeManager`
        Object that manages the interface between `Registry` and datastores.
    butlerRoot : `str`, optional
        New datastore root to use to override the configuration value.
    """

    defaultConfigFile: ClassVar[Optional[str]] = None
    """Path to configuration defaults. Accessed within the ``config`` resource
    or relative to a search path. Can be `None` if no defaults are specified.
    """

    containerKey: ClassVar[Optional[str]] = None
    """Name of the key containing a list of subconfigurations that also
    need to be merged with defaults and will likely use different Python
    datastore classes (but all using `DatastoreConfig`). Assumed to be a
    list of configurations that can be represented in a `DatastoreConfig`
    and containing a "cls" definition. `None` indicates that no containers
    are expected in this Datastore."""

    isEphemeral: bool = False
    """Indicate whether this Datastore is ephemeral or not. An ephemeral
    datastore is one where the contents of the datastore will not exist
    across process restarts. This value can change per-instance."""

    config: DatastoreConfig
    """Configuration used to create Datastore."""

    name: str
    """Label associated with this Datastore."""

    storageClassFactory: StorageClassFactory
    """Factory for creating storage class instances from name."""

    constraints: Constraints
    """Constraints to apply when putting datasets into the datastore."""

    # MyPy does not like this to be annotated as any kind of type, because
    # it can't do static checking on type variables that can change at runtime.
    IngestPrepData: ClassVar[Any] = IngestPrepData
    """Helper base class for ingest implementations."""

    @classmethod
    @abstractmethod
    def setConfigRoot(cls, root: str, config: Config, full: Config, overwrite: bool = True) -> None:
        """Set filesystem-dependent config options for this datastore.

        The options will be appropriate for a new empty repository with the
        given root.

        Parameters
        ----------
        root : `str`
            Filesystem path to the root of the data repository.
        config : `Config`
            A `Config` to update. Only the subset understood by
            this component will be updated. Will not expand
            defaults.
        full : `Config`
            A complete config with all defaults expanded that can be
            converted to a `DatastoreConfig`. Read-only and will not be
            modified by this method.
            Repository-specific options that should not be obtained
            from defaults when Butler instances are constructed
            should be copied from ``full`` to ``config``.
        overwrite : `bool`, optional
            If `False`, do not modify a value in ``config`` if the value
            already exists. Default is always to overwrite with the provided
            ``root``.

        Notes
        -----
        If a keyword is explicitly defined in the supplied ``config`` it
        will not be overridden by this method if ``overwrite`` is `False`.
        This allows explicit values set in external configs to be retained.
        """
        raise NotImplementedError()

    @staticmethod
    def fromConfig(
        config: Config,
        bridgeManager: DatastoreRegistryBridgeManager,
        butlerRoot: Optional[ResourcePathExpression] = None,
    ) -> "Datastore":
        """Create a datastore from the type specified in the config file.

        Parameters
        ----------
        config : `Config`
            Configuration instance.
        bridgeManager : `DatastoreRegistryBridgeManager`
            Object that manages the interface between `Registry` and
            datastores.
        butlerRoot : `str`, optional
            Butler root directory.
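
        Examples
        --------
        A minimal sketch, assuming ``butler.yaml`` holds a complete
        configuration and ``bridgeManager`` was obtained from an existing
        `Registry`:

        >>> config = Config("butler.yaml")
        >>> datastore = Datastore.fromConfig(config, bridgeManager)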
299 """
300 cls = doImportType(config["datastore", "cls"])
301 if not issubclass(cls, Datastore):
302 raise TypeError(f"Imported child class {config['datastore', 'cls']} is not a Datastore")
303 return cls(config=config, bridgeManager=bridgeManager, butlerRoot=butlerRoot)

    def __init__(
        self,
        config: Union[Config, str],
        bridgeManager: DatastoreRegistryBridgeManager,
        butlerRoot: Optional[ResourcePathExpression] = None,
    ):
        self.config = DatastoreConfig(config)
        self.name = "ABCDataStore"
        self._transaction: Optional[DatastoreTransaction] = None

        # All Datastores need storage classes and constraints.
        self.storageClassFactory = StorageClassFactory()

        # And read the constraints list.
        constraintsConfig = self.config.get("constraints")
        self.constraints = Constraints(constraintsConfig, universe=bridgeManager.universe)

    def __str__(self) -> str:
        return self.name

    def __repr__(self) -> str:
        return self.name

    @property
    def names(self) -> Tuple[str, ...]:
        """Names associated with this datastore returned as a tuple.

        Can be different from ``name`` for a chaining datastore.
        """
        # Default implementation returns solely the name itself.
        return (self.name,)

    @contextlib.contextmanager
    def transaction(self) -> Iterator[DatastoreTransaction]:
        """Context manager supporting `Datastore` transactions.

        Transactions can be nested, and are to be used in combination with
        `Registry.transaction`.
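
        Examples
        --------
        A minimal sketch, assuming ``registry``, ``datastore``, and
        ``do_work`` are supplied by the caller:

        >>> with registry.transaction():
        ...     with datastore.transaction() as txn:
        ...         do_work(txn)  # rolled back if an exception is raised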
343 """
344 self._transaction = DatastoreTransaction(self._transaction)
345 try:
346 yield self._transaction
347 except BaseException:
348 self._transaction.rollback()
349 raise
350 else:
351 self._transaction.commit()
352 self._transaction = self._transaction.parent

    @abstractmethod
    def knows(self, ref: DatasetRef) -> bool:
        """Check if the dataset is known to the datastore.

        Does not check for existence of any artifact.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required dataset.

        Returns
        -------
        exists : `bool`
            `True` if the dataset is known to the datastore.
        """
        raise NotImplementedError()

    def mexists(
        self, refs: Iterable[DatasetRef], artifact_existence: Optional[Dict[ResourcePath, bool]] = None
    ) -> Dict[DatasetRef, bool]:
        """Check the existence of multiple datasets at once.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            The datasets to be checked.
        artifact_existence : `dict` [`lsst.resources.ResourcePath`, `bool`]
            Optional mapping of datastore artifact to existence. Updated by
            this method with details of all artifacts tested. Can be `None`
            if the caller is not interested.

        Returns
        -------
        existence : `dict` [`DatasetRef`, `bool`]
            Mapping from dataset to boolean indicating existence.
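
        Examples
        --------
        A minimal sketch, assuming ``datastore`` and ``refs`` are supplied
        by the caller:

        >>> existence = datastore.mexists(refs)
        >>> missing = [ref for ref, found in existence.items() if not found]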
390 """
391 existence: Dict[DatasetRef, bool] = {}
392 # Non-optimized default.
393 for ref in refs:
394 existence[ref] = self.exists(ref)
395 return existence

    @abstractmethod
    def exists(self, datasetRef: DatasetRef) -> bool:
        """Check if the dataset exists in the datastore.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required dataset.

        Returns
        -------
        exists : `bool`
            `True` if the entity exists in the `Datastore`.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def get(self, datasetRef: DatasetRef, parameters: Optional[Mapping[str, Any]] = None) -> Any:
        """Load an `InMemoryDataset` from the store.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required Dataset.
        parameters : `dict`, optional
            `StorageClass`-specific parameters that specify a slice of the
            Dataset to be loaded.

        Returns
        -------
        inMemoryDataset : `object`
            Requested Dataset or slice thereof as an InMemoryDataset.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def put(self, inMemoryDataset: Any, datasetRef: DatasetRef) -> None:
        """Write an `InMemoryDataset` with a given `DatasetRef` to the store.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Dataset to store.
        datasetRef : `DatasetRef`
            Reference to the associated Dataset.
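
        Examples
        --------
        A minimal sketch of the put/get round trip, assuming ``datastore``,
        ``ref``, and an in-memory object ``exposure`` are supplied by the
        caller:

        >>> datastore.put(exposure, ref)
        >>> retrieved = datastore.get(ref)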
442 """
443 raise NotImplementedError("Must be implemented by subclass")

    def _overrideTransferMode(self, *datasets: FileDataset, transfer: Optional[str] = None) -> Optional[str]:
        """Allow ingest transfer mode to be defaulted based on datasets.

        Parameters
        ----------
        datasets : `FileDataset`
            Each positional argument is a struct containing information about
            a file to be ingested, including its path (either absolute or
            relative to the datastore root, if applicable), a complete
            `DatasetRef` (with ``dataset_id not None``), and optionally a
            formatter class or its fully-qualified string name. If a formatter
            is not provided, this method should populate that attribute with
            the formatter the datastore would use for `put`. Subclasses are
            also permitted to modify the path attribute (typically to put it
            in what the datastore considers its standard form).
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            See `ingest` for details of transfer modes.

        Returns
        -------
        newTransfer : `str`
            Transfer mode to use. Will be identical to the supplied transfer
            mode unless "auto" is used.
        """
        if transfer != "auto":
            return transfer
        raise RuntimeError(f"{transfer} is not allowed without specialization.")

    def _prepIngest(self, *datasets: FileDataset, transfer: Optional[str] = None) -> IngestPrepData:
        """Process datasets to identify which ones can be ingested.

        Parameters
        ----------
        datasets : `FileDataset`
            Each positional argument is a struct containing information about
            a file to be ingested, including its path (either absolute or
            relative to the datastore root, if applicable), a complete
            `DatasetRef` (with ``dataset_id not None``), and optionally a
            formatter class or its fully-qualified string name. If a formatter
            is not provided, this method should populate that attribute with
            the formatter the datastore would use for `put`. Subclasses are
            also permitted to modify the path attribute (typically to put it
            in what the datastore considers its standard form).
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            See `ingest` for details of transfer modes.

        Returns
        -------
        data : `IngestPrepData`
            An instance of a subclass of `IngestPrepData`, used to pass
            arbitrary data from `_prepIngest` to `_finishIngest`. This should
            include only the datasets this datastore can actually ingest;
            others should be silently ignored (`Datastore.ingest` will inspect
            `IngestPrepData.refs` and raise `DatasetTypeNotSupportedError` if
            necessary).

        Raises
        ------
        NotImplementedError
            Raised if the datastore does not support the given transfer mode
            (including the case where ingest is not supported at all).
        FileNotFoundError
            Raised if one of the given files does not exist.
        FileExistsError
            Raised if transfer is not `None` but the (internal) location the
            file would be moved to is already occupied.

        Notes
        -----
        This method (along with `_finishIngest`) should be implemented by
        subclasses to provide ingest support instead of implementing `ingest`
        directly.

        `_prepIngest` should not modify the data repository or given files in
        any way; all changes should be deferred to `_finishIngest`.

        When possible, exceptions should be raised in `_prepIngest` instead of
        `_finishIngest`. `NotImplementedError` exceptions that indicate that
        the transfer mode is not supported must be raised by `_prepIngest`
        instead of `_finishIngest`.
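
        Examples
        --------
        A minimal subclass sketch; the ``_can_ingest`` helper is
        hypothetical and stands in for datastore-specific checks:

        >>> class MyDatastore(Datastore):
        ...     def _prepIngest(self, *datasets, transfer=None):
        ...         refs = [ref for dataset in datasets
        ...                 if self._can_ingest(dataset, transfer)
        ...                 for ref in dataset.refs]
        ...         return self.IngestPrepData(refs)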
527 """
528 raise NotImplementedError(f"Datastore {self} does not support direct file-based ingest.")

    def _finishIngest(self, prepData: IngestPrepData, *, transfer: Optional[str] = None) -> None:
        """Complete an ingest operation.

        Parameters
        ----------
        prepData : `IngestPrepData`
            An instance of a subclass of `IngestPrepData`. Guaranteed to be
            the direct result of a call to `_prepIngest` on this datastore.
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            See `ingest` for details of transfer modes.

        Raises
        ------
        FileNotFoundError
            Raised if one of the given files does not exist.
        FileExistsError
            Raised if transfer is not `None` but the (internal) location the
            file would be moved to is already occupied.

        Notes
        -----
        This method (along with `_prepIngest`) should be implemented by
        subclasses to provide ingest support instead of implementing `ingest`
        directly.
        """
        raise NotImplementedError(f"Datastore {self} does not support direct file-based ingest.")

    def ingest(self, *datasets: FileDataset, transfer: Optional[str] = None) -> None:
        """Ingest one or more files into the datastore.

        Parameters
        ----------
        datasets : `FileDataset`
            Each positional argument is a struct containing information about
            a file to be ingested, including its path (either absolute or
            relative to the datastore root, if applicable), a complete
            `DatasetRef` (with ``dataset_id not None``), and optionally a
            formatter class or its fully-qualified string name. If a formatter
            is not provided, the one the datastore would use for ``put`` on
            that dataset is assumed.
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            If `None` (default), the file must already be in a location
            appropriate for the datastore (e.g. within its root directory),
            and will not be modified. Other choices include "move", "copy",
            "link", "symlink", "relsymlink", and "hardlink". "link" is a
            special transfer mode that will first try to make a hardlink and
            if that fails a symlink will be used instead. "relsymlink" creates
            a relative symlink rather than an absolute one.
            Most datastores do not support all transfer modes.
            "auto" is a special option that will let the
            datastore choose the most natural option for itself.

        Raises
        ------
        NotImplementedError
            Raised if the datastore does not support the given transfer mode
            (including the case where ingest is not supported at all).
        DatasetTypeNotSupportedError
            Raised if one or more files to be ingested have a dataset type
            that is not supported by the datastore.
        FileNotFoundError
            Raised if one of the given files does not exist.
        FileExistsError
            Raised if transfer is not `None` but the (internal) location the
            file would be moved to is already occupied.

        Notes
        -----
        Subclasses should implement `_prepIngest` and `_finishIngest` instead
        of implementing `ingest` directly. Datastores that hold and
        delegate to child datastores may want to call those methods as well.

        Subclasses are encouraged to document their supported transfer modes
        in their class documentation.
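
        Examples
        --------
        A minimal sketch, assuming ``ref`` is a resolved `DatasetRef` for a
        file that already exists at the given (hypothetical) path:

        >>> datastore.ingest(
        ...     FileDataset(path="/data/raw_1.fits", refs=ref),
        ...     transfer="copy")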
606 """
607 # Allow a datastore to select a default transfer mode
608 transfer = self._overrideTransferMode(*datasets, transfer=transfer)
609 prepData = self._prepIngest(*datasets, transfer=transfer)
610 refs = {ref.id: ref for dataset in datasets for ref in dataset.refs}
611 if None in refs:
612 # Find the file for the error message. There may be multiple
613 # bad refs so look for all of them.
614 unresolved_paths = {}
615 for dataset in datasets:
616 unresolved = []
617 for ref in dataset.refs:
618 if ref.id is None:
619 unresolved.append(ref)
620 if unresolved:
621 unresolved_paths[dataset.path] = unresolved
622 raise RuntimeError(
623 "Attempt to ingest unresolved DatasetRef from: "
624 + ",".join(f"{p}: ({[str(r) for r in ref]})" for p, ref in unresolved_paths.items())
625 )
626 if refs.keys() != prepData.refs.keys():
627 unsupported = refs.keys() - prepData.refs.keys()
628 # Group unsupported refs by DatasetType for an informative
629 # but still concise error message.
630 byDatasetType = defaultdict(list)
631 for datasetId in unsupported:
632 ref = refs[datasetId]
633 byDatasetType[ref.datasetType].append(ref)
634 raise DatasetTypeNotSupportedError(
635 "DatasetType(s) not supported in ingest: "
636 + ", ".join(f"{k.name} ({len(v)} dataset(s))" for k, v in byDatasetType.items())
637 )
638 self._finishIngest(prepData, transfer=transfer)

    def transfer_from(
        self,
        source_datastore: Datastore,
        refs: Iterable[DatasetRef],
        local_refs: Optional[Iterable[DatasetRef]] = None,
        transfer: str = "auto",
        artifact_existence: Optional[Dict[ResourcePath, bool]] = None,
    ) -> None:
        """Transfer dataset artifacts from another datastore to this one.

        Parameters
        ----------
        source_datastore : `Datastore`
            The datastore from which to transfer artifacts. That datastore
            must be compatible with this datastore receiving the artifacts.
        refs : iterable of `DatasetRef`
            The datasets to transfer from the source datastore.
        local_refs : iterable of `DatasetRef`, optional
            The dataset refs known to the registry associated with this
            datastore. Can be `None` if the source and target datastores
            are using UUIDs.
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            Choices include "move", "copy",
            "link", "symlink", "relsymlink", and "hardlink". "link" is a
            special transfer mode that will first try to make a hardlink and
            if that fails a symlink will be used instead. "relsymlink" creates
            a relative symlink rather than an absolute one.
            Most datastores do not support all transfer modes.
            "auto" (the default) is a special option that will let the
            datastore choose the most natural option for itself.
            If the source location and transfer location are identical the
            transfer mode will be ignored.
        artifact_existence : `dict` [`lsst.resources.ResourcePath`, `bool`]
            Optional mapping of datastore artifact to existence. Updated by
            this method with details of all artifacts tested. Can be `None`
            if the caller is not interested.

        Raises
        ------
        TypeError
            Raised if the two datastores are not compatible.
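
        Examples
        --------
        A minimal sketch, assuming ``source`` and ``target`` are compatible
        concrete datastores whose registries use UUIDs:

        >>> target.transfer_from(source, refs, transfer="copy")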
682 """
683 if type(self) is not type(source_datastore):
684 raise TypeError(
685 f"Datastore mismatch between this datastore ({type(self)}) and the "
686 f"source datastore ({type(source_datastore)})."
687 )
689 raise NotImplementedError(f"Datastore {type(self)} must implement a transfer_from method.")

    @abstractmethod
    def getURIs(
        self, datasetRef: DatasetRef, predict: bool = False
    ) -> Tuple[Optional[ResourcePath], Dict[str, ResourcePath]]:
        """Return URIs associated with dataset.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required dataset.
        predict : `bool`, optional
            If the datastore does not know about the dataset, should it
            return a predicted URI or not?

        Returns
        -------
        primary : `lsst.resources.ResourcePath`
            The URI to the primary artifact associated with this dataset.
            If the dataset was disassembled within the datastore this
            may be `None`.
        components : `dict`
            URIs to any components associated with the dataset artifact.
            Can be empty if there are no components.
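
        Examples
        --------
        A minimal sketch, assuming ``datastore`` and ``ref`` are supplied by
        the caller:

        >>> primary, components = datastore.getURIs(ref)
        >>> if primary is None:
        ...     print("Disassembled into", list(components))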
714 """
715 raise NotImplementedError()

    @abstractmethod
    def getURI(self, datasetRef: DatasetRef, predict: bool = False) -> ResourcePath:
        """URI to the Dataset.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required Dataset.
        predict : `bool`, optional
            If `True` attempt to predict the URI for a dataset if it does
            not exist in the datastore.

        Returns
        -------
        uri : `lsst.resources.ResourcePath`
            URI pointing to the Dataset within the datastore. If the
            Dataset does not exist in the datastore, the URI may be a guess.
            If the datastore does not have entities that relate well
            to the concept of a URI the returned URI will be
            descriptive. The returned URI is not guaranteed to be obtainable.

        Raises
        ------
        FileNotFoundError
            Raised if a URI has been requested for a dataset that does not
            exist and guessing is not allowed.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def retrieveArtifacts(
        self,
        refs: Iterable[DatasetRef],
        destination: ResourcePath,
        transfer: str = "auto",
        preserve_path: bool = True,
        overwrite: bool = False,
    ) -> List[ResourcePath]:
        """Retrieve the artifacts associated with the supplied refs.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            The datasets for which artifacts are to be retrieved.
            A single ref can result in multiple artifacts. The refs must
            be resolved.
        destination : `lsst.resources.ResourcePath`
            Location to write the artifacts.
        transfer : `str`, optional
            Method to use to transfer the artifacts. Must be one of the
            options supported by `lsst.resources.ResourcePath.transfer_from()`.
            "move" is not allowed.
        preserve_path : `bool`, optional
            If `True` the full path of the artifact within the datastore
            is preserved. If `False` the final file component of the path
            is used.
        overwrite : `bool`, optional
            If `True` allow transfers to overwrite existing files at the
            destination.

        Returns
        -------
        targets : `list` of `lsst.resources.ResourcePath`
            URIs of file artifacts in the destination location. Order is not
            preserved.

        Notes
        -----
        For non-file datastores the artifacts written to the destination
        may not match the representation inside the datastore. For example
        a hierarchical data structure in a NoSQL database may well be stored
        as a JSON file.
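
        Examples
        --------
        A minimal sketch, assuming ``datastore`` and ``refs`` are supplied by
        the caller:

        >>> from lsst.resources import ResourcePath
        >>> destination = ResourcePath("export_dir/", forceDirectory=True)
        >>> targets = datastore.retrieveArtifacts(refs, destination,
        ...                                       transfer="copy")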
789 """
790 raise NotImplementedError()

    @abstractmethod
    def remove(self, datasetRef: DatasetRef) -> None:
        """Indicate to the Datastore that a Dataset can be removed.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required Dataset.

        Raises
        ------
        FileNotFoundError
            Raised when the Dataset does not exist.

        Notes
        -----
        Some Datastores may implement this method as a silent no-op to
        disable Dataset deletion through standard interfaces.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def forget(self, refs: Iterable[DatasetRef]) -> None:
        """Indicate to the Datastore that it should remove all records of the
        given datasets, without actually deleting them.

        Parameters
        ----------
        refs : `Iterable` [ `DatasetRef` ]
            References to the datasets being forgotten.

        Notes
        -----
        Asking a datastore to forget a `DatasetRef` it does not hold should be
        a silent no-op, not an error.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def trash(self, ref: Union[DatasetRef, Iterable[DatasetRef]], ignore_errors: bool = True) -> None:
        """Indicate to the Datastore that a Dataset can be moved to the trash.

        Parameters
        ----------
        ref : `DatasetRef` or iterable thereof
            Reference(s) to the required Dataset.
        ignore_errors : `bool`, optional
            Determine whether errors should be ignored. When multiple
            refs are being trashed there will be no per-ref check.

        Raises
        ------
        FileNotFoundError
            Raised when the Dataset does not exist and errors are not ignored.
            Only checked if a single ref is supplied (and not in a list).

        Notes
        -----
        Some Datastores may implement this method as a silent no-op to
        disable Dataset deletion through standard interfaces.
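
        Examples
        --------
        A minimal sketch of the two-step removal pattern, assuming
        ``datastore`` and ``refs`` are supplied by the caller:

        >>> datastore.trash(refs)
        >>> datastore.emptyTrash()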
852 """
853 raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def emptyTrash(self, ignore_errors: bool = True) -> None:
        """Remove all datasets from the trash.

        Parameters
        ----------
        ignore_errors : `bool`, optional
            Determine whether errors should be ignored.

        Notes
        -----
        Some Datastores may implement this method as a silent no-op to
        disable Dataset deletion through standard interfaces.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def transfer(self, inputDatastore: Datastore, datasetRef: DatasetRef) -> None:
        """Transfer a dataset from another datastore to this datastore.

        Parameters
        ----------
        inputDatastore : `Datastore`
            The external `Datastore` from which to retrieve the Dataset.
        datasetRef : `DatasetRef`
            Reference to the required Dataset.
        """
        raise NotImplementedError("Must be implemented by subclass")

    def export(
        self, refs: Iterable[DatasetRef], *, directory: Optional[str] = None, transfer: Optional[str] = None
    ) -> Iterable[FileDataset]:
        """Export datasets for transfer to another data repository.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            Dataset references to be exported.
        directory : `str`, optional
            Path to a directory that should contain files corresponding to
            output datasets. Ignored if ``transfer`` is `None`.
        transfer : `str`, optional
            Mode that should be used to move datasets out of the repository.
            Valid options are the same as those of the ``transfer`` argument
            to ``ingest``, and datastores may similarly signal that a transfer
            mode is not supported by raising `NotImplementedError`.

        Returns
        -------
        datasets : iterable of `FileDataset`
            Structs containing information about the exported datasets, in the
            same order as ``refs``.

        Raises
        ------
        NotImplementedError
            Raised if the given transfer mode is not supported.
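
        Examples
        --------
        A minimal sketch, assuming ``datastore`` and ``refs`` are supplied by
        the caller and the datastore supports "copy":

        >>> exported = list(datastore.export(refs, directory="export",
        ...                                  transfer="copy"))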
912 """
913 raise NotImplementedError(f"Transfer mode {transfer} not supported.")

    @abstractmethod
    def validateConfiguration(
        self, entities: Iterable[Union[DatasetRef, DatasetType, StorageClass]], logFailures: bool = False
    ) -> None:
        """Validate some of the configuration for this datastore.

        Parameters
        ----------
        entities : iterable of `DatasetRef`, `DatasetType`, or `StorageClass`
            Entities to test against this configuration. Can be differing
            types.
        logFailures : `bool`, optional
            If `True`, output a log message for every validation error
            detected.

        Raises
        ------
        DatastoreValidationError
            Raised if there is a validation problem with a configuration.

        Notes
        -----
        Which parts of the configuration are validated is at the discretion
        of each Datastore implementation.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def validateKey(self, lookupKey: LookupKey, entity: Union[DatasetRef, DatasetType, StorageClass]) -> None:
        """Validate a specific lookup key with the supplied entity.

        Parameters
        ----------
        lookupKey : `LookupKey`
            Key to use to retrieve information from the datastore
            configuration.
        entity : `DatasetRef`, `DatasetType`, or `StorageClass`
            Entity to compare with configuration retrieved using the
            specified lookup key.

        Raises
        ------
        DatastoreValidationError
            Raised if there is a problem with the combination of entity
            and lookup key.

        Notes
        -----
        Bypasses the normal selection priorities by allowing a key that
        would normally not be selected to be validated.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def getLookupKeys(self) -> Set[LookupKey]:
        """Return all the lookup keys relevant to this datastore.

        Returns
        -------
        keys : `set` of `LookupKey`
            The keys stored internally for looking up information based
            on `DatasetType` name or `StorageClass`.
        """
        raise NotImplementedError("Must be implemented by subclass")

    def needs_expanded_data_ids(
        self,
        transfer: Optional[str],
        entity: Optional[Union[DatasetRef, DatasetType, StorageClass]] = None,
    ) -> bool:
        """Test whether this datastore needs expanded data IDs to ingest.

        Parameters
        ----------
        transfer : `str` or `None`
            Transfer mode for ingest.
        entity : `DatasetRef`, `DatasetType`, or `StorageClass`, optional
            Object representing what will be ingested. If not provided (or not
            specific enough), `True` may be returned even if expanded data
            IDs aren't necessary.

        Returns
        -------
        needed : `bool`
            If `True`, expanded data IDs may be needed. `False` only if
            expansion definitely isn't necessary.
        """
        return True