# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

22"""Support for generic data stores."""
24from __future__ import annotations
26__all__ = ("DatastoreConfig", "Datastore", "DatastoreValidationError")
import contextlib
import logging
from abc import ABCMeta, abstractmethod
from collections import defaultdict
from dataclasses import dataclass
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    ClassVar,
    Dict,
    Iterable,
    Iterator,
    List,
    Mapping,
    Optional,
    Set,
    Tuple,
    Type,
    Union,
)

from lsst.utils import doImportType

from .config import Config, ConfigSubset
from .constraints import Constraints
from .exceptions import DatasetTypeNotSupportedError, ValidationError
from .fileDataset import FileDataset
from .storageClass import StorageClassFactory

if TYPE_CHECKING:
    from lsst.resources import ResourcePath, ResourcePathExpression

    from ..registry.interfaces import DatastoreRegistryBridgeManager
    from .configSupport import LookupKey
    from .datasets import DatasetRef, DatasetType
    from .storageClass import StorageClass


class DatastoreConfig(ConfigSubset):
    """Configuration for Datastores."""

    component = "datastore"
    requiredKeys = ("cls",)
    defaultConfigFile = "datastore.yaml"


class DatastoreValidationError(ValidationError):
    """There is a problem with the Datastore configuration."""

    pass


@dataclass(frozen=True)
class Event:
    __slots__ = {"name", "undoFunc", "args", "kwargs"}
    name: str
    undoFunc: Callable
    args: tuple
    kwargs: dict


class IngestPrepData:
    """A helper base class for `Datastore` ingest implementations.

    Datastore implementations will generally need a custom implementation of
    this class.

    Should be accessed as ``Datastore.IngestPrepData`` instead of via direct
    import.

    Parameters
    ----------
    refs : iterable of `DatasetRef`
        References for the datasets that can be ingested by this datastore.
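
    Examples
    --------
    A minimal sketch of a subclass that a concrete datastore might use to
    carry extra per-file state from `_prepIngest` to `_finishIngest`; the
    ``locations`` attribute here is illustrative, not part of the base API::

        class MyIngestPrepData(IngestPrepData):
            def __init__(self, refs, locations):
                super().__init__(refs)
                # Map each dataset ID to the artifact location it will use.
                self.locations = locations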
103 """
105 def __init__(self, refs: Iterable[DatasetRef]):
106 self.refs = {ref.id: ref for ref in refs}
class DatastoreTransaction:
    """Keeps a log of `Datastore` activity and allows rollback.

    Parameters
    ----------
    parent : `DatastoreTransaction`, optional
        The parent transaction (if any).
    """

    Event: ClassVar[Type] = Event

    parent: Optional["DatastoreTransaction"]
    """The parent transaction. (`DatastoreTransaction`, optional)"""

    def __init__(self, parent: Optional[DatastoreTransaction] = None):
        self.parent = parent
        self._log: List[Event] = []

    def registerUndo(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> None:
        """Register event with undo function.

        Parameters
        ----------
        name : `str`
            Name of the event.
        undoFunc : callable
            Function to undo this event.
        args : `tuple`
            Positional arguments to ``undoFunc``.
        **kwargs
            Keyword arguments to ``undoFunc``.
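
        Examples
        --------
        A sketch of typical use inside a datastore operation; here
        ``os.remove`` undoes a file that was just written, and the path is
        purely illustrative:

        >>> import os
        >>> txn = DatastoreTransaction()
        >>> txn.registerUndo("write", os.remove, "/tmp/example.fits")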
140 """
141 self._log.append(self.Event(name, undoFunc, args, kwargs))

    @contextlib.contextmanager
    def undoWith(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> Iterator[None]:
        """Register undo function if nested operation succeeds.

        Calls `registerUndo`.

        This can be used to wrap individual undo-able statements within a
        DatastoreTransaction block. Multiple statements that can fail
        separately should not be part of the same `undoWith` block.

        All arguments are forwarded directly to `registerUndo`.
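
        A sketch of intended use, assuming a transaction ``txn`` and
        hypothetical ``write_file`` and ``remove_file`` helpers:

        >>> with txn.undoWith("write", remove_file, path):  # doctest: +SKIP
        ...     write_file(path)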
154 """
155 try:
156 yield None
157 except BaseException:
158 raise
159 else:
160 self.registerUndo(name, undoFunc, *args, **kwargs)

    def rollback(self) -> None:
        """Roll back all events in this transaction."""
        log = logging.getLogger(__name__)
        while self._log:
            ev = self._log.pop()
            try:
                log.debug(
                    "Rolling back transaction: %s: %s(%s,%s)",
                    ev.name,
                    ev.undoFunc,
                    ",".join(str(a) for a in ev.args),
                    ",".join(f"{k}={v}" for k, v in ev.kwargs.items()),
                )
            except Exception:
                # In case we had a problem in stringification of arguments.
                log.warning("Rolling back transaction: %s", ev.name)
            try:
                ev.undoFunc(*ev.args, **ev.kwargs)
            except BaseException as e:
                # Deliberately swallow error that may occur in unrolling.
                log.warning("Exception: %s caught while unrolling: %s", e, ev.name)
                pass

    def commit(self) -> None:
        """Commit this transaction."""
        if self.parent is None:
            # Just forget about the events, they have already happened.
            return
        else:
            # We may still want to undo events from this transaction as part
            # of the parent.
            self.parent._log.extend(self._log)


class Datastore(metaclass=ABCMeta):
    """Datastore interface.

    Parameters
    ----------
    config : `DatastoreConfig` or `str`
        Load configuration either from an existing config instance or by
        referring to a configuration file.
    bridgeManager : `DatastoreRegistryBridgeManager`
        Object that manages the interface between `Registry` and datastores.
    butlerRoot : `str`, optional
        New datastore root to use to override the configuration value.
    """

    defaultConfigFile: ClassVar[Optional[str]] = None
    """Path to configuration defaults. Accessed within the ``config`` resource
    or relative to a search path. Can be None if no defaults specified.
    """

    containerKey: ClassVar[Optional[str]] = None
    """Name of the key containing a list of subconfigurations that also
    need to be merged with defaults and will likely use different Python
    datastore classes (but all using DatastoreConfig). Assumed to be a
    list of configurations that can be represented in a DatastoreConfig
    and containing a "cls" definition. None indicates that no containers
    are expected in this Datastore."""

    isEphemeral: bool = False
    """Indicate whether this Datastore is ephemeral or not. An ephemeral
    datastore is one where the contents of the datastore will not exist
    across process restarts. This value can change per-instance."""

    config: DatastoreConfig
    """Configuration used to create Datastore."""

    name: str
    """Label associated with this Datastore."""

    storageClassFactory: StorageClassFactory
    """Factory for creating storage class instances from name."""

    constraints: Constraints
    """Constraints to apply when putting datasets into the datastore."""

    # MyPy does not like for this to be annotated as any kind of type, because
    # it can't do static checking on type variables that can change at runtime.
    IngestPrepData: ClassVar[Any] = IngestPrepData
    """Helper base class for ingest implementations."""

    @classmethod
    @abstractmethod
    def setConfigRoot(cls, root: str, config: Config, full: Config, overwrite: bool = True) -> None:
        """Set filesystem-dependent config options for this datastore.

        The options will be appropriate for a new empty repository with the
        given root.

        Parameters
        ----------
        root : `str`
            Filesystem path to the root of the data repository.
        config : `Config`
            A `Config` to update. Only the subset understood by
            this component will be updated. Will not expand
            defaults.
        full : `Config`
            A complete config with all defaults expanded that can be
            converted to a `DatastoreConfig`. Read-only and will not be
            modified by this method.
            Repository-specific options that should not be obtained
            from defaults when Butler instances are constructed
            should be copied from ``full`` to ``config``.
        overwrite : `bool`, optional
            If `False`, do not modify a value in ``config`` if the value
            already exists. Default is always to overwrite with the provided
            ``root``.

        Notes
        -----
        If a keyword is explicitly defined in the supplied ``config`` it
        will not be overridden by this method if ``overwrite`` is `False`.
        This allows explicit values set in external configs to be retained.
        """
        raise NotImplementedError()

    @staticmethod
    def fromConfig(
        config: Config,
        bridgeManager: DatastoreRegistryBridgeManager,
        butlerRoot: Optional[ResourcePathExpression] = None,
    ) -> "Datastore":
        """Create datastore from type specified in config file.

        Parameters
        ----------
        config : `Config`
            Configuration instance.
        bridgeManager : `DatastoreRegistryBridgeManager`
            Object that manages the interface between `Registry` and
            datastores.
        butlerRoot : `str`, optional
            Butler root directory.
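
        Examples
        --------
        The datastore class is read from the ``datastore.cls`` key of the
        configuration, e.g. in YAML (the ``FileDatastore`` shown here is just
        one possible concrete class):

        .. code-block:: yaml

            datastore:
              cls: lsst.daf.butler.datastores.fileDatastore.FileDatastore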
299 """
300 cls = doImportType(config["datastore", "cls"])
301 if not issubclass(cls, Datastore):
302 raise TypeError(f"Imported child class {config['datastore', 'cls']} is not a Datastore")
303 return cls(config=config, bridgeManager=bridgeManager, butlerRoot=butlerRoot)

    def __init__(
        self,
        config: Union[Config, str],
        bridgeManager: DatastoreRegistryBridgeManager,
        butlerRoot: Optional[ResourcePathExpression] = None,
    ):
        self.config = DatastoreConfig(config)
        self.name = "ABCDataStore"
        self._transaction: Optional[DatastoreTransaction] = None

        # All Datastores need storage classes and constraints.
        self.storageClassFactory = StorageClassFactory()

        # And read the constraints list.
        constraintsConfig = self.config.get("constraints")
        self.constraints = Constraints(constraintsConfig, universe=bridgeManager.universe)

    def __str__(self) -> str:
        return self.name

    def __repr__(self) -> str:
        return self.name

    @property
    def names(self) -> Tuple[str, ...]:
        """Names associated with this datastore returned as a tuple.

        Can be different from ``name`` for a chaining datastore.
        """
        # Default implementation returns solely the name itself.
        return (self.name,)

    @contextlib.contextmanager
    def transaction(self) -> Iterator[DatastoreTransaction]:
        """Context manager supporting `Datastore` transactions.

        Transactions can be nested, and are to be used in combination with
        `Registry.transaction`.
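
        A sketch of typical use (``datastore`` is assumed to be a concrete
        `Datastore` instance and the ``put`` arguments are illustrative);
        any exception raised inside the block triggers a rollback:

        >>> with datastore.transaction():  # doctest: +SKIP
        ...     datastore.put(inMemoryDataset, ref)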
343 """
344 self._transaction = DatastoreTransaction(self._transaction)
345 try:
346 yield self._transaction
347 except BaseException:
348 self._transaction.rollback()
349 raise
350 else:
351 self._transaction.commit()
352 self._transaction = self._transaction.parent

    @abstractmethod
    def knows(self, ref: DatasetRef) -> bool:
        """Check if the dataset is known to the datastore.

        Does not check for existence of any artifact.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required dataset.

        Returns
        -------
        exists : `bool`
            `True` if the dataset is known to the datastore.
        """
        raise NotImplementedError()

    def mexists(
        self, refs: Iterable[DatasetRef], artifact_existence: Optional[Dict[ResourcePath, bool]] = None
    ) -> Dict[DatasetRef, bool]:
        """Check the existence of multiple datasets at once.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            The datasets to be checked.
        artifact_existence : `dict` [`lsst.resources.ResourcePath`, `bool`]
            Optional mapping of datastore artifact to existence. Updated by
            this method with details of all artifacts tested. Can be `None`
            if the caller is not interested.

        Returns
        -------
        existence : `dict` of [`DatasetRef`, `bool`]
            Mapping from dataset to boolean indicating existence.
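
        Examples
        --------
        A sketch of typical use, collecting the refs whose artifacts are
        missing (``datastore`` and ``refs`` are assumed to exist already):

        >>> existence = datastore.mexists(refs)  # doctest: +SKIP
        >>> missing = [ref for ref, ok in existence.items() if not ok]  # doctest: +SKIP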
390 """
391 existence: Dict[DatasetRef, bool] = {}
392 # Non-optimized default.
393 for ref in refs:
394 existence[ref] = self.exists(ref)
395 return existence

    @abstractmethod
    def exists(self, datasetRef: DatasetRef) -> bool:
        """Check if the dataset exists in the datastore.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required dataset.

        Returns
        -------
        exists : `bool`
            `True` if the entity exists in the `Datastore`.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def get(self, datasetRef: DatasetRef, parameters: Optional[Mapping[str, Any]] = None) -> Any:
        """Load an `InMemoryDataset` from the store.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required Dataset.
        parameters : `dict`, optional
            `StorageClass`-specific parameters that specify a slice of the
            Dataset to be loaded.

        Returns
        -------
        inMemoryDataset : `object`
            Requested Dataset or slice thereof as an InMemoryDataset.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def put(self, inMemoryDataset: Any, datasetRef: DatasetRef) -> None:
        """Write an `InMemoryDataset` with a given `DatasetRef` to the store.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Dataset to store.
        datasetRef : `DatasetRef`
            Reference to the associated Dataset.
        """
        raise NotImplementedError("Must be implemented by subclass")

    def _overrideTransferMode(self, *datasets: FileDataset, transfer: Optional[str] = None) -> Optional[str]:
        """Allow ingest transfer mode to be defaulted based on datasets.

        Parameters
        ----------
        datasets : `FileDataset`
            Each positional argument is a struct containing information about
            a file to be ingested, including its path (either absolute or
            relative to the datastore root, if applicable), a complete
            `DatasetRef` (with ``dataset_id not None``), and optionally a
            formatter class or its fully-qualified string name. If a formatter
            is not provided, this method should populate that attribute with
            the formatter the datastore would use for `put`. Subclasses are
            also permitted to modify the path attribute (typically to put it
            in what the datastore considers its standard form).
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            See `ingest` for details of transfer modes.

        Returns
        -------
        newTransfer : `str`
            Transfer mode to use. Will be identical to the supplied transfer
            mode unless "auto" is used.
        """
        if transfer != "auto":
            return transfer
        raise RuntimeError(f"{transfer} is not allowed without specialization.")

    def _prepIngest(self, *datasets: FileDataset, transfer: Optional[str] = None) -> IngestPrepData:
        """Process datasets to identify which ones can be ingested.

        Parameters
        ----------
        datasets : `FileDataset`
            Each positional argument is a struct containing information about
            a file to be ingested, including its path (either absolute or
            relative to the datastore root, if applicable), a complete
            `DatasetRef` (with ``dataset_id not None``), and optionally a
            formatter class or its fully-qualified string name. If a formatter
            is not provided, this method should populate that attribute with
            the formatter the datastore would use for `put`. Subclasses are
            also permitted to modify the path attribute (typically to put it
            in what the datastore considers its standard form).
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            See `ingest` for details of transfer modes.

        Returns
        -------
        data : `IngestPrepData`
            An instance of a subclass of `IngestPrepData`, used to pass
            arbitrary data from `_prepIngest` to `_finishIngest`. This should
            include only the datasets this datastore can actually ingest;
            others should be silently ignored (`Datastore.ingest` will inspect
            `IngestPrepData.refs` and raise `DatasetTypeNotSupportedError` if
            necessary).

        Raises
        ------
        NotImplementedError
            Raised if the datastore does not support the given transfer mode
            (including the case where ingest is not supported at all).
        FileNotFoundError
            Raised if one of the given files does not exist.
        FileExistsError
            Raised if transfer is not `None` but the (internal) location the
            file would be moved to is already occupied.

        Notes
        -----
        This method (along with `_finishIngest`) should be implemented by
        subclasses to provide ingest support instead of implementing `ingest`
        directly.

        `_prepIngest` should not modify the data repository or given files in
        any way; all changes should be deferred to `_finishIngest`.

        When possible, exceptions should be raised in `_prepIngest` instead of
        `_finishIngest`. `NotImplementedError` exceptions that indicate that
        the transfer mode is not supported must be raised by `_prepIngest`
        instead of `_finishIngest`.
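
        A sketch of the expected pattern in a subclass; the transfer modes
        and constraint check shown are illustrative and depend on the
        concrete datastore:

        .. code-block:: python

            def _prepIngest(self, *datasets, transfer=None):
                if transfer not in ("copy", "move", None):
                    raise NotImplementedError(
                        f"Transfer mode {transfer} not supported."
                    )
                # Keep only the refs this datastore will accept; ingest()
                # reports anything left out as unsupported.
                acceptable = [
                    ref
                    for dataset in datasets
                    for ref in dataset.refs
                    if self.constraints.isAcceptable(ref)
                ]
                return self.IngestPrepData(acceptable)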
527 """
528 raise NotImplementedError(f"Datastore {self} does not support direct file-based ingest.")

    def _finishIngest(
        self, prepData: IngestPrepData, *, transfer: Optional[str] = None, record_validation_info: bool = True
    ) -> None:
        """Complete an ingest operation.

        Parameters
        ----------
        prepData : `IngestPrepData`
            An instance of a subclass of `IngestPrepData`. Guaranteed to be
            the direct result of a call to `_prepIngest` on this datastore.
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            See `ingest` for details of transfer modes.
        record_validation_info : `bool`, optional
            If `True`, the default, the datastore can record validation
            information associated with the file. If `False` the datastore
            will not attempt to track any information such as checksums
            or file sizes. This can be useful if such information is tracked
            in an external system or if the file is to be compressed in place.
            It is up to the datastore whether this parameter is relevant.

        Raises
        ------
        FileNotFoundError
            Raised if one of the given files does not exist.
        FileExistsError
            Raised if transfer is not `None` but the (internal) location the
            file would be moved to is already occupied.

        Notes
        -----
        This method (along with `_prepIngest`) should be implemented by
        subclasses to provide ingest support instead of implementing `ingest`
        directly.
        """
        raise NotImplementedError(f"Datastore {self} does not support direct file-based ingest.")

    def ingest(
        self, *datasets: FileDataset, transfer: Optional[str] = None, record_validation_info: bool = True
    ) -> None:
        """Ingest one or more files into the datastore.

        Parameters
        ----------
        datasets : `FileDataset`
            Each positional argument is a struct containing information about
            a file to be ingested, including its path (either absolute or
            relative to the datastore root, if applicable), a complete
            `DatasetRef` (with ``dataset_id not None``), and optionally a
            formatter class or its fully-qualified string name. If a formatter
            is not provided, the one the datastore would use for ``put`` on
            that dataset is assumed.
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            If `None` (default), the file must already be in a location
            appropriate for the datastore (e.g. within its root directory),
            and will not be modified. Other choices include "move", "copy",
            "link", "symlink", "relsymlink", and "hardlink". "link" is a
            special transfer mode that will first try to make a hardlink and
            if that fails a symlink will be used instead. "relsymlink" creates
            a relative symlink rather than use an absolute path.
            Most datastores do not support all transfer modes.
            "auto" is a special option that will let the
            data store choose the most natural option for itself.
        record_validation_info : `bool`, optional
            If `True`, the default, the datastore can record validation
            information associated with the file. If `False` the datastore
            will not attempt to track any information such as checksums
            or file sizes. This can be useful if such information is tracked
            in an external system or if the file is to be compressed in place.
            It is up to the datastore whether this parameter is relevant.

        Raises
        ------
        NotImplementedError
            Raised if the datastore does not support the given transfer mode
            (including the case where ingest is not supported at all).
        DatasetTypeNotSupportedError
            Raised if one or more files to be ingested have a dataset type
            that is not supported by the datastore.
        FileNotFoundError
            Raised if one of the given files does not exist.
        FileExistsError
            Raised if transfer is not `None` but the (internal) location the
            file would be moved to is already occupied.

        Notes
        -----
        Subclasses should implement `_prepIngest` and `_finishIngest` instead
        of implementing `ingest` directly. Datastores that hold and
        delegate to child datastores may want to call those methods as well.

        Subclasses are encouraged to document their supported transfer modes
        in their class documentation.
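
        A sketch of a typical call, assuming a concrete ``datastore`` and a
        list of `FileDataset` structs named ``file_datasets`` already exist:

        >>> datastore.ingest(*file_datasets, transfer="copy")  # doctest: +SKIP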
624 """
625 # Allow a datastore to select a default transfer mode
626 transfer = self._overrideTransferMode(*datasets, transfer=transfer)
627 prepData = self._prepIngest(*datasets, transfer=transfer)
628 refs = {ref.id: ref for dataset in datasets for ref in dataset.refs}
629 if None in refs:
630 # Find the file for the error message. There may be multiple
631 # bad refs so look for all of them.
632 unresolved_paths = {}
633 for dataset in datasets:
634 unresolved = []
635 for ref in dataset.refs:
636 if ref.id is None:
637 unresolved.append(ref)
638 if unresolved:
639 unresolved_paths[dataset.path] = unresolved
640 raise RuntimeError(
641 "Attempt to ingest unresolved DatasetRef from: "
642 + ",".join(f"{p}: ({[str(r) for r in ref]})" for p, ref in unresolved_paths.items())
643 )
644 if refs.keys() != prepData.refs.keys():
645 unsupported = refs.keys() - prepData.refs.keys()
646 # Group unsupported refs by DatasetType for an informative
647 # but still concise error message.
648 byDatasetType = defaultdict(list)
649 for datasetId in unsupported:
650 ref = refs[datasetId]
651 byDatasetType[ref.datasetType].append(ref)
652 raise DatasetTypeNotSupportedError(
653 "DatasetType(s) not supported in ingest: "
654 + ", ".join(f"{k.name} ({len(v)} dataset(s))" for k, v in byDatasetType.items())
655 )
656 self._finishIngest(prepData, transfer=transfer, record_validation_info=record_validation_info)

    def transfer_from(
        self,
        source_datastore: Datastore,
        refs: Iterable[DatasetRef],
        local_refs: Optional[Iterable[DatasetRef]] = None,
        transfer: str = "auto",
        artifact_existence: Optional[Dict[ResourcePath, bool]] = None,
    ) -> None:
        """Transfer dataset artifacts from another datastore to this one.

        Parameters
        ----------
        source_datastore : `Datastore`
            The datastore from which to transfer artifacts. That datastore
            must be compatible with this datastore receiving the artifacts.
        refs : iterable of `DatasetRef`
            The datasets to transfer from the source datastore.
        local_refs : iterable of `DatasetRef`, optional
            The dataset refs associated with the registry associated with
            this datastore. Can be `None` if the source and target datastore
            are using UUIDs.
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            Choices include "move", "copy",
            "link", "symlink", "relsymlink", and "hardlink". "link" is a
            special transfer mode that will first try to make a hardlink and
            if that fails a symlink will be used instead. "relsymlink" creates
            a relative symlink rather than use an absolute path.
            Most datastores do not support all transfer modes.
            "auto" (the default) is a special option that will let the
            data store choose the most natural option for itself.
            If the source location and transfer location are identical the
            transfer mode will be ignored.
        artifact_existence : `dict` [`lsst.resources.ResourcePath`, `bool`]
            Optional mapping of datastore artifact to existence. Updated by
            this method with details of all artifacts tested. Can be `None`
            if the caller is not interested.

        Raises
        ------
        TypeError
            Raised if the two datastores are not compatible.
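
        Examples
        --------
        A sketch of typical use between two compatible datastore instances
        (``source`` and ``target`` are assumed to exist already):

        >>> target.transfer_from(source, refs, transfer="copy")  # doctest: +SKIP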
700 """
701 if type(self) is not type(source_datastore):
702 raise TypeError(
703 f"Datastore mismatch between this datastore ({type(self)}) and the "
704 f"source datastore ({type(source_datastore)})."
705 )
707 raise NotImplementedError(f"Datastore {type(self)} must implement a transfer_from method.")

    @abstractmethod
    def getURIs(
        self, datasetRef: DatasetRef, predict: bool = False
    ) -> Tuple[Optional[ResourcePath], Dict[str, ResourcePath]]:
        """Return URIs associated with dataset.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required dataset.
        predict : `bool`, optional
            If the datastore does not know about the dataset, should it
            return a predicted URI or not?

        Returns
        -------
        primary : `lsst.resources.ResourcePath`
            The URI to the primary artifact associated with this dataset.
            If the dataset was disassembled within the datastore this
            may be `None`.
        components : `dict`
            URIs to any components associated with the dataset artifact.
            Can be empty if there are no components.
        """
        raise NotImplementedError()

    @abstractmethod
    def getURI(self, datasetRef: DatasetRef, predict: bool = False) -> ResourcePath:
        """URI to the Dataset.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required Dataset.
        predict : `bool`
            If `True` attempt to predict the URI for a dataset if it does
            not exist in datastore.

        Returns
        -------
        uri : `lsst.resources.ResourcePath`
            URI pointing to the Dataset within the datastore. If the
            Dataset does not exist in the datastore, the URI may be a guess.
            If the datastore does not have entities that relate well
            to the concept of a URI the returned URI will be
            descriptive. The returned URI is not guaranteed to be obtainable.

        Raises
        ------
        FileNotFoundError
            Raised if a URI has been requested for a dataset that does not
            exist and guessing is not allowed.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def retrieveArtifacts(
        self,
        refs: Iterable[DatasetRef],
        destination: ResourcePath,
        transfer: str = "auto",
        preserve_path: bool = True,
        overwrite: bool = False,
    ) -> List[ResourcePath]:
        """Retrieve the artifacts associated with the supplied refs.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            The datasets for which artifacts are to be retrieved.
            A single ref can result in multiple artifacts. The refs must
            be resolved.
        destination : `lsst.resources.ResourcePath`
            Location to write the artifacts.
        transfer : `str`, optional
            Method to use to transfer the artifacts. Must be one of the
            options supported by `lsst.resources.ResourcePath.transfer_from()`.
            "move" is not allowed.
        preserve_path : `bool`, optional
            If `True` the full path of the artifact within the datastore
            is preserved. If `False` the final file component of the path
            is used.
        overwrite : `bool`, optional
            If `True` allow transfers to overwrite existing files at the
            destination.

        Returns
        -------
        targets : `list` of `lsst.resources.ResourcePath`
            URIs of file artifacts in destination location. Order is not
            preserved.

        Notes
        -----
        For non-file datastores the artifacts written to the destination
        may not match the representation inside the datastore. For example
        a hierarchical data structure in a NoSQL database may well be stored
        as a JSON file.
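
        A sketch of typical use, copying all artifacts for ``refs`` into a
        local directory (``datastore`` and ``refs`` are assumed to exist
        already):

        >>> from lsst.resources import ResourcePath  # doctest: +SKIP
        >>> dest = ResourcePath("retrieved/", forceDirectory=True)  # doctest: +SKIP
        >>> paths = datastore.retrieveArtifacts(refs, dest, transfer="copy")  # doctest: +SKIP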
807 """
808 raise NotImplementedError()

    @abstractmethod
    def remove(self, datasetRef: DatasetRef) -> None:
        """Indicate to the Datastore that a Dataset can be removed.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required Dataset.

        Raises
        ------
        FileNotFoundError
            When Dataset does not exist.

        Notes
        -----
        Some Datastores may implement this method as a silent no-op to
        disable Dataset deletion through standard interfaces.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def forget(self, refs: Iterable[DatasetRef]) -> None:
        """Indicate to the Datastore that it should remove all records of the
        given datasets, without actually deleting them.

        Parameters
        ----------
        refs : `Iterable` [ `DatasetRef` ]
            References to the datasets being forgotten.

        Notes
        -----
        Asking a datastore to forget a `DatasetRef` it does not hold should be
        a silent no-op, not an error.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def trash(self, ref: Union[DatasetRef, Iterable[DatasetRef]], ignore_errors: bool = True) -> None:
        """Indicate to the Datastore that a Dataset can be moved to the trash.

        Parameters
        ----------
        ref : `DatasetRef` or iterable thereof
            Reference(s) to the required Dataset.
        ignore_errors : `bool`, optional
            Determine whether errors should be ignored. When multiple
            refs are being trashed there will be no per-ref check.

        Raises
        ------
        FileNotFoundError
            When Dataset does not exist and errors are not ignored. Only
            checked if a single ref is supplied (and not in a list).

        Notes
        -----
        Some Datastores may implement this method as a silent no-op to
        disable Dataset deletion through standard interfaces.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def emptyTrash(self, ignore_errors: bool = True) -> None:
        """Remove all datasets from the trash.

        Parameters
        ----------
        ignore_errors : `bool`, optional
            Determine whether errors should be ignored.

        Notes
        -----
        Some Datastores may implement this method as a silent no-op to
        disable Dataset deletion through standard interfaces.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def transfer(self, inputDatastore: Datastore, datasetRef: DatasetRef) -> None:
        """Transfer a dataset from another datastore to this datastore.

        Parameters
        ----------
        inputDatastore : `Datastore`
            The external `Datastore` from which to retrieve the Dataset.
        datasetRef : `DatasetRef`
            Reference to the required Dataset.
        """
        raise NotImplementedError("Must be implemented by subclass")

    def export(
        self, refs: Iterable[DatasetRef], *, directory: Optional[str] = None, transfer: Optional[str] = None
    ) -> Iterable[FileDataset]:
        """Export datasets for transfer to another data repository.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            Dataset references to be exported.
        directory : `str`, optional
            Path to a directory that should contain files corresponding to
            output datasets. Ignored if ``transfer`` is `None`.
        transfer : `str`, optional
            Mode that should be used to move datasets out of the repository.
            Valid options are the same as those of the ``transfer`` argument
            to ``ingest``, and datastores may similarly signal that a transfer
            mode is not supported by raising `NotImplementedError`.

        Returns
        -------
        datasets : iterable of `FileDataset`
            Structs containing information about the exported datasets, in the
            same order as ``refs``.

        Raises
        ------
        NotImplementedError
            Raised if the given transfer mode is not supported.
        """
        raise NotImplementedError(f"Transfer mode {transfer} not supported.")

    @abstractmethod
    def validateConfiguration(
        self, entities: Iterable[Union[DatasetRef, DatasetType, StorageClass]], logFailures: bool = False
    ) -> None:
        """Validate some of the configuration for this datastore.

        Parameters
        ----------
        entities : iterable of `DatasetRef`, `DatasetType`, or `StorageClass`
            Entities to test against this configuration. Can be differing
            types.
        logFailures : `bool`, optional
            If `True`, output a log message for every validation error
            detected.

        Raises
        ------
        DatastoreValidationError
            Raised if there is a validation problem with a configuration.

        Notes
        -----
        Which parts of the configuration are validated is at the discretion
        of each Datastore implementation.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def validateKey(self, lookupKey: LookupKey, entity: Union[DatasetRef, DatasetType, StorageClass]) -> None:
        """Validate a specific look up key with supplied entity.

        Parameters
        ----------
        lookupKey : `LookupKey`
            Key to use to retrieve information from the datastore
            configuration.
        entity : `DatasetRef`, `DatasetType`, or `StorageClass`
            Entity to compare with configuration retrieved using the
            specified lookup key.

        Raises
        ------
        DatastoreValidationError
            Raised if there is a problem with the combination of entity
            and lookup key.

        Notes
        -----
        Bypasses the normal selection priorities by allowing a key that
        would normally not be selected to be validated.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def getLookupKeys(self) -> Set[LookupKey]:
        """Return all the lookup keys relevant to this datastore.

        Returns
        -------
        keys : `set` of `LookupKey`
            The keys stored internally for looking up information based
            on `DatasetType` name or `StorageClass`.
        """
        raise NotImplementedError("Must be implemented by subclass")

    def needs_expanded_data_ids(
        self,
        transfer: Optional[str],
        entity: Optional[Union[DatasetRef, DatasetType, StorageClass]] = None,
    ) -> bool:
        """Test whether this datastore needs expanded data IDs to ingest.

        Parameters
        ----------
        transfer : `str` or `None`
            Transfer mode for ingest.
        entity : `DatasetRef`, `DatasetType`, or `StorageClass`, optional
            Object representing what will be ingested. If not provided (or not
            specific enough), `True` may be returned even if expanded data
            IDs aren't necessary.

        Returns
        -------
        needed : `bool`
            If `True`, expanded data IDs may be needed. `False` only if
            expansion definitely isn't necessary.
        """
        return True