Coverage for python/lsst/daf/butler/core/datastore.py: 45%

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
22"""Support for generic data stores."""
24from __future__ import annotations
26__all__ = ("DatastoreConfig", "Datastore", "DatastoreValidationError")
28import contextlib
29import logging
30from abc import ABCMeta, abstractmethod
31from collections import defaultdict
32from dataclasses import dataclass
33from typing import (
34 TYPE_CHECKING,
35 Any,
36 Callable,
37 ClassVar,
38 Dict,
39 Iterable,
40 Iterator,
41 List,
42 Mapping,
43 Optional,
44 Set,
45 Tuple,
46 Type,
47 Union,
48)

from lsst.utils import doImportType

from .config import Config, ConfigSubset
from .constraints import Constraints
from .exceptions import DatasetTypeNotSupportedError, ValidationError
from .fileDataset import FileDataset
from .storageClass import StorageClassFactory

if TYPE_CHECKING:
    from ..registry.interfaces import DatastoreRegistryBridgeManager
    from ._butlerUri import ButlerURI
    from .configSupport import LookupKey
    from .datasets import DatasetRef, DatasetType
    from .storageClass import StorageClass


class DatastoreConfig(ConfigSubset):
    """Configuration for Datastores."""

    component = "datastore"
    requiredKeys = ("cls",)
    defaultConfigFile = "datastore.yaml"


class DatastoreValidationError(ValidationError):
    """There is a problem with the Datastore configuration."""

    pass


@dataclass(frozen=True)
class Event:
    __slots__ = {"name", "undoFunc", "args", "kwargs"}
    name: str
    undoFunc: Callable
    args: tuple
    kwargs: dict


class IngestPrepData:
    """A helper base class for `Datastore` ingest implementations.

    Datastore implementations will generally need a custom implementation of
    this class.

    Should be accessed as ``Datastore.IngestPrepData`` instead of via direct
    import.

    Parameters
    ----------
    refs : iterable of `DatasetRef`
        References for the datasets that can be ingested by this datastore.
    """

    def __init__(self, refs: Iterable[DatasetRef]):
        self.refs = {ref.id: ref for ref in refs}


class DatastoreTransaction:
    """Keeps a log of `Datastore` activity and allows rollback.

    Parameters
    ----------
    parent : `DatastoreTransaction`, optional
        The parent transaction (if any).
    """

    Event: ClassVar[Type] = Event

    parent: Optional["DatastoreTransaction"]
    """The parent transaction. (`DatastoreTransaction`, optional)"""

    def __init__(self, parent: Optional[DatastoreTransaction] = None):
        self.parent = parent
        self._log: List[Event] = []

    def registerUndo(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> None:
        """Register event with undo function.

        Parameters
        ----------
        name : `str`
            Name of the event.
        undoFunc : func
            Function to undo this event.
        args : `tuple`
            Positional arguments to `undoFunc`.
        **kwargs
            Keyword arguments to `undoFunc`.
        """
        self._log.append(self.Event(name, undoFunc, args, kwargs))

    @contextlib.contextmanager
    def undoWith(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> Iterator[None]:
        """Register undo function if nested operation succeeds.

        Calls `registerUndo`.

        This can be used to wrap individual undo-able statements within a
        DatastoreTransaction block. Multiple statements that can fail
        separately should not be part of the same `undoWith` block.

        All arguments are forwarded directly to `registerUndo`.
        """
        try:
            yield None
        except BaseException:
            raise
        else:
            self.registerUndo(name, undoFunc, *args, **kwargs)

    def rollback(self) -> None:
        """Roll back all events in this transaction."""
        log = logging.getLogger(__name__)
        while self._log:
            ev = self._log.pop()
            try:
                log.debug(
                    "Rolling back transaction: %s: %s(%s,%s)",
                    ev.name,
                    ev.undoFunc,
                    ",".join(str(a) for a in ev.args),
                    ",".join(f"{k}={v}" for k, v in ev.kwargs.items()),
                )
            except Exception:
                # In case we had a problem in stringification of arguments
                log.warning("Rolling back transaction: %s", ev.name)
            try:
                ev.undoFunc(*ev.args, **ev.kwargs)
            except BaseException as e:
                # Deliberately swallow error that may occur in unrolling
                log.warning("Exception: %s caught while unrolling: %s", e, ev.name)
                pass

    def commit(self) -> None:
        """Commit this transaction."""
        if self.parent is None:
            # Just forget about the events, they have already happened.
            return
        else:
            # We may still need to roll back events from this transaction as
            # part of the parent.
            self.parent._log.extend(self._log)
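
# A minimal usage sketch of `DatastoreTransaction` on its own. The
# ``write_artifact`` and ``delete_artifact`` callables are hypothetical;
# real code normally obtains a transaction via `Datastore.transaction`
# rather than instantiating this class directly.
#
#     txn = DatastoreTransaction()
#     try:
#         artifact = write_artifact()
#         txn.registerUndo("write", delete_artifact, artifact)
#     except BaseException:
#         txn.rollback()  # undo functions run in reverse registration order
#         raise
#     else:
#         txn.commit()  # no parent, so the log is simply discarded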


class Datastore(metaclass=ABCMeta):
    """Datastore interface.

    Parameters
    ----------
    config : `DatastoreConfig` or `str`
        Load configuration either from an existing config instance or by
        referring to a configuration file.
    bridgeManager : `DatastoreRegistryBridgeManager`
        Object that manages the interface between `Registry` and datastores.
    butlerRoot : `str`, optional
        New datastore root to use to override the configuration value.
    """

    defaultConfigFile: ClassVar[Optional[str]] = None
    """Path to configuration defaults. Accessed within the ``config`` resource
    or relative to a search path. Can be None if no defaults specified.
    """

    containerKey: ClassVar[Optional[str]] = None
    """Name of the key containing a list of subconfigurations that also
    need to be merged with defaults and will likely use different Python
    datastore classes (but all using DatastoreConfig). Assumed to be a
    list of configurations that can be represented in a DatastoreConfig
    and containing a "cls" definition. None indicates that no containers
    are expected in this Datastore."""

    isEphemeral: bool = False
    """Indicate whether this Datastore is ephemeral or not. An ephemeral
    datastore is one where the contents of the datastore will not exist
    across process restarts. This value can change per-instance."""

    config: DatastoreConfig
    """Configuration used to create Datastore."""

    name: str
    """Label associated with this Datastore."""

    storageClassFactory: StorageClassFactory
    """Factory for creating storage class instances from name."""

    constraints: Constraints
    """Constraints to apply when putting datasets into the datastore."""

    # MyPy does not like for this to be annotated as any kind of type, because
    # it can't do static checking on type variables that can change at runtime.
    IngestPrepData: ClassVar[Any] = IngestPrepData
    """Helper base class for ingest implementations.
    """

    @classmethod
    @abstractmethod
    def setConfigRoot(cls, root: str, config: Config, full: Config, overwrite: bool = True) -> None:
        """Set filesystem-dependent config options for this datastore.

        The options will be appropriate for a new empty repository with the
        given root.

        Parameters
        ----------
        root : `str`
            Filesystem path to the root of the data repository.
        config : `Config`
            A `Config` to update. Only the subset understood by
            this component will be updated. Will not expand
            defaults.
        full : `Config`
            A complete config with all defaults expanded that can be
            converted to a `DatastoreConfig`. Read-only and will not be
            modified by this method.
            Repository-specific options that should not be obtained
            from defaults when Butler instances are constructed
            should be copied from ``full`` to ``config``.
        overwrite : `bool`, optional
            If `False`, do not modify a value in ``config`` if the value
            already exists. Default is always to overwrite with the provided
            ``root``.

        Notes
        -----
        If a keyword is explicitly defined in the supplied ``config`` it
        will not be overridden by this method if ``overwrite`` is `False`.
        This allows explicit values set in external configs to be retained.
        """
        raise NotImplementedError()

    @staticmethod
    def fromConfig(
        config: Config,
        bridgeManager: DatastoreRegistryBridgeManager,
        butlerRoot: Optional[Union[str, ButlerURI]] = None,
    ) -> "Datastore":
        """Create datastore from type specified in config file.

        Parameters
        ----------
        config : `Config`
            Configuration instance.
        bridgeManager : `DatastoreRegistryBridgeManager`
            Object that manages the interface between `Registry` and
            datastores.
        butlerRoot : `str`, optional
            Butler root directory.
        """
        cls = doImportType(config["datastore", "cls"])
        if not issubclass(cls, Datastore):
            raise TypeError(f"Imported child class {config['datastore', 'cls']} is not a Datastore")
        return cls(config=config, bridgeManager=bridgeManager, butlerRoot=butlerRoot)
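
    # A sketch of how a datastore is typically constructed from configuration.
    # The "cls" value and the pre-built ``bridgeManager`` are assumptions here;
    # in practice `Butler` performs this wiring.
    #
    #     config = Config(
    #         {"datastore": {"cls": "lsst.daf.butler.datastores.fileDatastore.FileDatastore"}}
    #     )
    #     datastore = Datastore.fromConfig(config, bridgeManager, butlerRoot="/path/to/repo")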

    def __init__(
        self,
        config: Union[Config, str],
        bridgeManager: DatastoreRegistryBridgeManager,
        butlerRoot: Optional[Union[str, ButlerURI]] = None,
    ):
        self.config = DatastoreConfig(config)
        self.name = "ABCDataStore"
        self._transaction: Optional[DatastoreTransaction] = None

        # All Datastores need storage classes and constraints
        self.storageClassFactory = StorageClassFactory()

        # And read the constraints list
        constraintsConfig = self.config.get("constraints")
        self.constraints = Constraints(constraintsConfig, universe=bridgeManager.universe)

    def __str__(self) -> str:
        return self.name

    def __repr__(self) -> str:
        return self.name

    @property
    def names(self) -> Tuple[str, ...]:
        """Names associated with this datastore returned as a tuple.

        Can be different from ``name`` for a chaining datastore.
        """
        # Default implementation returns solely the name itself
        return (self.name,)

    @contextlib.contextmanager
    def transaction(self) -> Iterator[DatastoreTransaction]:
        """Context manager supporting `Datastore` transactions.

        Transactions can be nested, and are to be used in combination with
        `Registry.transaction`.
        """
        self._transaction = DatastoreTransaction(self._transaction)
        try:
            yield self._transaction
        except BaseException:
            self._transaction.rollback()
            raise
        else:
            self._transaction.commit()
        self._transaction = self._transaction.parent
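
    # A sketch of nested transaction use from inside a datastore method;
    # ``self._write`` and ``self._delete`` are hypothetical helpers.
    #
    #     with self.transaction() as txn:
    #         location = self._write(inMemoryDataset, ref)
    #         txn.registerUndo("write", self._delete, location)
    #         # If anything later in this block (or in an enclosing
    #         # transaction) raises, the registered undo runs on rollback.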

    @abstractmethod
    def knows(self, ref: DatasetRef) -> bool:
        """Check if the dataset is known to the datastore.

        Does not check for existence of any artifact.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required dataset.

        Returns
        -------
        exists : `bool`
            `True` if the dataset is known to the datastore.
        """
        raise NotImplementedError()

    def mexists(
        self, refs: Iterable[DatasetRef], artifact_existence: Optional[Dict[ButlerURI, bool]] = None
    ) -> Dict[DatasetRef, bool]:
        """Check the existence of multiple datasets at once.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            The datasets to be checked.
        artifact_existence : `dict` of [`ButlerURI`, `bool`], optional
            Mapping of datastore artifact to existence. Updated by this
            method with details of all artifacts tested. Can be `None`
            if the caller is not interested.

        Returns
        -------
        existence : `dict` of [`DatasetRef`, `bool`]
            Mapping from dataset to boolean indicating existence.
        """
        existence: Dict[DatasetRef, bool] = {}
        # Non-optimized default.
        for ref in refs:
            existence[ref] = self.exists(ref)
        return existence
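
    # Example of the intended calling pattern, sharing one artifact-existence
    # cache across several existence checks (the surrounding names are
    # assumptions for illustration):
    #
    #     artifact_existence: Dict[ButlerURI, bool] = {}
    #     existence = datastore.mexists(refs, artifact_existence=artifact_existence)
    #     missing = [ref for ref, found in existence.items() if not found]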

    @abstractmethod
    def exists(self, datasetRef: DatasetRef) -> bool:
        """Check if the dataset exists in the datastore.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required dataset.

        Returns
        -------
        exists : `bool`
            `True` if the entity exists in the `Datastore`.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def get(self, datasetRef: DatasetRef, parameters: Optional[Mapping[str, Any]] = None) -> Any:
        """Load an `InMemoryDataset` from the store.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required Dataset.
        parameters : `dict`, optional
            `StorageClass`-specific parameters that specify a slice of the
            Dataset to be loaded.

        Returns
        -------
        inMemoryDataset : `object`
            Requested Dataset or slice thereof as an InMemoryDataset.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def put(self, inMemoryDataset: Any, datasetRef: DatasetRef) -> None:
        """Write an `InMemoryDataset` with a given `DatasetRef` to the store.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Dataset to store.
        datasetRef : `DatasetRef`
            Reference to the associated Dataset.
        """
        raise NotImplementedError("Must be implemented by subclass")

    def _overrideTransferMode(self, *datasets: FileDataset, transfer: Optional[str] = None) -> Optional[str]:
        """Allow ingest transfer mode to be defaulted based on datasets.

        Parameters
        ----------
        datasets : `FileDataset`
            Each positional argument is a struct containing information about
            a file to be ingested, including its path (either absolute or
            relative to the datastore root, if applicable), a complete
            `DatasetRef` (with ``dataset_id not None``), and optionally a
            formatter class or its fully-qualified string name. If a formatter
            is not provided, this method should populate that attribute with
            the formatter the datastore would use for `put`. Subclasses are
            also permitted to modify the path attribute (typically to put it
            in what the datastore considers its standard form).
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            See `ingest` for details of transfer modes.

        Returns
        -------
        newTransfer : `str`
            Transfer mode to use. Will be identical to the supplied transfer
            mode unless "auto" is used.
        """
        if transfer != "auto":
            return transfer
        raise RuntimeError(f"{transfer} is not allowed without specialization.")

    def _prepIngest(self, *datasets: FileDataset, transfer: Optional[str] = None) -> IngestPrepData:
        """Process datasets to identify which ones can be ingested.

        Parameters
        ----------
        datasets : `FileDataset`
            Each positional argument is a struct containing information about
            a file to be ingested, including its path (either absolute or
            relative to the datastore root, if applicable), a complete
            `DatasetRef` (with ``dataset_id not None``), and optionally a
            formatter class or its fully-qualified string name. If a formatter
            is not provided, this method should populate that attribute with
            the formatter the datastore would use for `put`. Subclasses are
            also permitted to modify the path attribute (typically to put it
            in what the datastore considers its standard form).
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            See `ingest` for details of transfer modes.

        Returns
        -------
        data : `IngestPrepData`
            An instance of a subclass of `IngestPrepData`, used to pass
            arbitrary data from `_prepIngest` to `_finishIngest`. This should
            include only the datasets this datastore can actually ingest;
            others should be silently ignored (`Datastore.ingest` will inspect
            `IngestPrepData.refs` and raise `DatasetTypeNotSupportedError` if
            necessary).

        Raises
        ------
        NotImplementedError
            Raised if the datastore does not support the given transfer mode
            (including the case where ingest is not supported at all).
        FileNotFoundError
            Raised if one of the given files does not exist.
        FileExistsError
            Raised if transfer is not `None` but the (internal) location the
            file would be moved to is already occupied.

        Notes
        -----
        This method (along with `_finishIngest`) should be implemented by
        subclasses to provide ingest support instead of implementing `ingest`
        directly.

        `_prepIngest` should not modify the data repository or given files in
        any way; all changes should be deferred to `_finishIngest`.

        When possible, exceptions should be raised in `_prepIngest` instead of
        `_finishIngest`. `NotImplementedError` exceptions that indicate that
        the transfer mode is not supported must be raised by `_prepIngest`
        instead of `_finishIngest`.
        """
        raise NotImplementedError(f"Datastore {self} does not support direct file-based ingest.")

    def _finishIngest(self, prepData: IngestPrepData, *, transfer: Optional[str] = None) -> None:
        """Complete an ingest operation.

        Parameters
        ----------
        prepData : `IngestPrepData`
            An instance of a subclass of `IngestPrepData`. Guaranteed to be
            the direct result of a call to `_prepIngest` on this datastore.
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            See `ingest` for details of transfer modes.

        Raises
        ------
        FileNotFoundError
            Raised if one of the given files does not exist.
        FileExistsError
            Raised if transfer is not `None` but the (internal) location the
            file would be moved to is already occupied.

        Notes
        -----
        This method (along with `_prepIngest`) should be implemented by
        subclasses to provide ingest support instead of implementing `ingest`
        directly.
        """
        raise NotImplementedError(f"Datastore {self} does not support direct file-based ingest.")
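
    # A minimal sketch of the subclassing pattern that `_prepIngest` and
    # `_finishIngest` support. Everything below is hypothetical; a real
    # datastore would also check constraints, formatters, and locations.
    #
    #     class MyDatastore(Datastore):
    #         class IngestPrepData(Datastore.IngestPrepData):
    #             def __init__(self, datasets):
    #                 super().__init__(ref for d in datasets for ref in d.refs)
    #                 self.datasets = datasets
    #
    #         def _prepIngest(self, *datasets, transfer=None):
    #             if transfer not in (None, "copy"):
    #                 raise NotImplementedError(f"Transfer mode {transfer} not supported.")
    #             return self.IngestPrepData([d for d in datasets if self._accepts(d)])
    #
    #         def _finishIngest(self, prepData, *, transfer=None):
    #             for dataset in prepData.datasets:
    #                 self._ingest_one(dataset, transfer=transfer)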

    def ingest(self, *datasets: FileDataset, transfer: Optional[str] = None) -> None:
        """Ingest one or more files into the datastore.

        Parameters
        ----------
        datasets : `FileDataset`
            Each positional argument is a struct containing information about
            a file to be ingested, including its path (either absolute or
            relative to the datastore root, if applicable), a complete
            `DatasetRef` (with ``dataset_id not None``), and optionally a
            formatter class or its fully-qualified string name. If a formatter
            is not provided, the one the datastore would use for ``put`` on
            that dataset is assumed.
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            If `None` (default), the file must already be in a location
            appropriate for the datastore (e.g. within its root directory),
            and will not be modified. Other choices include "move", "copy",
            "link", "symlink", "relsymlink", and "hardlink". "link" is a
            special transfer mode that will first try to make a hardlink and
            if that fails a symlink will be used instead. "relsymlink" creates
            a relative symlink rather than use an absolute path.
            Most datastores do not support all transfer modes.
            "auto" is a special option that will let the
            data store choose the most natural option for itself.

        Raises
        ------
        NotImplementedError
            Raised if the datastore does not support the given transfer mode
            (including the case where ingest is not supported at all).
        DatasetTypeNotSupportedError
            Raised if one or more files to be ingested have a dataset type that
            is not supported by the datastore.
        FileNotFoundError
            Raised if one of the given files does not exist.
        FileExistsError
            Raised if transfer is not `None` but the (internal) location the
            file would be moved to is already occupied.

        Notes
        -----
        Subclasses should implement `_prepIngest` and `_finishIngest` instead
        of implementing `ingest` directly. Datastores that hold and
        delegate to child datastores may want to call those methods as well.

        Subclasses are encouraged to document their supported transfer modes
        in their class documentation.
        """
        # Allow a datastore to select a default transfer mode
        transfer = self._overrideTransferMode(*datasets, transfer=transfer)
        prepData = self._prepIngest(*datasets, transfer=transfer)
        refs = {ref.id: ref for dataset in datasets for ref in dataset.refs}
        if None in refs:
            # Find the file for the error message. There may be multiple
            # bad refs so look for all of them.
            unresolved_paths = {}
            for dataset in datasets:
                unresolved = []
                for ref in dataset.refs:
                    if ref.id is None:
                        unresolved.append(ref)
                if unresolved:
                    unresolved_paths[dataset.path] = unresolved
            raise RuntimeError(
                "Attempt to ingest unresolved DatasetRef from: "
                + ",".join(f"{path}: ({[str(r) for r in bad]})" for path, bad in unresolved_paths.items())
            )
        if refs.keys() != prepData.refs.keys():
            unsupported = refs.keys() - prepData.refs.keys()
            # Group unsupported refs by DatasetType for an informative
            # but still concise error message.
            byDatasetType = defaultdict(list)
            for datasetId in unsupported:
                ref = refs[datasetId]
                byDatasetType[ref.datasetType].append(ref)
            raise DatasetTypeNotSupportedError(
                "DatasetType(s) not supported in ingest: "
                + ", ".join(f"{k.name} ({len(v)} dataset(s))" for k, v in byDatasetType.items())
            )
        self._finishIngest(prepData, transfer=transfer)
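
    # A sketch of calling `ingest` with resolved refs; the paths and the
    # ``resolved_ref*`` names are illustrative assumptions.
    #
    #     datasets = [
    #         FileDataset(path="/data/raw_1.fits", refs=[resolved_ref1]),
    #         FileDataset(path="/data/raw_2.fits", refs=[resolved_ref2]),
    #     ]
    #     datastore.ingest(*datasets, transfer="copy")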

    def transfer_from(
        self,
        source_datastore: Datastore,
        refs: Iterable[DatasetRef],
        local_refs: Optional[Iterable[DatasetRef]] = None,
        transfer: str = "auto",
        artifact_existence: Optional[Dict[ButlerURI, bool]] = None,
    ) -> None:
        """Transfer dataset artifacts from another datastore to this one.

        Parameters
        ----------
        source_datastore : `Datastore`
            The datastore from which to transfer artifacts. That datastore
            must be compatible with this datastore receiving the artifacts.
        refs : iterable of `DatasetRef`
            The datasets to transfer from the source datastore.
        local_refs : iterable of `DatasetRef`, optional
            The dataset refs associated with the registry associated with
            this datastore. Can be `None` if the source and target datastores
            are using UUIDs.
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            Choices include "move", "copy",
            "link", "symlink", "relsymlink", and "hardlink". "link" is a
            special transfer mode that will first try to make a hardlink and
            if that fails a symlink will be used instead. "relsymlink" creates
            a relative symlink rather than use an absolute path.
            Most datastores do not support all transfer modes.
            "auto" (the default) is a special option that will let the
            data store choose the most natural option for itself.
            If the source location and transfer location are identical the
            transfer mode will be ignored.
        artifact_existence : `dict` of [`ButlerURI`, `bool`], optional
            Mapping of datastore artifact to existence. Updated by this
            method with details of all artifacts tested. Can be `None`
            if the caller is not interested.

        Raises
        ------
        TypeError
            Raised if the two datastores are not compatible.
        """
        if type(self) is not type(source_datastore):
            raise TypeError(
                f"Datastore mismatch between this datastore ({type(self)}) and the "
                f"source datastore ({type(source_datastore)})."
            )

        raise NotImplementedError(f"Datastore {type(self)} must implement a transfer_from method.")

    @abstractmethod
    def getURIs(
        self, datasetRef: DatasetRef, predict: bool = False
    ) -> Tuple[Optional[ButlerURI], Dict[str, ButlerURI]]:
        """Return URIs associated with dataset.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required dataset.
        predict : `bool`, optional
            If the datastore does not know about the dataset, should it
            return a predicted URI or not?

        Returns
        -------
        primary : `ButlerURI`
            The URI to the primary artifact associated with this dataset.
            If the dataset was disassembled within the datastore this
            may be `None`.
        components : `dict`
            URIs to any components associated with the dataset artifact.
            Can be empty if there are no components.
        """
        raise NotImplementedError()

    @abstractmethod
    def getURI(self, datasetRef: DatasetRef, predict: bool = False) -> ButlerURI:
        """URI to the Dataset.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required Dataset.
        predict : `bool`
            If `True` attempt to predict the URI for a dataset if it does
            not exist in datastore.

        Returns
        -------
        uri : `ButlerURI`
            URI pointing to the Dataset within the datastore. If the
            Dataset does not exist in the datastore, the URI may be a guess.
            If the datastore does not have entities that relate well
            to the concept of a URI the returned URI will be
            descriptive. The returned URI is not guaranteed to be obtainable.

        Raises
        ------
        FileNotFoundError
            Raised if a URI has been requested for a dataset that does not
            exist and guessing is not allowed.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def retrieveArtifacts(
        self,
        refs: Iterable[DatasetRef],
        destination: ButlerURI,
        transfer: str = "auto",
        preserve_path: bool = True,
        overwrite: bool = False,
    ) -> List[ButlerURI]:
        """Retrieve the artifacts associated with the supplied refs.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            The datasets for which artifacts are to be retrieved.
            A single ref can result in multiple artifacts. The refs must
            be resolved.
        destination : `ButlerURI`
            Location to write the artifacts.
        transfer : `str`, optional
            Method to use to transfer the artifacts. Must be one of the options
            supported by `ButlerURI.transfer_from()`. "move" is not allowed.
        preserve_path : `bool`, optional
            If `True` the full path of the artifact within the datastore
            is preserved. If `False` the final file component of the path
            is used.
        overwrite : `bool`, optional
            If `True` allow transfers to overwrite existing files at the
            destination.

        Returns
        -------
        targets : `list` of `ButlerURI`
            URIs of file artifacts in destination location. Order is not
            preserved.

        Notes
        -----
        For non-file datastores the artifacts written to the destination
        may not match the representation inside the datastore. For example
        a hierarchical data structure in a NoSQL database may well be stored
        as a JSON file.
        """
        raise NotImplementedError()

    @abstractmethod
    def remove(self, datasetRef: DatasetRef) -> None:
        """Indicate to the Datastore that a Dataset can be removed.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required Dataset.

        Raises
        ------
        FileNotFoundError
            When Dataset does not exist.

        Notes
        -----
        Some Datastores may implement this method as a silent no-op to
        disable Dataset deletion through standard interfaces.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def forget(self, refs: Iterable[DatasetRef]) -> None:
        """Indicate to the Datastore that it should remove all records of the
        given datasets, without actually deleting them.

        Parameters
        ----------
        refs : `Iterable` [ `DatasetRef` ]
            References to the datasets being forgotten.

        Notes
        -----
        Asking a datastore to forget a `DatasetRef` it does not hold should be
        a silent no-op, not an error.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def trash(self, ref: Union[DatasetRef, Iterable[DatasetRef]], ignore_errors: bool = True) -> None:
        """Indicate to the Datastore that a Dataset can be moved to the trash.

        Parameters
        ----------
        ref : `DatasetRef` or iterable thereof
            Reference(s) to the required Dataset.
        ignore_errors : `bool`, optional
            Determine whether errors should be ignored. When multiple
            refs are being trashed there will be no per-ref check.

        Raises
        ------
        FileNotFoundError
            When Dataset does not exist and errors are not ignored. Only
            checked if a single ref is supplied (and not in a list).

        Notes
        -----
        Some Datastores may implement this method as a silent no-op to
        disable Dataset deletion through standard interfaces.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def emptyTrash(self, ignore_errors: bool = True) -> None:
        """Remove all datasets from the trash.

        Parameters
        ----------
        ignore_errors : `bool`, optional
            Determine whether errors should be ignored.

        Notes
        -----
        Some Datastores may implement this method as a silent no-op to
        disable Dataset deletion through standard interfaces.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def transfer(self, inputDatastore: Datastore, datasetRef: DatasetRef) -> None:
        """Transfer a dataset from another datastore to this datastore.

        Parameters
        ----------
        inputDatastore : `Datastore`
            The external `Datastore` from which to retrieve the Dataset.
        datasetRef : `DatasetRef`
            Reference to the required Dataset.
        """
        raise NotImplementedError("Must be implemented by subclass")

    def export(
        self, refs: Iterable[DatasetRef], *, directory: Optional[str] = None, transfer: Optional[str] = None
    ) -> Iterable[FileDataset]:
        """Export datasets for transfer to another data repository.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            Dataset references to be exported.
        directory : `str`, optional
            Path to a directory that should contain files corresponding to
            output datasets. Ignored if ``transfer`` is `None`.
        transfer : `str`, optional
            Mode that should be used to move datasets out of the repository.
            Valid options are the same as those of the ``transfer`` argument
            to ``ingest``, and datastores may similarly signal that a transfer
            mode is not supported by raising `NotImplementedError`.

        Returns
        -------
        datasets : iterable of `FileDataset`
            Structs containing information about the exported datasets, in the
            same order as ``refs``.

        Raises
        ------
        NotImplementedError
            Raised if the given transfer mode is not supported.
        """
        raise NotImplementedError(f"Transfer mode {transfer} not supported.")

    @abstractmethod
    def validateConfiguration(
        self, entities: Iterable[Union[DatasetRef, DatasetType, StorageClass]], logFailures: bool = False
    ) -> None:
        """Validate some of the configuration for this datastore.

        Parameters
        ----------
        entities : iterable of `DatasetRef`, `DatasetType`, or `StorageClass`
            Entities to test against this configuration. Can be differing
            types.
        logFailures : `bool`, optional
            If `True`, output a log message for every validation error
            detected.

        Raises
        ------
        DatastoreValidationError
            Raised if there is a validation problem with a configuration.

        Notes
        -----
        Which parts of the configuration are validated is at the discretion
        of each Datastore implementation.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def validateKey(self, lookupKey: LookupKey, entity: Union[DatasetRef, DatasetType, StorageClass]) -> None:
        """Validate a specific look up key with supplied entity.

        Parameters
        ----------
        lookupKey : `LookupKey`
            Key to use to retrieve information from the datastore
            configuration.
        entity : `DatasetRef`, `DatasetType`, or `StorageClass`
            Entity to compare with configuration retrieved using the
            specified lookup key.

        Raises
        ------
        DatastoreValidationError
            Raised if there is a problem with the combination of entity
            and lookup key.

        Notes
        -----
        Bypasses the normal selection priorities by allowing a key that
        would normally not be selected to be validated.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def getLookupKeys(self) -> Set[LookupKey]:
        """Return all the lookup keys relevant to this datastore.

        Returns
        -------
        keys : `set` of `LookupKey`
            The keys stored internally for looking up information based
            on `DatasetType` name or `StorageClass`.
        """
        raise NotImplementedError("Must be implemented by subclass")

    def needs_expanded_data_ids(
        self,
        transfer: Optional[str],
        entity: Optional[Union[DatasetRef, DatasetType, StorageClass]] = None,
    ) -> bool:
        """Test whether this datastore needs expanded data IDs to ingest.

        Parameters
        ----------
        transfer : `str` or `None`
            Transfer mode for ingest.
        entity, optional
            Object representing what will be ingested. If not provided (or not
            specific enough), `True` may be returned even if expanded data
            IDs aren't necessary.

        Returns
        -------
        needed : `bool`
            If `True`, expanded data IDs may be needed. `False` only if
            expansion definitely isn't necessary.
        """
        return True
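
    # A sketch of how a caller might use this to skip unnecessary data ID
    # expansion before ingest; the `registry.expandDataId` call and the
    # surrounding names are assumptions for illustration.
    #
    #     if datastore.needs_expanded_data_ids(transfer, datasetType):
    #         data_ids = [registry.expandDataId(ref.dataId) for ref in refs]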