# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
22"""Support for generic data stores."""
24from __future__ import annotations
26__all__ = ("DatastoreConfig", "Datastore", "DatastoreValidationError")
28import contextlib
29import logging
30from collections import defaultdict
31from typing import (
32 TYPE_CHECKING,
33 Any,
34 Callable,
35 ClassVar,
36 Dict,
37 Iterable,
38 Iterator,
39 List,
40 Mapping,
41 Optional,
42 Set,
43 Tuple,
44 Type,
45 Union,
46)
48from dataclasses import dataclass
49from abc import ABCMeta, abstractmethod
51from lsst.utils import doImportType
52from .config import ConfigSubset, Config
53from .exceptions import ValidationError, DatasetTypeNotSupportedError
54from .constraints import Constraints
55from .storageClass import StorageClassFactory
56from .fileDataset import FileDataset

if TYPE_CHECKING:
    from ..registry.interfaces import DatastoreRegistryBridgeManager
    from .datasets import DatasetRef, DatasetType
    from .configSupport import LookupKey
    from .storageClass import StorageClass
    from ._butlerUri import ButlerURI


class DatastoreConfig(ConfigSubset):
    """Configuration for Datastores."""

    component = "datastore"
    requiredKeys = ("cls",)
    defaultConfigFile = "datastore.yaml"


class DatastoreValidationError(ValidationError):
    """There is a problem with the Datastore configuration."""

    pass


@dataclass(frozen=True)
class Event:
    __slots__ = {"name", "undoFunc", "args", "kwargs"}
    name: str
    undoFunc: Callable
    args: tuple
    kwargs: dict


class IngestPrepData:
    """A helper base class for `Datastore` ingest implementations.

    Datastore implementations will generally need a custom implementation of
    this class.

    Should be accessed as ``Datastore.IngestPrepData`` instead of via direct
    import.

    Parameters
    ----------
    refs : iterable of `DatasetRef`
        References for the datasets that can be ingested by this datastore.
    """

    def __init__(self, refs: Iterable[DatasetRef]):
        self.refs = {ref.id: ref for ref in refs}


class DatastoreTransaction:
    """Keeps a log of `Datastore` activity and allows rollback.

    Parameters
    ----------
    parent : `DatastoreTransaction`, optional
        The parent transaction (if any).
    """

    Event: ClassVar[Type] = Event

    parent: Optional['DatastoreTransaction']
    """The parent transaction. (`DatastoreTransaction`, optional)"""

    def __init__(self, parent: Optional[DatastoreTransaction] = None):
        self.parent = parent
        self._log: List[Event] = []

    def registerUndo(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> None:
        """Register event with undo function.

        Parameters
        ----------
        name : `str`
            Name of the event.
        undoFunc : func
            Function to undo this event.
        args : `tuple`
            Positional arguments to `undoFunc`.
        **kwargs
            Keyword arguments to `undoFunc`.
        """
        self._log.append(self.Event(name, undoFunc, args, kwargs))

    @contextlib.contextmanager
    def undoWith(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> Iterator[None]:
        """Register undo function if nested operation succeeds.

        Calls `registerUndo`.

        This can be used to wrap individual undo-able statements within a
        DatastoreTransaction block. Multiple statements that can fail
        separately should not be part of the same `undoWith` block.

        All arguments are forwarded directly to `registerUndo`.
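
        Examples
        --------
        A minimal illustrative sketch; the names ``datastore``, ``uri``,
        ``data`` and ``cleanup`` are hypothetical and not part of this API::

            with datastore.transaction() as txn:
                with txn.undoWith("write file", cleanup, uri):
                    uri.write(data)

        If the write raises, no undo action is registered; if it succeeds,
        ``cleanup(uri)`` will be called should the transaction later roll
        back.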
153 """
154 try:
155 yield None
156 except BaseException:
157 raise
158 else:
159 self.registerUndo(name, undoFunc, *args, **kwargs)

    def rollback(self) -> None:
        """Roll back all events in this transaction."""
        log = logging.getLogger(__name__)
        while self._log:
            ev = self._log.pop()
            try:
                log.debug("Rolling back transaction: %s: %s(%s,%s)", ev.name,
                          ev.undoFunc,
                          ",".join(str(a) for a in ev.args),
                          ",".join(f"{k}={v}" for k, v in ev.kwargs.items()))
            except Exception:
                # In case we had a problem in stringification of arguments
                log.warning("Rolling back transaction: %s", ev.name)
            try:
                ev.undoFunc(*ev.args, **ev.kwargs)
            except BaseException as e:
                # Deliberately swallow error that may occur in unrolling
                log.warning("Exception: %s caught while unrolling: %s", e, ev.name)
                pass

    def commit(self) -> None:
        """Commit this transaction."""
        if self.parent is None:
            # Just forget about the events, they have already happened.
            return
        else:
            # We may still want to roll back the events from this transaction
            # as part of the parent.
            self.parent._log.extend(self._log)


class Datastore(metaclass=ABCMeta):
    """Datastore interface.

    Parameters
    ----------
    config : `DatastoreConfig` or `str`
        Load configuration either from an existing config instance or by
        referring to a configuration file.
    bridgeManager : `DatastoreRegistryBridgeManager`
        Object that manages the interface between `Registry` and datastores.
    butlerRoot : `str`, optional
        New datastore root to use to override the configuration value.
    """

    defaultConfigFile: ClassVar[Optional[str]] = None
    """Path to configuration defaults. Accessed within the ``config`` resource
    or relative to a search path. Can be `None` if no defaults are specified.
    """

    containerKey: ClassVar[Optional[str]] = None
    """Name of the key containing a list of subconfigurations that also
    need to be merged with defaults and will likely use different Python
    datastore classes (but all using DatastoreConfig). Assumed to be a
    list of configurations that can be represented in a DatastoreConfig
    and containing a "cls" definition. `None` indicates that no containers
    are expected in this Datastore."""

    isEphemeral: bool = False
    """Indicate whether this Datastore is ephemeral or not. An ephemeral
    datastore is one whose contents will not exist across process restarts.
    This value can change per-instance."""

    config: DatastoreConfig
    """Configuration used to create Datastore."""

    name: str
    """Label associated with this Datastore."""

    storageClassFactory: StorageClassFactory
    """Factory for creating storage class instances from name."""

    constraints: Constraints
    """Constraints to apply when putting datasets into the datastore."""

    # MyPy does not like this to be annotated as any kind of type, because
    # it cannot do static checking on type variables that can change at
    # runtime.
    IngestPrepData: ClassVar[Any] = IngestPrepData
    """Helper base class for ingest implementations.
    """

    @classmethod
    @abstractmethod
    def setConfigRoot(cls, root: str, config: Config, full: Config, overwrite: bool = True) -> None:
        """Set filesystem-dependent config options for this datastore.

        The options will be appropriate for a new empty repository with the
        given root.

        Parameters
        ----------
        root : `str`
            Filesystem path to the root of the data repository.
        config : `Config`
            A `Config` to update. Only the subset understood by
            this component will be updated. Will not expand
            defaults.
        full : `Config`
            A complete config with all defaults expanded that can be
            converted to a `DatastoreConfig`. Read-only and will not be
            modified by this method.
            Repository-specific options that should not be obtained
            from defaults when Butler instances are constructed
            should be copied from ``full`` to ``config``.
        overwrite : `bool`, optional
            If `False`, do not modify a value in ``config`` if the value
            already exists. Default is always to overwrite with the provided
            ``root``.

        Notes
        -----
        If a keyword is explicitly defined in the supplied ``config`` it
        will not be overridden by this method if ``overwrite`` is `False`.
        This allows explicit values set in external configs to be retained.
        """
        raise NotImplementedError()

    @staticmethod
    def fromConfig(config: Config, bridgeManager: DatastoreRegistryBridgeManager,
                   butlerRoot: Optional[Union[str, ButlerURI]] = None) -> 'Datastore':
        """Create datastore from type specified in config file.

        Parameters
        ----------
        config : `Config`
            Configuration instance.
        bridgeManager : `DatastoreRegistryBridgeManager`
            Object that manages the interface between `Registry` and
            datastores.
        butlerRoot : `str`, optional
            Butler root directory.
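
        Examples
        --------
        An illustrative sketch only; ``butlerConfig`` and ``bridgeManager``
        are assumed to have been constructed elsewhere::

            datastore = Datastore.fromConfig(butlerConfig, bridgeManager,
                                             butlerRoot="/data/repo")

        The concrete class instantiated is taken from the ``datastore.cls``
        entry of the configuration.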
292 """
293 cls = doImportType(config["datastore", "cls"])
294 if not issubclass(cls, Datastore):
295 raise TypeError(f"Imported child class {config['datastore', 'cls']} is not a Datastore")
296 return cls(config=config, bridgeManager=bridgeManager, butlerRoot=butlerRoot)

    def __init__(self, config: Union[Config, str],
                 bridgeManager: DatastoreRegistryBridgeManager,
                 butlerRoot: Optional[Union[str, ButlerURI]] = None):
        self.config = DatastoreConfig(config)
        self.name = "ABCDataStore"
        self._transaction: Optional[DatastoreTransaction] = None

        # All Datastores need storage classes and constraints
        self.storageClassFactory = StorageClassFactory()

        # And read the constraints list
        constraintsConfig = self.config.get("constraints")
        self.constraints = Constraints(constraintsConfig, universe=bridgeManager.universe)

    def __str__(self) -> str:
        return self.name

    def __repr__(self) -> str:
        return self.name

    @property
    def names(self) -> Tuple[str, ...]:
        """Names associated with this datastore returned as a tuple.

        Can be different to ``name`` for a chaining datastore.
        """
        # Default implementation returns solely the name itself
        return (self.name, )

    @contextlib.contextmanager
    def transaction(self) -> Iterator[DatastoreTransaction]:
        """Context manager supporting `Datastore` transactions.

        Transactions can be nested, and are to be used in combination with
        `Registry.transaction`.
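
        Examples
        --------
        A minimal sketch, assuming ``datastore`` is a concrete `Datastore`,
        ``ref`` a resolved `DatasetRef` and ``inMemoryDataset`` the object to
        store (all hypothetical here)::

            with datastore.transaction():
                datastore.put(inMemoryDataset, ref)

        If the block raises, any undo actions registered during it are rolled
        back; otherwise they are committed (or merged into the enclosing
        transaction when transactions are nested).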
333 """
334 self._transaction = DatastoreTransaction(self._transaction)
335 try:
336 yield self._transaction
337 except BaseException:
338 self._transaction.rollback()
339 raise
340 else:
341 self._transaction.commit()
342 self._transaction = self._transaction.parent

    @abstractmethod
    def knows(self, ref: DatasetRef) -> bool:
        """Check if the dataset is known to the datastore.

        Does not check for existence of any artifact.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required dataset.

        Returns
        -------
        exists : `bool`
            `True` if the dataset is known to the datastore.
        """
        raise NotImplementedError()

    def mexists(self, refs: Iterable[DatasetRef],
                artifact_existence: Optional[Dict[ButlerURI, bool]] = None) -> Dict[DatasetRef, bool]:
        """Check the existence of multiple datasets at once.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            The datasets to be checked.
        artifact_existence : `dict` of [`ButlerURI`, `bool`], optional
            Mapping of datastore artifact to existence. Updated by this
            method with details of all artifacts tested. Can be `None`
            if the caller is not interested.

        Returns
        -------
        existence : `dict` of [`DatasetRef`, `bool`]
            Mapping from dataset to boolean indicating existence.
        """
        existence: Dict[DatasetRef, bool] = {}
        # Non-optimized default.
        for ref in refs:
            existence[ref] = self.exists(ref)
        return existence

    @abstractmethod
    def exists(self, datasetRef: DatasetRef) -> bool:
        """Check if the dataset exists in the datastore.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required dataset.

        Returns
        -------
        exists : `bool`
            `True` if the entity exists in the `Datastore`.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def get(self, datasetRef: DatasetRef, parameters: Optional[Mapping[str, Any]] = None) -> Any:
        """Load an `InMemoryDataset` from the store.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required Dataset.
        parameters : `dict`, optional
            `StorageClass`-specific parameters that specify a slice of the
            Dataset to be loaded.

        Returns
        -------
        inMemoryDataset : `object`
            Requested Dataset or slice thereof as an InMemoryDataset.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def put(self, inMemoryDataset: Any, datasetRef: DatasetRef) -> None:
        """Write an `InMemoryDataset` with a given `DatasetRef` to the store.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Dataset to store.
        datasetRef : `DatasetRef`
            Reference to the associated Dataset.
        """
        raise NotImplementedError("Must be implemented by subclass")

    def _overrideTransferMode(self, *datasets: FileDataset, transfer: Optional[str] = None) -> Optional[str]:
        """Allow ingest transfer mode to be defaulted based on datasets.

        Parameters
        ----------
        datasets : `FileDataset`
            Each positional argument is a struct containing information about
            a file to be ingested, including its path (either absolute or
            relative to the datastore root, if applicable), a complete
            `DatasetRef` (with ``dataset_id not None``), and optionally a
            formatter class or its fully-qualified string name. If a formatter
            is not provided, this method should populate that attribute with
            the formatter the datastore would use for `put`. Subclasses are
            also permitted to modify the path attribute (typically to put it
            in what the datastore considers its standard form).
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            See `ingest` for details of transfer modes.

        Returns
        -------
        newTransfer : `str`
            Transfer mode to use. Will be identical to the supplied transfer
            mode unless "auto" is used.
        """
        if transfer != "auto":
            return transfer
        raise RuntimeError(f"{transfer} is not allowed without specialization.")

    def _prepIngest(self, *datasets: FileDataset, transfer: Optional[str] = None) -> IngestPrepData:
        """Process datasets to identify which ones can be ingested.

        Parameters
        ----------
        datasets : `FileDataset`
            Each positional argument is a struct containing information about
            a file to be ingested, including its path (either absolute or
            relative to the datastore root, if applicable), a complete
            `DatasetRef` (with ``dataset_id not None``), and optionally a
            formatter class or its fully-qualified string name. If a formatter
            is not provided, this method should populate that attribute with
            the formatter the datastore would use for `put`. Subclasses are
            also permitted to modify the path attribute (typically to put it
            in what the datastore considers its standard form).
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            See `ingest` for details of transfer modes.

        Returns
        -------
        data : `IngestPrepData`
            An instance of a subclass of `IngestPrepData`, used to pass
            arbitrary data from `_prepIngest` to `_finishIngest`. This should
            include only the datasets this datastore can actually ingest;
            others should be silently ignored (`Datastore.ingest` will inspect
            `IngestPrepData.refs` and raise `DatasetTypeNotSupportedError` if
            necessary).

        Raises
        ------
        NotImplementedError
            Raised if the datastore does not support the given transfer mode
            (including the case where ingest is not supported at all).
        FileNotFoundError
            Raised if one of the given files does not exist.
        FileExistsError
            Raised if transfer is not `None` but the (internal) location the
            file would be moved to is already occupied.

        Notes
        -----
        This method (along with `_finishIngest`) should be implemented by
        subclasses to provide ingest support instead of implementing `ingest`
        directly.

        `_prepIngest` should not modify the data repository or given files in
        any way; all changes should be deferred to `_finishIngest`.

        When possible, exceptions should be raised in `_prepIngest` instead of
        `_finishIngest`. `NotImplementedError` exceptions that indicate that
        the transfer mode is not supported must be raised by `_prepIngest`
        instead of `_finishIngest`.
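
        Examples
        --------
        A highly simplified sketch of how a subclass might split the work
        between the two hooks; the acceptance test shown is illustrative
        only and not a requirement of the interface::

            class MyDatastore(Datastore):
                def _prepIngest(self, *datasets, transfer=None):
                    # Keep only the refs this datastore will accept; do not
                    # modify the repository or the files here.
                    accepted = [ref for dataset in datasets
                                for ref in dataset.refs
                                if self.constraints.isAcceptable(ref)]
                    return self.IngestPrepData(accepted)

                def _finishIngest(self, prepData, *, transfer=None):
                    # All side effects (copying or moving files, recording
                    # internal state) happen here.
                    ...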
516 """
517 raise NotImplementedError(
518 f"Datastore {self} does not support direct file-based ingest."
519 )

    def _finishIngest(self, prepData: IngestPrepData, *, transfer: Optional[str] = None) -> None:
        """Complete an ingest operation.

        Parameters
        ----------
        prepData : `IngestPrepData`
            An instance of a subclass of `IngestPrepData`. Guaranteed to be
            the direct result of a call to `_prepIngest` on this datastore.
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            See `ingest` for details of transfer modes.

        Raises
        ------
        FileNotFoundError
            Raised if one of the given files does not exist.
        FileExistsError
            Raised if transfer is not `None` but the (internal) location the
            file would be moved to is already occupied.

        Notes
        -----
        This method (along with `_prepIngest`) should be implemented by
        subclasses to provide ingest support instead of implementing `ingest`
        directly.
        """
        raise NotImplementedError(
            f"Datastore {self} does not support direct file-based ingest."
        )

    def ingest(self, *datasets: FileDataset, transfer: Optional[str] = None) -> None:
        """Ingest one or more files into the datastore.

        Parameters
        ----------
        datasets : `FileDataset`
            Each positional argument is a struct containing information about
            a file to be ingested, including its path (either absolute or
            relative to the datastore root, if applicable), a complete
            `DatasetRef` (with ``dataset_id not None``), and optionally a
            formatter class or its fully-qualified string name. If a formatter
            is not provided, the one the datastore would use for ``put`` on
            that dataset is assumed.
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            If `None` (default), the file must already be in a location
            appropriate for the datastore (e.g. within its root directory),
            and will not be modified. Other choices include "move", "copy",
            "link", "symlink", "relsymlink", and "hardlink". "link" is a
            special transfer mode that will first try to make a hardlink and
            if that fails a symlink will be used instead. "relsymlink" creates
            a relative symlink rather than using an absolute path.
            Most datastores do not support all transfer modes.
            "auto" is a special option that will let the datastore choose the
            most natural option for itself.

        Raises
        ------
        NotImplementedError
            Raised if the datastore does not support the given transfer mode
            (including the case where ingest is not supported at all).
        DatasetTypeNotSupportedError
            Raised if one or more files to be ingested have a dataset type
            that is not supported by the datastore.
        FileNotFoundError
            Raised if one of the given files does not exist.
        FileExistsError
            Raised if transfer is not `None` but the (internal) location the
            file would be moved to is already occupied.

        Notes
        -----
        Subclasses should implement `_prepIngest` and `_finishIngest` instead
        of implementing `ingest` directly. Datastores that hold and
        delegate to child datastores may want to call those methods as well.

        Subclasses are encouraged to document their supported transfer modes
        in their class documentation.
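
        Examples
        --------
        An illustrative sketch only; the path, the resolved ``refs`` and the
        ``datastore`` instance are assumed to exist already::

            from lsst.daf.butler import FileDataset

            dataset = FileDataset(path="data/calexp.fits", refs=resolved_refs)
            datastore.ingest(dataset, transfer="copy")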
599 """
600 # Allow a datastore to select a default transfer mode
601 transfer = self._overrideTransferMode(*datasets, transfer=transfer)
602 prepData = self._prepIngest(*datasets, transfer=transfer)
603 refs = {ref.id: ref for dataset in datasets for ref in dataset.refs}
604 if None in refs:
605 # Find the file for the error message. There may be multiple
606 # bad refs so look for all of them.
607 unresolved_paths = {}
608 for dataset in datasets:
609 unresolved = []
610 for ref in dataset.refs:
611 if ref.id is None:
612 unresolved.append(ref)
613 if unresolved:
614 unresolved_paths[dataset.path] = unresolved
615 raise RuntimeError("Attempt to ingest unresolved DatasetRef from: "
616 + ",".join(f"{p}: ({[str(r) for r in ref]})"
617 for p, ref in unresolved_paths.items()))
618 if refs.keys() != prepData.refs.keys():
619 unsupported = refs.keys() - prepData.refs.keys()
620 # Group unsupported refs by DatasetType for an informative
621 # but still concise error message.
622 byDatasetType = defaultdict(list)
623 for datasetId in unsupported:
624 ref = refs[datasetId]
625 byDatasetType[ref.datasetType].append(ref)
626 raise DatasetTypeNotSupportedError(
627 "DatasetType(s) not supported in ingest: "
628 + ", ".join(f"{k.name} ({len(v)} dataset(s))" for k, v in byDatasetType.items())
629 )
630 self._finishIngest(prepData, transfer=transfer)

    def transfer_from(self, source_datastore: Datastore, refs: Iterable[DatasetRef],
                      local_refs: Optional[Iterable[DatasetRef]] = None,
                      transfer: str = "auto",
                      artifact_existence: Optional[Dict[ButlerURI, bool]] = None) -> None:
        """Transfer dataset artifacts from another datastore to this one.

        Parameters
        ----------
        source_datastore : `Datastore`
            The datastore from which to transfer artifacts. That datastore
            must be compatible with this datastore receiving the artifacts.
        refs : iterable of `DatasetRef`
            The datasets to transfer from the source datastore.
        local_refs : iterable of `DatasetRef`, optional
            The dataset refs as known to the registry associated with this
            datastore. Can be `None` if the source and target datastores
            are using UUIDs.
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            Choices include "move", "copy",
            "link", "symlink", "relsymlink", and "hardlink". "link" is a
            special transfer mode that will first try to make a hardlink and
            if that fails a symlink will be used instead. "relsymlink" creates
            a relative symlink rather than using an absolute path.
            Most datastores do not support all transfer modes.
            "auto" (the default) is a special option that will let the
            datastore choose the most natural option for itself.
            If the source location and transfer location are identical the
            transfer mode will be ignored.
        artifact_existence : `dict` of [`ButlerURI`, `bool`], optional
            Mapping of datastore artifact to existence. Updated by this
            method with details of all artifacts tested. Can be `None`
            if the caller is not interested.

        Raises
        ------
        TypeError
            Raised if the two datastores are not compatible.
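
        Examples
        --------
        A sketch of the intended call pattern; both datastores and the
        resolved ``refs`` are assumed to exist::

            target_datastore.transfer_from(source_datastore, refs,
                                           transfer="copy")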
670 """
671 if type(self) is not type(source_datastore):
672 raise TypeError(f"Datastore mismatch between this datastore ({type(self)}) and the "
673 f"source datastore ({type(source_datastore)}).")
675 raise NotImplementedError(f"Datastore {type(self)} must implement a transfer_from method.")

    @abstractmethod
    def getURIs(self, datasetRef: DatasetRef,
                predict: bool = False) -> Tuple[Optional[ButlerURI], Dict[str, ButlerURI]]:
        """Return URIs associated with dataset.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required dataset.
        predict : `bool`, optional
            If the datastore does not know about the dataset, should it
            return a predicted URI or not?

        Returns
        -------
        primary : `ButlerURI`
            The URI to the primary artifact associated with this dataset.
            If the dataset was disassembled within the datastore this
            may be `None`.
        components : `dict`
            URIs to any components associated with the dataset artifact.
            Can be empty if there are no components.
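
        Examples
        --------
        Illustrative only; ``datastore`` and ``ref`` are assumed to exist::

            primary, components = datastore.getURIs(ref)
            if primary is None:
                # Dataset was disassembled; inspect the per-component URIs.
                for component, uri in components.items():
                    print(component, uri)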
699 """
700 raise NotImplementedError()

    @abstractmethod
    def getURI(self, datasetRef: DatasetRef, predict: bool = False) -> ButlerURI:
        """URI to the Dataset.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required Dataset.
        predict : `bool`
            If `True` attempt to predict the URI for a dataset if it does
            not exist in datastore.

        Returns
        -------
        uri : `ButlerURI`
            URI pointing to the Dataset within the datastore. If the
            Dataset does not exist in the datastore, the URI may be a guess.
            If the datastore does not have entities that relate well
            to the concept of a URI the returned URI string will be
            descriptive. The returned URI is not guaranteed to be obtainable.

        Raises
        ------
        FileNotFoundError
            Raised if a URI has been requested for a dataset that does not
            exist and guessing is not allowed.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def retrieveArtifacts(self, refs: Iterable[DatasetRef],
                          destination: ButlerURI, transfer: str = "auto",
                          preserve_path: bool = True,
                          overwrite: bool = False) -> List[ButlerURI]:
        """Retrieve the artifacts associated with the supplied refs.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            The datasets for which artifacts are to be retrieved.
            A single ref can result in multiple artifacts. The refs must
            be resolved.
        destination : `ButlerURI`
            Location to write the artifacts.
        transfer : `str`, optional
            Method to use to transfer the artifacts. Must be one of the
            options supported by `ButlerURI.transfer_from()`. "move" is not
            allowed.
        preserve_path : `bool`, optional
            If `True` the full path of the artifact within the datastore
            is preserved. If `False` the final file component of the path
            is used.
        overwrite : `bool`, optional
            If `True` allow transfers to overwrite existing files at the
            destination.

        Returns
        -------
        targets : `list` of `ButlerURI`
            URIs of file artifacts in destination location. Order is not
            preserved.

        Notes
        -----
        For non-file datastores the artifacts written to the destination
        may not match the representation inside the datastore. For example
        a hierarchical data structure in a NoSQL database may well be stored
        as a JSON file.
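
        Examples
        --------
        A sketch retrieving file artifacts into a local directory; the
        names and the destination URI are illustrative assumptions::

            from lsst.daf.butler import ButlerURI

            destination = ButlerURI("retrieved/", forceDirectory=True)
            targets = datastore.retrieveArtifacts(refs, destination,
                                                  transfer="copy")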
769 """
770 raise NotImplementedError()

    @abstractmethod
    def remove(self, datasetRef: DatasetRef) -> None:
        """Indicate to the Datastore that a Dataset can be removed.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required Dataset.

        Raises
        ------
        FileNotFoundError
            When Dataset does not exist.

        Notes
        -----
        Some Datastores may implement this method as a silent no-op to
        disable Dataset deletion through standard interfaces.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def forget(self, refs: Iterable[DatasetRef]) -> None:
        """Indicate to the Datastore that it should remove all records of the
        given datasets, without actually deleting them.

        Parameters
        ----------
        refs : `Iterable` [ `DatasetRef` ]
            References to the datasets being forgotten.

        Notes
        -----
        Asking a datastore to forget a `DatasetRef` it does not hold should be
        a silent no-op, not an error.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def trash(self, ref: Union[DatasetRef, Iterable[DatasetRef]], ignore_errors: bool = True) -> None:
        """Indicate to the Datastore that a Dataset can be moved to the trash.

        Parameters
        ----------
        ref : `DatasetRef` or iterable thereof
            Reference(s) to the required Dataset.
        ignore_errors : `bool`, optional
            Determine whether errors should be ignored. When multiple
            refs are being trashed there will be no per-ref check.

        Raises
        ------
        FileNotFoundError
            When Dataset does not exist and errors are not ignored. Only
            checked if a single ref is supplied (and not in a list).

        Notes
        -----
        Some Datastores may implement this method as a silent no-op to
        disable Dataset deletion through standard interfaces.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def emptyTrash(self, ignore_errors: bool = True) -> None:
        """Remove all datasets from the trash.

        Parameters
        ----------
        ignore_errors : `bool`, optional
            Determine whether errors should be ignored.

        Notes
        -----
        Some Datastores may implement this method as a silent no-op to
        disable Dataset deletion through standard interfaces.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def transfer(self, inputDatastore: Datastore, datasetRef: DatasetRef) -> None:
        """Transfer a dataset from another datastore to this datastore.

        Parameters
        ----------
        inputDatastore : `Datastore`
            The external `Datastore` from which to retrieve the Dataset.
        datasetRef : `DatasetRef`
            Reference to the required Dataset.
        """
        raise NotImplementedError("Must be implemented by subclass")

    def export(self, refs: Iterable[DatasetRef], *,
               directory: Optional[str] = None, transfer: Optional[str] = None) -> Iterable[FileDataset]:
        """Export datasets for transfer to another data repository.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            Dataset references to be exported.
        directory : `str`, optional
            Path to a directory that should contain files corresponding to
            output datasets. Ignored if ``transfer`` is `None`.
        transfer : `str`, optional
            Mode that should be used to move datasets out of the repository.
            Valid options are the same as those of the ``transfer`` argument
            to ``ingest``, and datastores may similarly signal that a transfer
            mode is not supported by raising `NotImplementedError`.

        Returns
        -------
        datasets : iterable of `FileDataset`
            Structs containing information about the exported datasets, in the
            same order as ``refs``.

        Raises
        ------
        NotImplementedError
            Raised if the given transfer mode is not supported.
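
        Examples
        --------
        A sketch of exporting selected datasets to a staging directory;
        ``datastore`` and ``refs`` are assumed to exist::

            exported = list(datastore.export(refs, directory="staging",
                                             transfer="copy"))
            for dataset in exported:
                print(dataset.path, [str(r) for r in dataset.refs])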
891 """
892 raise NotImplementedError(f"Transfer mode {transfer} not supported.")

    @abstractmethod
    def validateConfiguration(self, entities: Iterable[Union[DatasetRef, DatasetType, StorageClass]],
                              logFailures: bool = False) -> None:
        """Validate some of the configuration for this datastore.

        Parameters
        ----------
        entities : iterable of `DatasetRef`, `DatasetType`, or `StorageClass`
            Entities to test against this configuration. Can be differing
            types.
        logFailures : `bool`, optional
            If `True`, output a log message for every validation error
            detected.

        Raises
        ------
        DatastoreValidationError
            Raised if there is a validation problem with a configuration.

        Notes
        -----
        Which parts of the configuration are validated is at the discretion
        of each Datastore implementation.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def validateKey(self,
                    lookupKey: LookupKey, entity: Union[DatasetRef, DatasetType, StorageClass]) -> None:
        """Validate a specific look up key with supplied entity.

        Parameters
        ----------
        lookupKey : `LookupKey`
            Key to use to retrieve information from the datastore
            configuration.
        entity : `DatasetRef`, `DatasetType`, or `StorageClass`
            Entity to compare with configuration retrieved using the
            specified lookup key.

        Raises
        ------
        DatastoreValidationError
            Raised if there is a problem with the combination of entity
            and lookup key.

        Notes
        -----
        Bypasses the normal selection priorities by allowing a key that
        would normally not be selected to be validated.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def getLookupKeys(self) -> Set[LookupKey]:
        """Return all the lookup keys relevant to this datastore.

        Returns
        -------
        keys : `set` of `LookupKey`
            The keys stored internally for looking up information based
            on `DatasetType` name or `StorageClass`.
        """
        raise NotImplementedError("Must be implemented by subclass")

    def needs_expanded_data_ids(
        self,
        transfer: Optional[str],
        entity: Optional[Union[DatasetRef, DatasetType, StorageClass]] = None,
    ) -> bool:
        """Test whether this datastore needs expanded data IDs to ingest.

        Parameters
        ----------
        transfer : `str` or `None`
            Transfer mode for ingest.
        entity : `DatasetRef`, `DatasetType`, or `StorageClass`, optional
            Object representing what will be ingested. If not provided (or not
            specific enough), `True` may be returned even if expanded data
            IDs aren't necessary.

        Returns
        -------
        needed : `bool`
            If `True`, expanded data IDs may be needed. `False` only if
            expansion definitely isn't necessary.
        """
        return True