# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
22"""Support for generic data stores."""
24from __future__ import annotations
26__all__ = ("DatastoreConfig", "Datastore", "DatastoreValidationError")

import contextlib
import dataclasses
import logging
from abc import ABCMeta, abstractmethod
from collections import defaultdict
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    ClassVar,
    Dict,
    Iterable,
    Iterator,
    List,
    Mapping,
    Optional,
    Set,
    Tuple,
    Type,
    Union,
)

from lsst.utils import doImportType

from .config import Config, ConfigSubset
from .constraints import Constraints
from .exceptions import DatasetTypeNotSupportedError, ValidationError
from .fileDataset import FileDataset
from .storageClass import StorageClassFactory

if TYPE_CHECKING:
    from lsst.resources import ResourcePath, ResourcePathExpression

    from ..registry.interfaces import DatasetIdRef, DatastoreRegistryBridgeManager
    from .configSupport import LookupKey
    from .datasets import DatasetRef, DatasetType
    from .datastoreRecordData import DatastoreRecordData
    from .storageClass import StorageClass


class DatastoreConfig(ConfigSubset):
    """Configuration for Datastores."""

    component = "datastore"
    requiredKeys = ("cls",)
    defaultConfigFile = "datastore.yaml"
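

# Example (editor's illustrative sketch, not part of the upstream API): the
# smallest configuration `DatastoreConfig` accepts must provide the required
# "cls" key under the "datastore" component; the class path shown here is
# just for illustration.
#
#     config = DatastoreConfig(
#         {
#             "datastore": {
#                 "cls": "lsst.daf.butler.datastores.inMemoryDatastore.InMemoryDatastore"
#             }
#         }
#     )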


class DatastoreValidationError(ValidationError):
    """There is a problem with the Datastore configuration."""

    pass


@dataclasses.dataclass(frozen=True)
class Event:
    __slots__ = {"name", "undoFunc", "args", "kwargs"}
    name: str
    undoFunc: Callable
    args: tuple
    kwargs: dict


class IngestPrepData:
    """A helper base class for `Datastore` ingest implementations.

    Datastore implementations will generally need a custom implementation of
    this class.

    Should be accessed as ``Datastore.IngestPrepData`` instead of via direct
    import.

    Parameters
    ----------
    refs : iterable of `DatasetRef`
        References for the datasets that can be ingested by this datastore.
    """

    def __init__(self, refs: Iterable[DatasetRef]):
        self.refs = {ref.id: ref for ref in refs}
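

# Example (editor's illustrative sketch, not part of the upstream API): a
# datastore that must carry per-dataset state from `_prepIngest` to
# `_finishIngest` can subclass `IngestPrepData`; the extra ``paths``
# attribute here is hypothetical.
#
#     class MyIngestPrepData(IngestPrepData):
#         def __init__(self, refs, paths):
#             super().__init__(refs)
#             self.paths = paths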


class DatastoreTransaction:
    """Keeps a log of `Datastore` activity and allows rollback.

    Parameters
    ----------
    parent : `DatastoreTransaction`, optional
        The parent transaction (if any).
    """

    Event: ClassVar[Type] = Event

    parent: Optional[DatastoreTransaction]
    """The parent transaction. (`DatastoreTransaction`, optional)"""

    def __init__(self, parent: Optional[DatastoreTransaction] = None):
        self.parent = parent
        self._log: List[Event] = []

    def registerUndo(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> None:
        """Register event with undo function.

        Parameters
        ----------
        name : `str`
            Name of the event.
        undoFunc : `Callable`
            Function to undo this event.
        *args
            Positional arguments to ``undoFunc``.
        **kwargs
            Keyword arguments to ``undoFunc``.
        """
        self._log.append(self.Event(name, undoFunc, args, kwargs))
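
    # Example (editor's illustrative sketch, not part of the upstream API):
    # pair a side effect with the callable that reverses it; rollback() then
    # invokes os.remove(path). Assumes ``import os``; the path is made up.
    #
    #     txn = DatastoreTransaction()
    #     path = "/tmp/example.dat"
    #     open(path, "w").close()
    #     txn.registerUndo("create file", os.remove, path)
    #     txn.rollback()  # removes /tmp/example.dat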

    @contextlib.contextmanager
    def undoWith(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> Iterator[None]:
        """Register undo function if nested operation succeeds.

        Calls `registerUndo`.

        This can be used to wrap individual undo-able statements within a
        DatastoreTransaction block. Multiple statements that can fail
        separately should not be part of the same `undoWith` block.

        All arguments are forwarded directly to `registerUndo`.
        """
        try:
            yield None
        except BaseException:
            raise
        else:
            self.registerUndo(name, undoFunc, *args, **kwargs)
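
    # Example (editor's illustrative sketch, not part of the upstream API):
    # the undo callback is registered only if the wrapped statement succeeds,
    # so a failed write never queues a spurious os.remove. Assumes ``import
    # os`` and a hypothetical ``write_artifact`` helper.
    #
    #     with txn.undoWith("write artifact", os.remove, path):
    #         write_artifact(path)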

    def rollback(self) -> None:
        """Roll back all events in this transaction."""
        log = logging.getLogger(__name__)
        while self._log:
            ev = self._log.pop()
            try:
                log.debug(
                    "Rolling back transaction: %s: %s(%s,%s)",
                    ev.name,
                    ev.undoFunc,
                    ",".join(str(a) for a in ev.args),
                    ",".join(f"{k}={v}" for k, v in ev.kwargs.items()),
                )
            except Exception:
                # In case we had a problem in stringification of arguments.
                log.warning("Rolling back transaction: %s", ev.name)
            try:
                ev.undoFunc(*ev.args, **ev.kwargs)
            except BaseException as e:
                # Deliberately swallow any error that occurs while unrolling.
                log.warning("Exception: %s caught while unrolling: %s", e, ev.name)

    def commit(self) -> None:
        """Commit this transaction."""
        if self.parent is None:
            # Just forget about the events; they have already happened.
            return
        else:
            # We may still want to roll back events from this transaction
            # as part of the parent.
            self.parent._log.extend(self._log)
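

# Example (editor's illustrative sketch, not part of the upstream API):
# committing a nested transaction hands its undo log to the parent, so the
# events can still be rolled back if the parent itself fails later.
#
#     outer = DatastoreTransaction()
#     inner = DatastoreTransaction(parent=outer)
#     inner.registerUndo("step", print, "undone")
#     inner.commit()    # undo log now lives in ``outer``
#     outer.rollback()  # prints "undone"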


class Datastore(metaclass=ABCMeta):
    """Datastore interface.

    Parameters
    ----------
    config : `DatastoreConfig` or `str`
        Load configuration either from an existing config instance or by
        referring to a configuration file.
    bridgeManager : `DatastoreRegistryBridgeManager`
        Object that manages the interface between `Registry` and datastores.
    butlerRoot : `str`, optional
        New datastore root to use to override the configuration value.
    """

    defaultConfigFile: ClassVar[Optional[str]] = None
    """Path to configuration defaults. Accessed within the ``config`` resource
    or relative to a search path. Can be `None` if no defaults are specified.
    """

    containerKey: ClassVar[Optional[str]] = None
    """Name of the key containing a list of subconfigurations that also
    need to be merged with defaults and will likely use different Python
    datastore classes (but all using `DatastoreConfig`). Assumed to be a
    list of configurations that can be represented in a `DatastoreConfig`
    and containing a "cls" definition. `None` indicates that no containers
    are expected in this Datastore."""

    isEphemeral: bool = False
    """Indicate whether this Datastore is ephemeral or not. An ephemeral
    datastore is one where the contents of the datastore will not exist
    across process restarts. This value can change per-instance."""

    config: DatastoreConfig
    """Configuration used to create Datastore."""

    name: str
    """Label associated with this Datastore."""

    storageClassFactory: StorageClassFactory
    """Factory for creating storage class instances from name."""

    constraints: Constraints
    """Constraints to apply when putting datasets into the datastore."""

    # MyPy does not like for this to be annotated as any kind of type, because
    # it can't do static checking on type variables that can change at
    # runtime.
    IngestPrepData: ClassVar[Any] = IngestPrepData
    """Helper base class for ingest implementations."""

    @classmethod
    @abstractmethod
    def setConfigRoot(cls, root: str, config: Config, full: Config, overwrite: bool = True) -> None:
        """Set filesystem-dependent config options for this datastore.

        The options will be appropriate for a new empty repository with the
        given root.

        Parameters
        ----------
        root : `str`
            Filesystem path to the root of the data repository.
        config : `Config`
            A `Config` to update. Only the subset understood by
            this component will be updated. Will not expand
            defaults.
        full : `Config`
            A complete config with all defaults expanded that can be
            converted to a `DatastoreConfig`. Read-only and will not be
            modified by this method.
            Repository-specific options that should not be obtained
            from defaults when Butler instances are constructed
            should be copied from ``full`` to ``config``.
        overwrite : `bool`, optional
            If `False`, do not modify a value in ``config`` if the value
            already exists. Default is always to overwrite with the provided
            ``root``.

        Notes
        -----
        If a keyword is explicitly defined in the supplied ``config`` it
        will not be overridden by this method if ``overwrite`` is `False`.
        This allows explicit values set in external configs to be retained.
        """
        raise NotImplementedError()

    @staticmethod
    def fromConfig(
        config: Config,
        bridgeManager: DatastoreRegistryBridgeManager,
        butlerRoot: Optional[ResourcePathExpression] = None,
    ) -> "Datastore":
        """Create datastore from type specified in config file.

        Parameters
        ----------
        config : `Config`
            Configuration instance.
        bridgeManager : `DatastoreRegistryBridgeManager`
            Object that manages the interface between `Registry` and
            datastores.
        butlerRoot : `str`, optional
            Butler root directory.
        """
        cls = doImportType(config["datastore", "cls"])
        if not issubclass(cls, Datastore):
            raise TypeError(f"Imported child class {config['datastore', 'cls']} is not a Datastore")
        return cls(config=config, bridgeManager=bridgeManager, butlerRoot=butlerRoot)
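
    # Example (editor's illustrative sketch, not part of the upstream API):
    # the concrete class is chosen entirely by the "datastore.cls" entry in
    # the configuration, so callers never name a subclass directly;
    # ``butler_config`` and ``bridge_manager`` are assumed to exist.
    #
    #     datastore = Datastore.fromConfig(butler_config, bridge_manager)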

    def __init__(
        self,
        config: Union[Config, str],
        bridgeManager: DatastoreRegistryBridgeManager,
        butlerRoot: Optional[ResourcePathExpression] = None,
    ):
        self.config = DatastoreConfig(config)
        self.name = "ABCDataStore"
        self._transaction: Optional[DatastoreTransaction] = None

        # All Datastores need storage classes and constraints.
        self.storageClassFactory = StorageClassFactory()

        # And read the constraints list.
        constraintsConfig = self.config.get("constraints")
        self.constraints = Constraints(constraintsConfig, universe=bridgeManager.universe)

    def __str__(self) -> str:
        return self.name

    def __repr__(self) -> str:
        return self.name

    @property
    def names(self) -> Tuple[str, ...]:
        """Names associated with this datastore returned as a tuple.

        Can be different from ``name`` for a chaining datastore.
        """
        # Default implementation returns solely the name itself.
        return (self.name,)

    @contextlib.contextmanager
    def transaction(self) -> Iterator[DatastoreTransaction]:
        """Context manager supporting `Datastore` transactions.

        Transactions can be nested, and are to be used in combination with
        `Registry.transaction`.
        """
        self._transaction = DatastoreTransaction(self._transaction)
        try:
            yield self._transaction
        except BaseException:
            self._transaction.rollback()
            raise
        else:
            self._transaction.commit()
        self._transaction = self._transaction.parent
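
    # Example (editor's illustrative sketch, not part of the upstream API):
    # any undo-able work registered while the context is open is rolled back
    # automatically if the block raises; ``dataset`` and ``ref`` are assumed.
    #
    #     with datastore.transaction():
    #         datastore.put(dataset, ref)
    #         # an exception here rolls back the put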

    @abstractmethod
    def knows(self, ref: DatasetRef) -> bool:
        """Check if the dataset is known to the datastore.

        Does not check for existence of any artifact.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required dataset.

        Returns
        -------
        exists : `bool`
            `True` if the dataset is known to the datastore.
        """
        raise NotImplementedError()

    def mexists(
        self, refs: Iterable[DatasetRef], artifact_existence: Optional[Dict[ResourcePath, bool]] = None
    ) -> Dict[DatasetRef, bool]:
        """Check the existence of multiple datasets at once.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            The datasets to be checked.
        artifact_existence : `dict` [`lsst.resources.ResourcePath`, `bool`]
            Optional mapping of datastore artifact to existence. Updated by
            this method with details of all artifacts tested. Can be `None`
            if the caller is not interested.

        Returns
        -------
        existence : `dict` of [`DatasetRef`, `bool`]
            Mapping from dataset to boolean indicating existence.
        """
        existence: Dict[DatasetRef, bool] = {}
        # Non-optimized default.
        for ref in refs:
            existence[ref] = self.exists(ref)
        return existence
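
    # Example (editor's illustrative sketch, not part of the upstream API):
    # callers can pass a shared ``artifact_existence`` cache so that bulk
    # checks in subclasses that support it avoid re-testing artifacts.
    #
    #     cache: Dict[ResourcePath, bool] = {}
    #     existence = datastore.mexists(refs, artifact_existence=cache)
    #     missing = [ref for ref, ok in existence.items() if not ok]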

    @abstractmethod
    def exists(self, datasetRef: DatasetRef) -> bool:
        """Check if the dataset exists in the datastore.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required dataset.

        Returns
        -------
        exists : `bool`
            `True` if the entity exists in the `Datastore`.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def get(self, datasetRef: DatasetRef, parameters: Optional[Mapping[str, Any]] = None) -> Any:
        """Load an `InMemoryDataset` from the store.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required Dataset.
        parameters : `dict`, optional
            `StorageClass`-specific parameters that specify a slice of the
            Dataset to be loaded.

        Returns
        -------
        inMemoryDataset : `object`
            Requested Dataset or slice thereof as an InMemoryDataset.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def put(self, inMemoryDataset: Any, datasetRef: DatasetRef) -> None:
        """Write an `InMemoryDataset` with a given `DatasetRef` to the store.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Dataset to store.
        datasetRef : `DatasetRef`
            Reference to the associated Dataset.
        """
        raise NotImplementedError("Must be implemented by subclass")

    def _overrideTransferMode(self, *datasets: FileDataset, transfer: Optional[str] = None) -> Optional[str]:
        """Allow ingest transfer mode to be defaulted based on datasets.

        Parameters
        ----------
        datasets : `FileDataset`
            Each positional argument is a struct containing information about
            a file to be ingested, including its path (either absolute or
            relative to the datastore root, if applicable), a complete
            `DatasetRef` (with ``dataset_id not None``), and optionally a
            formatter class or its fully-qualified string name. If a formatter
            is not provided, this method should populate that attribute with
            the formatter the datastore would use for `put`. Subclasses are
            also permitted to modify the path attribute (typically to put it
            in what the datastore considers its standard form).
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            See `ingest` for details of transfer modes.

        Returns
        -------
        newTransfer : `str`
            Transfer mode to use. Will be identical to the supplied transfer
            mode unless "auto" is used.
        """
        if transfer != "auto":
            return transfer
        raise RuntimeError(f"{transfer} is not allowed without specialization.")

    def _prepIngest(self, *datasets: FileDataset, transfer: Optional[str] = None) -> IngestPrepData:
        """Process datasets to identify which ones can be ingested.

        Parameters
        ----------
        datasets : `FileDataset`
            Each positional argument is a struct containing information about
            a file to be ingested, including its path (either absolute or
            relative to the datastore root, if applicable), a complete
            `DatasetRef` (with ``dataset_id not None``), and optionally a
            formatter class or its fully-qualified string name. If a formatter
            is not provided, this method should populate that attribute with
            the formatter the datastore would use for `put`. Subclasses are
            also permitted to modify the path attribute (typically to put it
            in what the datastore considers its standard form).
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            See `ingest` for details of transfer modes.

        Returns
        -------
        data : `IngestPrepData`
            An instance of a subclass of `IngestPrepData`, used to pass
            arbitrary data from `_prepIngest` to `_finishIngest`. This should
            include only the datasets this datastore can actually ingest;
            others should be silently ignored (`Datastore.ingest` will inspect
            `IngestPrepData.refs` and raise `DatasetTypeNotSupportedError` if
            necessary).

        Raises
        ------
        NotImplementedError
            Raised if the datastore does not support the given transfer mode
            (including the case where ingest is not supported at all).
        FileNotFoundError
            Raised if one of the given files does not exist.
        FileExistsError
            Raised if transfer is not `None` but the (internal) location the
            file would be moved to is already occupied.

        Notes
        -----
        This method (along with `_finishIngest`) should be implemented by
        subclasses to provide ingest support instead of implementing `ingest`
        directly.

        `_prepIngest` should not modify the data repository or given files in
        any way; all changes should be deferred to `_finishIngest`.

        When possible, exceptions should be raised in `_prepIngest` instead of
        `_finishIngest`. `NotImplementedError` exceptions that indicate that
        the transfer mode is not supported must be raised by `_prepIngest`
        instead of `_finishIngest`.
        """
        raise NotImplementedError(f"Datastore {self} does not support direct file-based ingest.")

    def _finishIngest(
        self, prepData: IngestPrepData, *, transfer: Optional[str] = None, record_validation_info: bool = True
    ) -> None:
        """Complete an ingest operation.

        Parameters
        ----------
        prepData : `IngestPrepData`
            An instance of a subclass of `IngestPrepData`. Guaranteed to be
            the direct result of a call to `_prepIngest` on this datastore.
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            See `ingest` for details of transfer modes.
        record_validation_info : `bool`, optional
            If `True`, the default, the datastore can record validation
            information associated with the file. If `False` the datastore
            will not attempt to track any information such as checksums
            or file sizes. This can be useful if such information is tracked
            in an external system or if the file is to be compressed in place.
            It is up to the datastore whether this parameter is relevant.

        Raises
        ------
        FileNotFoundError
            Raised if one of the given files does not exist.
        FileExistsError
            Raised if transfer is not `None` but the (internal) location the
            file would be moved to is already occupied.

        Notes
        -----
        This method (along with `_prepIngest`) should be implemented by
        subclasses to provide ingest support instead of implementing `ingest`
        directly.
        """
        raise NotImplementedError(f"Datastore {self} does not support direct file-based ingest.")
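
    # Example (editor's illustrative sketch, not part of the upstream API):
    # the shape of an ingest-capable subclass. All checks happen in
    # ``_prepIngest``, which must not modify anything; ``_finishIngest``
    # performs the transfers. ``_transfer_file`` is hypothetical.
    #
    #     class MyDatastore(Datastore):
    #         def _prepIngest(self, *datasets, transfer=None):
    #             if transfer not in (None, "copy", "move"):
    #                 raise NotImplementedError(
    #                     f"Transfer mode {transfer} not supported."
    #                 )
    #             refs = [ref for d in datasets for ref in d.refs]
    #             return self.IngestPrepData(refs)
    #
    #         def _finishIngest(self, prepData, *, transfer=None,
    #                           record_validation_info=True):
    #             for ref in prepData.refs.values():
    #                 self._transfer_file(ref, transfer)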

    def ingest(
        self, *datasets: FileDataset, transfer: Optional[str] = None, record_validation_info: bool = True
    ) -> None:
        """Ingest one or more files into the datastore.

        Parameters
        ----------
        datasets : `FileDataset`
            Each positional argument is a struct containing information about
            a file to be ingested, including its path (either absolute or
            relative to the datastore root, if applicable), a complete
            `DatasetRef` (with ``dataset_id not None``), and optionally a
            formatter class or its fully-qualified string name. If a formatter
            is not provided, the one the datastore would use for ``put`` on
            that dataset is assumed.
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            If `None` (default), the file must already be in a location
            appropriate for the datastore (e.g. within its root directory),
            and will not be modified. Other choices include "move", "copy",
            "link", "symlink", "relsymlink", and "hardlink". "link" is a
            special transfer mode that will first try to make a hardlink and
            if that fails a symlink will be used instead. "relsymlink" creates
            a relative symlink rather than use an absolute path.
            Most datastores do not support all transfer modes.
            "auto" is a special option that will let the
            data store choose the most natural option for itself.
        record_validation_info : `bool`, optional
            If `True`, the default, the datastore can record validation
            information associated with the file. If `False` the datastore
            will not attempt to track any information such as checksums
            or file sizes. This can be useful if such information is tracked
            in an external system or if the file is to be compressed in place.
            It is up to the datastore whether this parameter is relevant.

        Raises
        ------
        NotImplementedError
            Raised if the datastore does not support the given transfer mode
            (including the case where ingest is not supported at all).
        DatasetTypeNotSupportedError
            Raised if one or more files to be ingested have a dataset type
            that is not supported by the datastore.
        FileNotFoundError
            Raised if one of the given files does not exist.
        FileExistsError
            Raised if transfer is not `None` but the (internal) location the
            file would be moved to is already occupied.

        Notes
        -----
        Subclasses should implement `_prepIngest` and `_finishIngest` instead
        of implementing `ingest` directly. Datastores that hold and
        delegate to child datastores may want to call those methods as well.

        Subclasses are encouraged to document their supported transfer modes
        in their class documentation.
        """
        # Allow a datastore to select a default transfer mode.
        transfer = self._overrideTransferMode(*datasets, transfer=transfer)
        prepData = self._prepIngest(*datasets, transfer=transfer)
        refs = {ref.id: ref for dataset in datasets for ref in dataset.refs}
        if None in refs:
            # Find the file for the error message. There may be multiple
            # bad refs so look for all of them.
            unresolved_paths = {}
            for dataset in datasets:
                unresolved = []
                for ref in dataset.refs:
                    if ref.id is None:
                        unresolved.append(ref)
                if unresolved:
                    unresolved_paths[dataset.path] = unresolved
            raise RuntimeError(
                "Attempt to ingest unresolved DatasetRef from: "
                + ",".join(f"{path}: ({[str(r) for r in bad]})" for path, bad in unresolved_paths.items())
            )
        if refs.keys() != prepData.refs.keys():
            unsupported = refs.keys() - prepData.refs.keys()
            # Group unsupported refs by DatasetType for an informative
            # but still concise error message.
            byDatasetType = defaultdict(list)
            for datasetId in unsupported:
                ref = refs[datasetId]
                byDatasetType[ref.datasetType].append(ref)
            raise DatasetTypeNotSupportedError(
                "DatasetType(s) not supported in ingest: "
                + ", ".join(f"{k.name} ({len(v)} dataset(s))" for k, v in byDatasetType.items())
            )
        self._finishIngest(prepData, transfer=transfer, record_validation_info=record_validation_info)
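
    # Example (editor's illustrative sketch, not part of the upstream API):
    # ingesting an existing file by copying it into the datastore; the path
    # and the resolved ``ref`` are assumptions for illustration.
    #
    #     datastore.ingest(
    #         FileDataset(path="/data/raw/exposure.fits", refs=ref),
    #         transfer="copy",
    #     )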

    def transfer_from(
        self,
        source_datastore: Datastore,
        refs: Iterable[DatasetRef],
        local_refs: Optional[Iterable[DatasetRef]] = None,
        transfer: str = "auto",
        artifact_existence: Optional[Dict[ResourcePath, bool]] = None,
    ) -> None:
        """Transfer dataset artifacts from another datastore to this one.

        Parameters
        ----------
        source_datastore : `Datastore`
            The datastore from which to transfer artifacts. That datastore
            must be compatible with this datastore receiving the artifacts.
        refs : iterable of `DatasetRef`
            The datasets to transfer from the source datastore.
        local_refs : iterable of `DatasetRef`, optional
            The dataset refs in the registry associated with this datastore.
            Can be `None` if the source and target datastores are using UUIDs.
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            Choices include "move", "copy",
            "link", "symlink", "relsymlink", and "hardlink". "link" is a
            special transfer mode that will first try to make a hardlink and
            if that fails a symlink will be used instead. "relsymlink" creates
            a relative symlink rather than use an absolute path.
            Most datastores do not support all transfer modes.
            "auto" (the default) is a special option that will let the
            data store choose the most natural option for itself.
            If the source location and transfer location are identical the
            transfer mode will be ignored.
        artifact_existence : `dict` [`lsst.resources.ResourcePath`, `bool`]
            Optional mapping of datastore artifact to existence. Updated by
            this method with details of all artifacts tested. Can be `None`
            if the caller is not interested.

        Raises
        ------
        TypeError
            Raised if the two datastores are not compatible.
        """
        if type(self) is not type(source_datastore):
            raise TypeError(
                f"Datastore mismatch between this datastore ({type(self)}) and the "
                f"source datastore ({type(source_datastore)})."
            )

        raise NotImplementedError(f"Datastore {type(self)} must implement a transfer_from method.")

    @abstractmethod
    def getURIs(
        self, datasetRef: DatasetRef, predict: bool = False
    ) -> Tuple[Optional[ResourcePath], Dict[str, ResourcePath]]:
        """Return URIs associated with dataset.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required dataset.
        predict : `bool`, optional
            If the datastore does not know about the dataset, should it
            return a predicted URI or not?

        Returns
        -------
        primary : `lsst.resources.ResourcePath`
            The URI to the primary artifact associated with this dataset.
            If the dataset was disassembled within the datastore this
            may be `None`.
        components : `dict`
            URIs to any components associated with the dataset artifact.
            Can be empty if there are no components.
        """
        raise NotImplementedError()

    @abstractmethod
    def getURI(self, datasetRef: DatasetRef, predict: bool = False) -> ResourcePath:
        """URI to the Dataset.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required Dataset.
        predict : `bool`
            If `True` attempt to predict the URI for a dataset if it does
            not exist in datastore.

        Returns
        -------
        uri : `lsst.resources.ResourcePath`
            URI pointing to the Dataset within the datastore. If the
            Dataset does not exist in the datastore, the URI may be a guess.
            If the datastore does not have entities that relate well
            to the concept of a URI the returned URI will be
            descriptive. The returned URI is not guaranteed to be obtainable.

        Raises
        ------
        FileNotFoundError
            Raised if a URI has been requested for a dataset that does not
            exist and guessing is not allowed.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def retrieveArtifacts(
        self,
        refs: Iterable[DatasetRef],
        destination: ResourcePath,
        transfer: str = "auto",
        preserve_path: bool = True,
        overwrite: bool = False,
    ) -> List[ResourcePath]:
        """Retrieve the artifacts associated with the supplied refs.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            The datasets for which artifacts are to be retrieved.
            A single ref can result in multiple artifacts. The refs must
            be resolved.
        destination : `lsst.resources.ResourcePath`
            Location to write the artifacts.
        transfer : `str`, optional
            Method to use to transfer the artifacts. Must be one of the
            options supported by `lsst.resources.ResourcePath.transfer_from`.
            "move" is not allowed.
        preserve_path : `bool`, optional
            If `True` the full path of the artifact within the datastore
            is preserved. If `False` the final file component of the path
            is used.
        overwrite : `bool`, optional
            If `True` allow transfers to overwrite existing files at the
            destination.

        Returns
        -------
        targets : `list` of `lsst.resources.ResourcePath`
            URIs of file artifacts in destination location. Order is not
            preserved.

        Notes
        -----
        For non-file datastores the artifacts written to the destination
        may not match the representation inside the datastore. For example
        a hierarchical data structure in a NoSQL database may well be stored
        as a JSON file.
        """
        raise NotImplementedError()
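
    # Example (editor's illustrative sketch, not part of the upstream API):
    # copying the file artifacts for some refs into a local directory,
    # flattening the datastore's internal layout.
    #
    #     from lsst.resources import ResourcePath
    #
    #     destination = ResourcePath("/tmp/export/", forceDirectory=True)
    #     targets = datastore.retrieveArtifacts(
    #         refs, destination, transfer="copy", preserve_path=False
    #     )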

    @abstractmethod
    def remove(self, datasetRef: DatasetRef) -> None:
        """Indicate to the Datastore that a Dataset can be removed.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required Dataset.

        Raises
        ------
        FileNotFoundError
            Raised when the Dataset does not exist.

        Notes
        -----
        Some Datastores may implement this method as a silent no-op to
        disable Dataset deletion through standard interfaces.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def forget(self, refs: Iterable[DatasetRef]) -> None:
        """Indicate to the Datastore that it should remove all records of the
        given datasets, without actually deleting them.

        Parameters
        ----------
        refs : `Iterable` [ `DatasetRef` ]
            References to the datasets being forgotten.

        Notes
        -----
        Asking a datastore to forget a `DatasetRef` it does not hold should be
        a silent no-op, not an error.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def trash(self, ref: Union[DatasetRef, Iterable[DatasetRef]], ignore_errors: bool = True) -> None:
        """Indicate to the Datastore that a Dataset can be moved to the trash.

        Parameters
        ----------
        ref : `DatasetRef` or iterable thereof
            Reference(s) to the required Dataset.
        ignore_errors : `bool`, optional
            Determine whether errors should be ignored. When multiple
            refs are being trashed there will be no per-ref check.

        Raises
        ------
        FileNotFoundError
            Raised when the Dataset does not exist and errors are not being
            ignored. Only checked if a single ref is supplied (and not in a
            list).

        Notes
        -----
        Some Datastores may implement this method as a silent no-op to
        disable Dataset deletion through standard interfaces.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def emptyTrash(self, ignore_errors: bool = True) -> None:
        """Remove all datasets from the trash.

        Parameters
        ----------
        ignore_errors : `bool`, optional
            Determine whether errors should be ignored.

        Notes
        -----
        Some Datastores may implement this method as a silent no-op to
        disable Dataset deletion through standard interfaces.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def transfer(self, inputDatastore: Datastore, datasetRef: DatasetRef) -> None:
        """Transfer a dataset from another datastore to this datastore.

        Parameters
        ----------
        inputDatastore : `Datastore`
            The external `Datastore` from which to retrieve the Dataset.
        datasetRef : `DatasetRef`
            Reference to the required Dataset.
        """
        raise NotImplementedError("Must be implemented by subclass")

    def export(
        self, refs: Iterable[DatasetRef], *, directory: Optional[str] = None, transfer: Optional[str] = None
    ) -> Iterable[FileDataset]:
        """Export datasets for transfer to another data repository.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            Dataset references to be exported.
        directory : `str`, optional
            Path to a directory that should contain files corresponding to
            output datasets. Ignored if ``transfer`` is `None`.
        transfer : `str`, optional
            Mode that should be used to move datasets out of the repository.
            Valid options are the same as those of the ``transfer`` argument
            to ``ingest``, and datastores may similarly signal that a transfer
            mode is not supported by raising `NotImplementedError`.

        Returns
        -------
        datasets : iterable of `FileDataset`
            Structs containing information about the exported datasets, in the
            same order as ``refs``.

        Raises
        ------
        NotImplementedError
            Raised if the given transfer mode is not supported.
        """
        raise NotImplementedError(f"Transfer mode {transfer} not supported.")

    @abstractmethod
    def validateConfiguration(
        self, entities: Iterable[Union[DatasetRef, DatasetType, StorageClass]], logFailures: bool = False
    ) -> None:
        """Validate some of the configuration for this datastore.

        Parameters
        ----------
        entities : iterable of `DatasetRef`, `DatasetType`, or `StorageClass`
            Entities to test against this configuration. Can be differing
            types.
        logFailures : `bool`, optional
            If `True`, output a log message for every validation error
            detected.

        Raises
        ------
        DatastoreValidationError
            Raised if there is a validation problem with a configuration.

        Notes
        -----
        Which parts of the configuration are validated is at the discretion
        of each Datastore implementation.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def validateKey(self, lookupKey: LookupKey, entity: Union[DatasetRef, DatasetType, StorageClass]) -> None:
        """Validate a specific lookup key with the supplied entity.

        Parameters
        ----------
        lookupKey : `LookupKey`
            Key to use to retrieve information from the datastore
            configuration.
        entity : `DatasetRef`, `DatasetType`, or `StorageClass`
            Entity to compare with configuration retrieved using the
            specified lookup key.

        Raises
        ------
        DatastoreValidationError
            Raised if there is a problem with the combination of entity
            and lookup key.

        Notes
        -----
        Bypasses the normal selection priorities by allowing a key that
        would normally not be selected to be validated.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def getLookupKeys(self) -> Set[LookupKey]:
        """Return all the lookup keys relevant to this datastore.

        Returns
        -------
        keys : `set` of `LookupKey`
            The keys stored internally for looking up information based
            on `DatasetType` name or `StorageClass`.
        """
        raise NotImplementedError("Must be implemented by subclass")

    def needs_expanded_data_ids(
        self,
        transfer: Optional[str],
        entity: Optional[Union[DatasetRef, DatasetType, StorageClass]] = None,
    ) -> bool:
        """Test whether this datastore needs expanded data IDs to ingest.

        Parameters
        ----------
        transfer : `str` or `None`
            Transfer mode for ingest.
        entity : `DatasetRef`, `DatasetType`, or `StorageClass`, optional
            Object representing what will be ingested. If not provided (or not
            specific enough), `True` may be returned even if expanded data
            IDs aren't necessary.

        Returns
        -------
        needed : `bool`
            If `True`, expanded data IDs may be needed. `False` only if
            expansion definitely isn't necessary.
        """
        return True
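
    # Example (editor's illustrative sketch, not part of the upstream API):
    # a subclass that never uses dimension metadata when naming or placing
    # artifacts could override this conservatively safe default.
    #
    #     class MyDatastore(Datastore):
    #         def needs_expanded_data_ids(self, transfer, entity=None):
    #             return False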

    @abstractmethod
    def import_records(
        self,
        data: Mapping[str, DatastoreRecordData],
    ) -> None:
        """Import datastore location and record data from an in-memory data
        structure.

        Parameters
        ----------
        data : `Mapping` [ `str`, `DatastoreRecordData` ]
            Datastore records indexed by datastore name. May contain data for
            other `Datastore` instances (generally because they are chained to
            this one), which should be ignored.

        Notes
        -----
        Implementations should generally not check that any external resources
        (e.g. files) referred to by these records actually exist, for
        performance reasons; we expect higher-level code to guarantee that
        they do.

        Implementations are responsible for calling
        `DatastoreRegistryBridge.insert` on all datasets in ``data.locations``
        where the key is in `names`, as well as loading any opaque table data.
        """
        raise NotImplementedError()

    @abstractmethod
    def export_records(
        self,
        refs: Iterable[DatasetIdRef],
    ) -> Mapping[str, DatastoreRecordData]:
        """Export datastore records and locations to an in-memory data
        structure.

        Parameters
        ----------
        refs : `Iterable` [ `DatasetIdRef` ]
            Datasets to save. This may include datasets not known to this
            datastore, which should be ignored.

        Returns
        -------
        data : `Mapping` [ `str`, `DatastoreRecordData` ]
            Exported datastore records indexed by datastore name.
        """
        raise NotImplementedError()