Coverage for python/lsst/daf/butler/core/datastore.py: 42%
244 statements
coverage.py v6.5.0, created at 2022-10-21 02:03 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22"""Support for generic data stores."""
24from __future__ import annotations
26__all__ = ("DatastoreConfig", "Datastore", "DatastoreValidationError", "DatasetRefURIs")
28import contextlib
29import dataclasses
30import logging
31from abc import ABCMeta, abstractmethod
32from collections import abc, defaultdict
33from typing import (
34 TYPE_CHECKING,
35 Any,
36 Callable,
37 ClassVar,
38 Dict,
39 Iterable,
40 Iterator,
41 List,
42 Mapping,
43 Optional,
44 Set,
45 Tuple,
46 Type,
47 Union,
48)
50from lsst.utils import doImportType
52from .config import Config, ConfigSubset
53from .constraints import Constraints
54from .exceptions import DatasetTypeNotSupportedError, ValidationError
55from .fileDataset import FileDataset
56from .storageClass import StorageClassFactory
58if TYPE_CHECKING:
59 from lsst.resources import ResourcePath, ResourcePathExpression
61 from ..registry.interfaces import DatasetIdRef, DatastoreRegistryBridgeManager
62 from .configSupport import LookupKey
63 from .datasets import DatasetRef, DatasetType
64 from .datastoreRecordData import DatastoreRecordData
65 from .storageClass import StorageClass
68class DatastoreConfig(ConfigSubset):
69 """Configuration for Datastores."""
71 component = "datastore"
72 requiredKeys = ("cls",)
73 defaultConfigFile = "datastore.yaml"
76class DatastoreValidationError(ValidationError):
77 """There is a problem with the Datastore configuration."""
79 pass
82@dataclasses.dataclass(frozen=True)
83class Event:
84 __slots__ = {"name", "undoFunc", "args", "kwargs"}
85 name: str
86 undoFunc: Callable
87 args: tuple
88 kwargs: dict
91class IngestPrepData:
92 """A helper base class for `Datastore` ingest implementations.
94 Datastore implementations will generally need a custom implementation of
95 this class.
97 Should be accessed as ``Datastore.IngestPrepData`` instead of via direct
98 import.
100 Parameters
101 ----------
102 refs : iterable of `DatasetRef`
103 References for the datasets that can be ingested by this datastore.
104 """
106 def __init__(self, refs: Iterable[DatasetRef]):
107 self.refs = {ref.id: ref for ref in refs}
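# Example (minimal sketch): a datastore-specific subclass of IngestPrepData
# that carries extra per-ingest state alongside the refs; the class name and
# the ``planned_paths`` attribute are hypothetical.
#
#     class MyIngestPrepData(IngestPrepData):
#         def __init__(self, refs, planned_paths):
#             super().__init__(refs)
#             self.planned_paths = planned_paths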
110class DatastoreTransaction:
111 """Keeps a log of `Datastore` activity and allow rollback.
113 Parameters
114 ----------
115 parent : `DatastoreTransaction`, optional
116 The parent transaction (if any)
117 """
119 Event: ClassVar[Type] = Event
121 parent: Optional[DatastoreTransaction]
122 """The parent transaction. (`DatastoreTransaction`, optional)"""
124 def __init__(self, parent: Optional[DatastoreTransaction] = None):
125 self.parent = parent
126 self._log: List[Event] = []
128 def registerUndo(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> None:
129 """Register event with undo function.
131 Parameters
132 ----------
133 name : `str`
134 Name of the event.
135 undoFunc : callable
136 Function to undo this event.
137 *args
138 Positional arguments to `undoFunc`.
139 **kwargs
140 Keyword arguments to `undoFunc`.
141 """
142 self._log.append(self.Event(name, undoFunc, args, kwargs))
144 @contextlib.contextmanager
145 def undoWith(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> Iterator[None]:
146 """Register undo function if nested operation succeeds.
148 Calls `registerUndo`.
150 This can be used to wrap individual undo-able statements within a
151 DatastoreTransaction block. Multiple statements that can fail
152 separately should not be part of the same `undoWith` block.
154 All arguments are forwarded directly to `registerUndo`.
155 """
156 try:
157 yield None
158 except BaseException:
159 raise
160 else:
161 self.registerUndo(name, undoFunc, *args, **kwargs)
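# Example (minimal sketch): wrapping a single undo-able statement so the undo
# action is registered only if the statement succeeds; ``write_file`` and
# ``remove_file`` are hypothetical helpers.
#
#     with transaction.undoWith("write file", remove_file, path):
#         write_file(path)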
163 def rollback(self) -> None:
164 """Roll back all events in this transaction."""
165 log = logging.getLogger(__name__)
166 while self._log:
167 ev = self._log.pop()
168 try:
169 log.debug(
170 "Rolling back transaction: %s: %s(%s,%s)",
171 ev.name,
172 ev.undoFunc,
173 ",".join(str(a) for a in ev.args),
174 ",".join(f"{k}={v}" for k, v in ev.kwargs.items()),
175 )
176 except Exception:
177 # In case we had a problem in stringification of arguments
178 log.warning("Rolling back transaction: %s", ev.name)
179 try:
180 ev.undoFunc(*ev.args, **ev.kwargs)
181 except BaseException as e:
182 # Deliberately swallow error that may occur in unrolling
183 log.warning("Exception: %s caught while unrolling: %s", e, ev.name)
184 pass
186 def commit(self) -> None:
187 """Commit this transaction."""
188 if self.parent is None:
189 # Just forget about the events, they have already happened.
190 return
191 else:
192 # We may still want to undo the events from this transaction as
193 # part of the parent, so hand them off to it.
194 self.parent._log.extend(self._log)
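# Example (minimal sketch): a standalone transaction that registers an undo
# action and then rolls back, which invokes the registered callable.
#
#     >>> txn = DatastoreTransaction()
#     >>> txn.registerUndo("greet", print, "undoing greet")
#     >>> txn.rollback()
#     undoing greet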
197@dataclasses.dataclass
198class DatasetRefURIs(abc.Sequence):
199 """Represents the primary and component ResourcePath(s) associated with a
200 DatasetRef.
202 This is used in places where its members used to be represented as a tuple
203 `(primaryURI, componentURIs)`. To maintain backward compatibility this
204 inherits from Sequence and so instances can be treated as a two-item
205 tuple.
206 """
208 def __init__(
209 self,
210 primaryURI: Optional[ResourcePath] = None,
211 componentURIs: Optional[Dict[str, ResourcePath]] = None,
212 ):
214 self.primaryURI = primaryURI
215 """The URI to the primary artifact associated with this dataset. If the
216 dataset was disassembled within the datastore this may be `None`.
217 """
219 self.componentURIs = componentURIs or {}
220 """The URIs to any components associated with the dataset artifact
221 indexed by component name. This can be empty if there are no
222 components.
223 """
225 def __getitem__(self, index: Any) -> Any:
226 """Get primaryURI and componentURIs by index.
228 Provides support for tuple-like access.
229 """
230 if index == 0:
231 return self.primaryURI
232 elif index == 1:
233 return self.componentURIs
234 raise IndexError("list index out of range")
236 def __len__(self) -> int:
237 """Get the number of data members.
239 Provides support for tuple-like access.
240 """
241 return 2
243 def __repr__(self) -> str:
244 return f"DatasetRefURIs({repr(self.primaryURI)}, {repr(self.componentURIs)})"
247class Datastore(metaclass=ABCMeta):
248 """Datastore interface.
250 Parameters
251 ----------
252 config : `DatastoreConfig` or `str`
253 Load configuration either from an existing config instance or by
254 referring to a configuration file.
255 bridgeManager : `DatastoreRegistryBridgeManager`
256 Object that manages the interface between `Registry` and datastores.
257 butlerRoot : `str`, optional
258 New datastore root to use to override the configuration value.
259 """
261 defaultConfigFile: ClassVar[Optional[str]] = None
262 """Path to configuration defaults. Accessed within the ``config`` resource
263 or relative to a search path. Can be None if no defaults specified.
264 """
266 containerKey: ClassVar[Optional[str]] = None
267 """Name of the key containing a list of subconfigurations that also
268 need to be merged with defaults and will likely use different Python
269 datastore classes (but all using DatastoreConfig). Assumed to be a
270 list of configurations that can be represented in a DatastoreConfig
271 and containing a "cls" definition. None indicates that no containers
272 are expected in this Datastore."""
274 isEphemeral: bool = False
275 """Indicate whether this Datastore is ephemeral or not. An ephemeral
276 datastore is one where the contents of the datastore will not exist
277 across process restarts. This value can change per-instance."""
279 config: DatastoreConfig
280 """Configuration used to create Datastore."""
282 name: str
283 """Label associated with this Datastore."""
285 storageClassFactory: StorageClassFactory
286 """Factory for creating storage class instances from name."""
288 constraints: Constraints
289 """Constraints to apply when putting datasets into the datastore."""
291 # MyPy does not like for this to be annotated as any kind of type, because
292 # it can't do static checking on type variables that can change at runtime.
293 IngestPrepData: ClassVar[Any] = IngestPrepData
294 """Helper base class for ingest implementations.
295 """
297 @classmethod
298 @abstractmethod
299 def setConfigRoot(cls, root: str, config: Config, full: Config, overwrite: bool = True) -> None:
300 """Set filesystem-dependent config options for this datastore.
302 The options will be appropriate for a new empty repository with the
303 given root.
305 Parameters
306 ----------
307 root : `str`
308 Filesystem path to the root of the data repository.
309 config : `Config`
310 A `Config` to update. Only the subset understood by
311 this component will be updated. Will not expand
312 defaults.
313 full : `Config`
314 A complete config with all defaults expanded that can be
315 converted to a `DatastoreConfig`. Read-only and will not be
316 modified by this method.
317 Repository-specific options that should not be obtained
318 from defaults when Butler instances are constructed
319 should be copied from ``full`` to ``config``.
320 overwrite : `bool`, optional
321 If `False`, do not modify a value in ``config`` if the value
322 already exists. Default is always to overwrite with the provided
323 ``root``.
325 Notes
326 -----
327 If a keyword is explicitly defined in the supplied ``config`` it
328 will not be overridden by this method if ``overwrite`` is `False`.
329 This allows explicit values set in external configs to be retained.
330 """
331 raise NotImplementedError()
333 @staticmethod
334 def fromConfig(
335 config: Config,
336 bridgeManager: DatastoreRegistryBridgeManager,
337 butlerRoot: Optional[ResourcePathExpression] = None,
338 ) -> "Datastore":
339 """Create datastore from type specified in config file.
341 Parameters
342 ----------
343 config : `Config`
344 Configuration instance.
345 bridgeManager : `DatastoreRegistryBridgeManager`
346 Object that manages the interface between `Registry` and
347 datastores.
348 butlerRoot : `str`, optional
349 Butler root directory.
350 """
351 cls = doImportType(config["datastore", "cls"])
352 if not issubclass(cls, Datastore):
353 raise TypeError(f"Imported child class {config['datastore', 'cls']} is not a Datastore")
354 return cls(config=config, bridgeManager=bridgeManager, butlerRoot=butlerRoot)
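# Example (minimal sketch): constructing a concrete datastore from
# configuration; ``butler_config`` (a `Config` with a ``datastore.cls`` entry)
# and ``bridge_manager`` are assumed to exist already.
#
#     datastore = Datastore.fromConfig(
#         butler_config, bridgeManager=bridge_manager, butlerRoot="/repo/root"
#     )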
356 def __init__(
357 self,
358 config: Union[Config, str],
359 bridgeManager: DatastoreRegistryBridgeManager,
360 butlerRoot: Optional[ResourcePathExpression] = None,
361 ):
362 self.config = DatastoreConfig(config)
363 self.name = "ABCDataStore"
364 self._transaction: Optional[DatastoreTransaction] = None
366 # All Datastores need storage classes and constraints
367 self.storageClassFactory = StorageClassFactory()
369 # And read the constraints list
370 constraintsConfig = self.config.get("constraints")
371 self.constraints = Constraints(constraintsConfig, universe=bridgeManager.universe)
373 def __str__(self) -> str:
374 return self.name
376 def __repr__(self) -> str:
377 return self.name
379 @property
380 def names(self) -> Tuple[str, ...]:
381 Names associated with this datastore, returned as a tuple.
383 Can be different to ``name`` for a chaining datastore.
384 """
385 # Default implementation returns solely the name itself
386 return (self.name,)
388 @contextlib.contextmanager
389 def transaction(self) -> Iterator[DatastoreTransaction]:
390 """Context manager supporting `Datastore` transactions.
392 Transactions can be nested, and are to be used in combination with
393 `Registry.transaction`.
394 """
395 self._transaction = DatastoreTransaction(self._transaction)
396 try:
397 yield self._transaction
398 except BaseException:
399 self._transaction.rollback()
400 raise
401 else:
402 self._transaction.commit()
403 self._transaction = self._transaction.parent
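# Example (minimal sketch): grouping operations so that a failure inside the
# block rolls back whatever the datastore registered; ``datastore`` is a
# concrete `Datastore` and ``ref`` a resolved `DatasetRef` (both assumed).
#
#     with datastore.transaction():
#         datastore.put(in_memory_dataset, ref)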
405 @abstractmethod
406 def knows(self, ref: DatasetRef) -> bool:
407 """Check if the dataset is known to the datastore.
409 Does not check for existence of any artifact.
411 Parameters
412 ----------
413 ref : `DatasetRef`
414 Reference to the required dataset.
416 Returns
417 -------
418 exists : `bool`
419 `True` if the dataset is known to the datastore.
420 """
421 raise NotImplementedError()
423 def mexists(
424 self, refs: Iterable[DatasetRef], artifact_existence: Optional[Dict[ResourcePath, bool]] = None
425 ) -> Dict[DatasetRef, bool]:
426 """Check the existence of multiple datasets at once.
428 Parameters
429 ----------
430 refs : iterable of `DatasetRef`
431 The datasets to be checked.
432 artifact_existence : `dict` [`lsst.resources.ResourcePath`, `bool`]
433 Optional mapping of datastore artifact to existence. Updated by
434 this method with details of all artifacts tested. Can be `None`
435 if the caller is not interested.
437 Returns
438 -------
439 existence : `dict` [`DatasetRef`, `bool`]
440 Mapping from dataset to boolean indicating existence.
441 """
442 existence: Dict[DatasetRef, bool] = {}
443 # Non-optimized default.
444 for ref in refs:
445 existence[ref] = self.exists(ref)
446 return existence
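# Example (minimal sketch): checking many refs at once while sharing an
# artifact-existence cache across calls; ``datastore`` and ``refs`` are
# assumed to exist.
#
#     artifact_cache: Dict[ResourcePath, bool] = {}
#     existence = datastore.mexists(refs, artifact_existence=artifact_cache)
#     missing = [ref for ref, exists in existence.items() if not exists]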
448 @abstractmethod
449 def exists(self, datasetRef: DatasetRef) -> bool:
450 """Check if the dataset exists in the datastore.
452 Parameters
453 ----------
454 datasetRef : `DatasetRef`
455 Reference to the required dataset.
457 Returns
458 -------
459 exists : `bool`
460 `True` if the entity exists in the `Datastore`.
461 """
462 raise NotImplementedError("Must be implemented by subclass")
464 @abstractmethod
465 def get(
466 self,
467 datasetRef: DatasetRef,
468 parameters: Optional[Mapping[str, Any]] = None,
469 storageClass: Optional[Union[StorageClass, str]] = None,
470 ) -> Any:
471 """Load an `InMemoryDataset` from the store.
473 Parameters
474 ----------
475 datasetRef : `DatasetRef`
476 Reference to the required Dataset.
477 parameters : `dict`
478 `StorageClass`-specific parameters that specify a slice of the
479 Dataset to be loaded.
480 storageClass : `StorageClass` or `str`, optional
481 The storage class to be used to override the Python type
482 returned by this method. By default the returned type matches
483 the dataset type definition for this dataset. Specifying a
484 read `StorageClass` can force a different type to be returned.
485 This type must be compatible with the original type.
487 Returns
488 -------
489 inMemoryDataset : `object`
490 Requested Dataset or slice thereof as an InMemoryDataset.
491 """
492 raise NotImplementedError("Must be implemented by subclass")
494 @abstractmethod
495 def put(self, inMemoryDataset: Any, datasetRef: DatasetRef) -> None:
496 """Write a `InMemoryDataset` with a given `DatasetRef` to the store.
498 Parameters
499 ----------
500 inMemoryDataset : `object`
501 The Dataset to store.
502 datasetRef : `DatasetRef`
503 Reference to the associated Dataset.
504 """
505 raise NotImplementedError("Must be implemented by subclass")
507 def _overrideTransferMode(self, *datasets: FileDataset, transfer: Optional[str] = None) -> Optional[str]:
508 """Allow ingest transfer mode to be defaulted based on datasets.
510 Parameters
511 ----------
512 datasets : `FileDataset`
513 Each positional argument is a struct containing information about
514 a file to be ingested, including its path (either absolute or
515 relative to the datastore root, if applicable), a complete
516 `DatasetRef` (with ``dataset_id not None``), and optionally a
517 formatter class or its fully-qualified string name. If a formatter
518 is not provided, this method should populate that attribute with
519 the formatter the datastore would use for `put`. Subclasses are
520 also permitted to modify the path attribute (typically to put it
521 in what the datastore considers its standard form).
522 transfer : `str`, optional
523 How (and whether) the dataset should be added to the datastore.
524 See `ingest` for details of transfer modes.
526 Returns
527 -------
528 newTransfer : `str`
529 Transfer mode to use. Will be identical to the supplied transfer
530 mode unless "auto" is used.
531 """
532 if transfer != "auto":
533 return transfer
534 raise RuntimeError(f"{transfer} is not allowed without specialization.")
536 def _prepIngest(self, *datasets: FileDataset, transfer: Optional[str] = None) -> IngestPrepData:
537 """Process datasets to identify which ones can be ingested.
539 Parameters
540 ----------
541 datasets : `FileDataset`
542 Each positional argument is a struct containing information about
543 a file to be ingested, including its path (either absolute or
544 relative to the datastore root, if applicable), a complete
545 `DatasetRef` (with ``dataset_id not None``), and optionally a
546 formatter class or its fully-qualified string name. If a formatter
547 is not provided, this method should populate that attribute with
548 the formatter the datastore would use for `put`. Subclasses are
549 also permitted to modify the path attribute (typically to put it
550 in what the datastore considers its standard form).
551 transfer : `str`, optional
552 How (and whether) the dataset should be added to the datastore.
553 See `ingest` for details of transfer modes.
555 Returns
556 -------
557 data : `IngestPrepData`
558 An instance of a subclass of `IngestPrepData`, used to pass
559 arbitrary data from `_prepIngest` to `_finishIngest`. This should
560 include only the datasets this datastore can actually ingest;
561 others should be silently ignored (`Datastore.ingest` will inspect
562 `IngestPrepData.refs` and raise `DatasetTypeNotSupportedError` if
563 necessary).
565 Raises
566 ------
567 NotImplementedError
568 Raised if the datastore does not support the given transfer mode
569 (including the case where ingest is not supported at all).
570 FileNotFoundError
571 Raised if one of the given files does not exist.
572 FileExistsError
573 Raised if transfer is not `None` but the (internal) location the
574 file would be moved to is already occupied.
576 Notes
577 -----
578 This method (along with `_finishIngest`) should be implemented by
579 subclasses to provide ingest support instead of implementing `ingest`
580 directly.
582 `_prepIngest` should not modify the data repository or given files in
583 any way; all changes should be deferred to `_finishIngest`.
585 When possible, exceptions should be raised in `_prepIngest` instead of
586 `_finishIngest`. `NotImplementedError` exceptions that indicate that
587 the transfer mode is not supported must be raised by `_prepIngest`
588 instead of `_finishIngest`.
589 """
590 raise NotImplementedError(f"Datastore {self} does not support direct file-based ingest.")
592 def _finishIngest(
593 self, prepData: IngestPrepData, *, transfer: Optional[str] = None, record_validation_info: bool = True
594 ) -> None:
595 """Complete an ingest operation.
597 Parameters
598 ----------
599 prepData : `IngestPrepData`
600 An instance of a subclass of `IngestPrepData`. Guaranteed to be
601 the direct result of a call to `_prepIngest` on this datastore.
602 transfer : `str`, optional
603 How (and whether) the dataset should be added to the datastore.
604 See `ingest` for details of transfer modes.
605 record_validation_info : `bool`, optional
606 If `True`, the default, the datastore can record validation
607 information associated with the file. If `False` the datastore
608 will not attempt to track any information such as checksums
609 or file sizes. This can be useful if such information is tracked
610 in an external system or if the file is to be compressed in place.
611 It is up to the datastore whether this parameter is relevant.
613 Raises
614 ------
615 FileNotFoundError
616 Raised if one of the given files does not exist.
617 FileExistsError
618 Raised if transfer is not `None` but the (internal) location the
619 file would be moved to is already occupied.
621 Notes
622 -----
623 This method (along with `_prepIngest`) should be implemented by
624 subclasses to provide ingest support instead of implementing `ingest`
625 directly.
626 """
627 raise NotImplementedError(f"Datastore {self} does not support direct file-based ingest.")
629 def ingest(
630 self, *datasets: FileDataset, transfer: Optional[str] = None, record_validation_info: bool = True
631 ) -> None:
632 """Ingest one or more files into the datastore.
634 Parameters
635 ----------
636 datasets : `FileDataset`
637 Each positional argument is a struct containing information about
638 a file to be ingested, including its path (either absolute or
639 relative to the datastore root, if applicable), a complete
640 `DatasetRef` (with ``dataset_id not None``), and optionally a
641 formatter class or its fully-qualified string name. If a formatter
642 is not provided, the one the datastore would use for ``put`` on
643 that dataset is assumed.
644 transfer : `str`, optional
645 How (and whether) the dataset should be added to the datastore.
646 If `None` (default), the file must already be in a location
647 appropriate for the datastore (e.g. within its root directory),
648 and will not be modified. Other choices include "move", "copy",
649 "link", "symlink", "relsymlink", and "hardlink". "link" is a
650 special transfer mode that will first try to make a hardlink and
651 if that fails a symlink will be used instead. "relsymlink" creates
652 a relative symlink rather than an absolute one.
653 Most datastores do not support all transfer modes.
654 "auto" is a special option that will let the
655 data store choose the most natural option for itself.
656 record_validation_info : `bool`, optional
657 If `True`, the default, the datastore can record validation
658 information associated with the file. If `False` the datastore
659 will not attempt to track any information such as checksums
660 or file sizes. This can be useful if such information is tracked
661 in an external system or if the file is to be compressed in place.
662 It is up to the datastore whether this parameter is relevant.
664 Raises
665 ------
666 NotImplementedError
667 Raised if the datastore does not support the given transfer mode
668 (including the case where ingest is not supported at all).
669 DatasetTypeNotSupportedError
670 Raised if one or more files to be ingested have a dataset type that
671 is not supported by the datastore.
672 FileNotFoundError
673 Raised if one of the given files does not exist.
674 FileExistsError
675 Raised if transfer is not `None` but the (internal) location the
676 file would be moved to is already occupied.
678 Notes
679 -----
680 Subclasses should implement `_prepIngest` and `_finishIngest` instead
681 of implementing `ingest` directly. Datastores that hold and
682 delegate to child datastores may want to call those methods as well.
684 Subclasses are encouraged to document their supported transfer modes
685 in their class documentation.
686 """
687 # Allow a datastore to select a default transfer mode
688 transfer = self._overrideTransferMode(*datasets, transfer=transfer)
689 prepData = self._prepIngest(*datasets, transfer=transfer)
690 refs = {ref.id: ref for dataset in datasets for ref in dataset.refs}
691 if None in refs:
692 # Find the file for the error message. There may be multiple
693 # bad refs so look for all of them.
694 unresolved_paths = {}
695 for dataset in datasets:
696 unresolved = []
697 for ref in dataset.refs:
698 if ref.id is None:
699 unresolved.append(ref)
700 if unresolved:
701 unresolved_paths[dataset.path] = unresolved
702 raise RuntimeError(
703 "Attempt to ingest unresolved DatasetRef from: "
704 + ",".join(f"{p}: ({[str(r) for r in ref]})" for p, ref in unresolved_paths.items())
705 )
706 if refs.keys() != prepData.refs.keys():
707 unsupported = refs.keys() - prepData.refs.keys()
708 # Group unsupported refs by DatasetType for an informative
709 # but still concise error message.
710 byDatasetType = defaultdict(list)
711 for datasetId in unsupported:
712 ref = refs[datasetId]
713 byDatasetType[ref.datasetType].append(ref)
714 raise DatasetTypeNotSupportedError(
715 "DatasetType(s) not supported in ingest: "
716 + ", ".join(f"{k.name} ({len(v)} dataset(s))" for k, v in byDatasetType.items())
717 )
718 self._finishIngest(prepData, transfer=transfer, record_validation_info=record_validation_info)
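# Example (minimal sketch): ingesting an existing file by copying it into the
# datastore; ``datastore`` is a concrete `Datastore` and ``ref`` a resolved
# `DatasetRef` (both assumed), and the path is illustrative.
#
#     dataset = FileDataset(path="/data/raw_0001.fits", refs=[ref])
#     datastore.ingest(dataset, transfer="copy")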
720 def transfer_from(
721 self,
722 source_datastore: Datastore,
723 refs: Iterable[DatasetRef],
724 local_refs: Optional[Iterable[DatasetRef]] = None,
725 transfer: str = "auto",
726 artifact_existence: Optional[Dict[ResourcePath, bool]] = None,
727 ) -> None:
728 """Transfer dataset artifacts from another datastore to this one.
730 Parameters
731 ----------
732 source_datastore : `Datastore`
733 The datastore from which to transfer artifacts. That datastore
734 must be compatible with this datastore receiving the artifacts.
735 refs : iterable of `DatasetRef`
736 The datasets to transfer from the source datastore.
737 local_refs : iterable of `DatasetRef`, optional
738 The dataset refs associated with the registry associated with
739 this datastore. Can be `None` if the source and target datastore
740 are using UUIDs.
741 transfer : `str`, optional
742 How (and whether) the dataset should be added to the datastore.
743 Choices include "move", "copy",
744 "link", "symlink", "relsymlink", and "hardlink". "link" is a
745 special transfer mode that will first try to make a hardlink and
746 if that fails a symlink will be used instead. "relsymlink" creates
747 a relative symlink rather than an absolute one.
748 Most datastores do not support all transfer modes.
749 "auto" (the default) is a special option that will let the
750 data store choose the most natural option for itself.
751 If the source location and transfer location are identical the
752 transfer mode will be ignored.
753 artifact_existence : `dict` [`lsst.resources.ResourcePath`, `bool`]
754 Optional mapping of datastore artifact to existence. Updated by
755 this method with details of all artifacts tested. Can be `None`
756 if the caller is not interested.
758 Raises
759 ------
760 TypeError
761 Raised if the two datastores are not compatible.
762 """
763 if type(self) is not type(source_datastore):
764 raise TypeError(
765 f"Datastore mismatch between this datastore ({type(self)}) and the "
766 f"source datastore ({type(source_datastore)})."
767 )
769 raise NotImplementedError(f"Datastore {type(self)} must implement a transfer_from method.")
771 def getManyURIs(
772 self,
773 refs: Iterable[DatasetRef],
774 predict: bool = False,
775 allow_missing: bool = False,
776 ) -> Dict[DatasetRef, DatasetRefURIs]:
777 """Return URIs associated with many datasets.
779 Parameters
780 ----------
781 refs : iterable of `DatasetIdRef`
782 References to the required datasets.
783 predict : `bool`, optional
784 If the datastore does not know about a dataset, should it
785 return a predicted URI or not?
786 allow_missing : `bool`
787 If `False`, and `predict` is `False`, will raise if a `DatasetRef`
788 does not exist.
790 Returns
791 -------
792 URIs : `dict` [`DatasetRef`, `DatasetRefURIs`]
793 A dict of primary and component URIs, indexed by the passed-in
794 refs.
796 Raises
797 ------
798 FileNotFoundError
799 A URI has been requested for a dataset that does not exist and
800 guessing is not allowed.
802 Notes
803 -----
804 In file-based datastores, `getManyURIs` does not check that the files
805 are really there; it assumes that if the datastore is aware of a file
806 then the file actually exists.
807 """
808 uris: Dict[DatasetRef, DatasetRefURIs] = {}
809 missing_refs = []
810 for ref in refs:
811 try:
812 uris[ref] = self.getURIs(ref, predict=predict)
813 except FileNotFoundError:
814 missing_refs.append(ref)
815 if missing_refs and not allow_missing:
816 raise FileNotFoundError(
817 "Missing {} refs from datastore out of {} and predict=False.".format(
818 num_missing := len(missing_refs), num_missing + len(uris)
819 )
820 )
821 return uris
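# Example (minimal sketch): looking up artifact URIs for several refs at once,
# tolerating refs whose artifacts are not known to the datastore; ``datastore``
# and ``refs`` are assumed to exist.
#
#     uris = datastore.getManyURIs(refs, predict=False, allow_missing=True)
#     for ref, ref_uris in uris.items():
#         print(ref, ref_uris.primaryURI, ref_uris.componentURIs)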
823 @abstractmethod
824 def getURIs(self, datasetRef: DatasetRef, predict: bool = False) -> DatasetRefURIs:
825 """Return URIs associated with dataset.
827 Parameters
828 ----------
829 datasetRef : `DatasetRef`
830 Reference to the required dataset.
831 predict : `bool`, optional
832 If the datastore does not know about the dataset, should it
833 return a predicted URI or not?
835 Returns
836 -------
837 uris : `DatasetRefURIs`
838 The URI to the primary artifact associated with this dataset (if
839 the dataset was disassembled within the datastore this may be
840 `None`), and the URIs to any components associated with the dataset
841 artifact (this mapping can be empty if there are no components).
842 """
843 raise NotImplementedError()
845 @abstractmethod
846 def getURI(self, datasetRef: DatasetRef, predict: bool = False) -> ResourcePath:
847 """URI to the Dataset.
849 Parameters
850 ----------
851 datasetRef : `DatasetRef`
852 Reference to the required Dataset.
853 predict : `bool`
854 If `True` attempt to predict the URI for a dataset if it does
855 not exist in datastore.
857 Returns
858 -------
859 uri : `lsst.resources.ResourcePath`
860 URI pointing to the Dataset within the datastore. If the
861 Dataset does not exist in the datastore, the URI may be a guess.
862 If the datastore does not have entities that relate well
863 to the concept of a URI the returned URI string will be
864 descriptive. The returned URI is not guaranteed to be obtainable.
866 Raises
867 ------
868 FileNotFoundError
869 A URI has been requested for a dataset that does not exist and
870 guessing is not allowed.
871 """
872 raise NotImplementedError("Must be implemented by subclass")
874 @abstractmethod
875 def retrieveArtifacts(
876 self,
877 refs: Iterable[DatasetRef],
878 destination: ResourcePath,
879 transfer: str = "auto",
880 preserve_path: bool = True,
881 overwrite: bool = False,
882 ) -> List[ResourcePath]:
883 """Retrieve the artifacts associated with the supplied refs.
885 Parameters
886 ----------
887 refs : iterable of `DatasetRef`
888 The datasets for which artifacts are to be retrieved.
889 A single ref can result in multiple artifacts. The refs must
890 be resolved.
891 destination : `lsst.resources.ResourcePath`
892 Location to write the artifacts.
893 transfer : `str`, optional
894 Method to use to transfer the artifacts. Must be one of the options
895 supported by `lsst.resources.ResourcePath.transfer_from()`.
896 "move" is not allowed.
897 preserve_path : `bool`, optional
898 If `True` the full path of the artifact within the datastore
899 is preserved. If `False` the final file component of the path
900 is used.
901 overwrite : `bool`, optional
902 If `True` allow transfers to overwrite existing files at the
903 destination.
905 Returns
906 -------
907 targets : `list` of `lsst.resources.ResourcePath`
908 URIs of file artifacts in destination location. Order is not
909 preserved.
911 Notes
912 -----
913 For non-file datastores the artifacts written to the destination
914 may not match the representation inside the datastore. For example
915 a hierarchical data structure in a NoSQL database may well be stored
916 as a JSON file.
917 """
918 raise NotImplementedError()
920 @abstractmethod
921 def remove(self, datasetRef: DatasetRef) -> None:
922 """Indicate to the Datastore that a Dataset can be removed.
924 Parameters
925 ----------
926 datasetRef : `DatasetRef`
927 Reference to the required Dataset.
929 Raises
930 ------
931 FileNotFoundError
932 When Dataset does not exist.
934 Notes
935 -----
936 Some Datastores may implement this method as a silent no-op to
937 disable Dataset deletion through standard interfaces.
938 """
939 raise NotImplementedError("Must be implemented by subclass")
941 @abstractmethod
942 def forget(self, refs: Iterable[DatasetRef]) -> None:
943 """Indicate to the Datastore that it should remove all records of the
944 given datasets, without actually deleting them.
946 Parameters
947 ----------
948 refs : `Iterable` [ `DatasetRef` ]
949 References to the datasets being forgotten.
951 Notes
952 -----
953 Asking a datastore to forget a `DatasetRef` it does not hold should be
954 a silent no-op, not an error.
955 """
956 raise NotImplementedError("Must be implemented by subclass")
958 @abstractmethod
959 def trash(self, ref: Union[DatasetRef, Iterable[DatasetRef]], ignore_errors: bool = True) -> None:
960 """Indicate to the Datastore that a Dataset can be moved to the trash.
962 Parameters
963 ----------
964 ref : `DatasetRef` or iterable thereof
965 Reference(s) to the required Dataset.
966 ignore_errors : `bool`, optional
967 Determine whether errors should be ignored. When multiple
968 refs are being trashed there will be no per-ref check.
970 Raises
971 ------
972 FileNotFoundError
973 When Dataset does not exist and errors are not ignored. Only
974 checked if a single ref is supplied (and not in a list).
976 Notes
977 -----
978 Some Datastores may implement this method as a silent no-op to
979 disable Dataset deletion through standard interfaces.
980 """
981 raise NotImplementedError("Must be implemented by subclass")
983 @abstractmethod
984 def emptyTrash(self, ignore_errors: bool = True) -> None:
985 """Remove all datasets from the trash.
987 Parameters
988 ----------
989 ignore_errors : `bool`, optional
990 Determine whether errors should be ignored.
992 Notes
993 -----
994 Some Datastores may implement this method as a silent no-op to
995 disable Dataset deletion through standard interfaces.
996 """
997 raise NotImplementedError("Must be implemented by subclass")
999 @abstractmethod
1000 def transfer(self, inputDatastore: Datastore, datasetRef: DatasetRef) -> None:
1001 """Transfer a dataset from another datastore to this datastore.
1003 Parameters
1004 ----------
1005 inputDatastore : `Datastore`
1006 The external `Datastore` from which to retrieve the Dataset.
1007 datasetRef : `DatasetRef`
1008 Reference to the required Dataset.
1009 """
1010 raise NotImplementedError("Must be implemented by subclass")
1012 def export(
1013 self, refs: Iterable[DatasetRef], *, directory: Optional[str] = None, transfer: Optional[str] = None
1014 ) -> Iterable[FileDataset]:
1015 """Export datasets for transfer to another data repository.
1017 Parameters
1018 ----------
1019 refs : iterable of `DatasetRef`
1020 Dataset references to be exported.
1021 directory : `str`, optional
1022 Path to a directory that should contain files corresponding to
1023 output datasets. Ignored if ``transfer`` is `None`.
1024 transfer : `str`, optional
1025 Mode that should be used to move datasets out of the repository.
1026 Valid options are the same as those of the ``transfer`` argument
1027 to ``ingest``, and datastores may similarly signal that a transfer
1028 mode is not supported by raising `NotImplementedError`.
1030 Returns
1031 -------
1032 datasets : iterable of `FileDataset`
1033 Structs containing information about the exported datasets, in the
1034 same order as ``refs``.
1036 Raises
1037 ------
1038 NotImplementedError
1039 Raised if the given transfer mode is not supported.
1040 """
1041 raise NotImplementedError(f"Transfer mode {transfer} not supported.")
1043 @abstractmethod
1044 def validateConfiguration(
1045 self, entities: Iterable[Union[DatasetRef, DatasetType, StorageClass]], logFailures: bool = False
1046 ) -> None:
1047 """Validate some of the configuration for this datastore.
1049 Parameters
1050 ----------
1051 entities : iterable of `DatasetRef`, `DatasetType`, or `StorageClass`
1052 Entities to test against this configuration. Can be differing
1053 types.
1054 logFailures : `bool`, optional
1055 If `True`, output a log message for every validation error
1056 detected.
1058 Raises
1059 ------
1060 DatastoreValidationError
1061 Raised if there is a validation problem with a configuration.
1063 Notes
1064 -----
1065 Which parts of the configuration are validated is at the discretion
1066 of each Datastore implementation.
1067 """
1068 raise NotImplementedError("Must be implemented by subclass")
1070 @abstractmethod
1071 def validateKey(self, lookupKey: LookupKey, entity: Union[DatasetRef, DatasetType, StorageClass]) -> None:
1072 """Validate a specific look up key with supplied entity.
1074 Parameters
1075 ----------
1076 lookupKey : `LookupKey`
1077 Key to use to retrieve information from the datastore
1078 configuration.
1079 entity : `DatasetRef`, `DatasetType`, or `StorageClass`
1080 Entity to compare with configuration retrieved using the
1081 specified lookup key.
1083 Raises
1084 ------
1085 DatastoreValidationError
1086 Raised if there is a problem with the combination of entity
1087 and lookup key.
1089 Notes
1090 -----
1091 Bypasses the normal selection priorities by allowing a key that
1092 would normally not be selected to be validated.
1093 """
1094 raise NotImplementedError("Must be implemented by subclass")
1096 @abstractmethod
1097 def getLookupKeys(self) -> Set[LookupKey]:
1098 """Return all the lookup keys relevant to this datastore.
1100 Returns
1101 -------
1102 keys : `set` of `LookupKey`
1103 The keys stored internally for looking up information based
1104 on `DatasetType` name or `StorageClass`.
1105 """
1106 raise NotImplementedError("Must be implemented by subclass")
1108 def needs_expanded_data_ids(
1109 self,
1110 transfer: Optional[str],
1111 entity: Optional[Union[DatasetRef, DatasetType, StorageClass]] = None,
1112 ) -> bool:
1113 """Test whether this datastore needs expanded data IDs to ingest.
1115 Parameters
1116 ----------
1117 transfer : `str` or `None`
1118 Transfer mode for ingest.
1119 entity : `DatasetRef`, `DatasetType`, or `StorageClass`, optional
1120 Object representing what will be ingested. If not provided (or not
1121 specific enough), `True` may be returned even if expanded data
1122 IDs aren't necessary.
1124 Returns
1125 -------
1126 needed : `bool`
1127 If `True`, expanded data IDs may be needed. `False` only if
1128 expansion definitely isn't necessary.
1129 """
1130 return True
1132 @abstractmethod
1133 def import_records(
1134 self,
1135 data: Mapping[str, DatastoreRecordData],
1136 ) -> None:
1137 """Import datastore location and record data from an in-memory data
1138 structure.
1140 Parameters
1141 ----------
1142 data : `Mapping` [ `str`, `DatastoreRecordData` ]
1143 Datastore records indexed by datastore name. May contain data for
1144 other `Datastore` instances (generally because they are chained to
1145 this one), which should be ignored.
1147 Notes
1148 -----
1149 Implementations should generally not check that any external resources
1150 (e.g. files) referred to by these records actually exist, for
1151 performance reasons; we expect higher-level code to guarantee that they
1152 do.
1154 Implementations are responsible for calling
1155 `DatastoreRegistryBridge.insert` on all datasets in ``data.locations``
1156 where the key is in `names`, as well as loading any opaque table data.
1157 """
1158 raise NotImplementedError()
1160 @abstractmethod
1161 def export_records(
1162 self,
1163 refs: Iterable[DatasetIdRef],
1164 ) -> Mapping[str, DatastoreRecordData]:
1165 """Export datastore records and locations to an in-memory data
1166 structure.
1168 Parameters
1169 ----------
1170 refs : `Iterable` [ `DatasetIdRef` ]
1171 Datasets to save. This may include datasets not known to this
1172 datastore, which should be ignored.
1174 Returns
1175 -------
1176 data : `Mapping` [ `str`, `DatastoreRecordData` ]
1177 Exported datastore records indexed by datastore name.
1178 """
1179 raise NotImplementedError()