Coverage for python/lsst/daf/butler/core/datastore.py: 44%
244 statements
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22"""Support for generic data stores."""
24from __future__ import annotations
26__all__ = ("DatastoreConfig", "Datastore", "DatastoreValidationError", "DatasetRefURIs")
28import contextlib
29import dataclasses
30import logging
31from abc import ABCMeta, abstractmethod
32from collections import abc, defaultdict
33from typing import (
34 TYPE_CHECKING,
35 Any,
36 Callable,
37 ClassVar,
38 Dict,
39 Iterable,
40 Iterator,
41 List,
42 Mapping,
43 Optional,
44 Set,
45 Tuple,
46 Type,
47 Union,
48)
50from lsst.utils import doImportType
52from .config import Config, ConfigSubset
53from .constraints import Constraints
54from .exceptions import DatasetTypeNotSupportedError, ValidationError
55from .fileDataset import FileDataset
56from .storageClass import StorageClassFactory
58if TYPE_CHECKING:
59 from lsst.resources import ResourcePath, ResourcePathExpression
61 from ..registry.interfaces import DatasetIdRef, DatastoreRegistryBridgeManager
62 from .configSupport import LookupKey
63 from .datasets import DatasetRef, DatasetType
64 from .datastoreRecordData import DatastoreRecordData
65 from .storageClass import StorageClass
68class DatastoreConfig(ConfigSubset):
69 """Configuration for Datastores."""
71 component = "datastore"
72 requiredKeys = ("cls",)
73 defaultConfigFile = "datastore.yaml"
76class DatastoreValidationError(ValidationError):
77 """There is a problem with the Datastore configuration."""
79 pass
82@dataclasses.dataclass(frozen=True)
83class Event:
84 __slots__ = {"name", "undoFunc", "args", "kwargs"}
85 name: str
86 undoFunc: Callable
87 args: tuple
88 kwargs: dict
91class IngestPrepData:
92 """A helper base class for `Datastore` ingest implementations.
94 Datastore implementations will generally need a custom implementation of
95 this class.
97 Should be accessed as ``Datastore.IngestPrepData`` instead of via direct
98 import.
100 Parameters
101 ----------
102 refs : iterable of `DatasetRef`
103 References for the datasets that can be ingested by this datastore.
104 """
106 def __init__(self, refs: Iterable[DatasetRef]):
107 self.refs = {ref.id: ref for ref in refs}
110class DatastoreTransaction:
111 """Keeps a log of `Datastore` activity and allow rollback.
113 Parameters
114 ----------
115 parent : `DatastoreTransaction`, optional
116 The parent transaction (if any)
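 Examples
 --------
 A minimal sketch of the undo log in isolation (rollback is normally
 driven by `Datastore.transaction`; ``print`` stands in for a real undo
 callable):
 >>> txn = DatastoreTransaction()
 >>> txn.registerUndo("example", print, "undoing example")
 >>> txn.rollback()
 undoing example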
117 """
119 Event: ClassVar[Type] = Event
121 parent: Optional[DatastoreTransaction]
122 """The parent transaction. (`DatastoreTransaction`, optional)"""
124 def __init__(self, parent: Optional[DatastoreTransaction] = None):
125 self.parent = parent
126 self._log: List[Event] = []
128 def registerUndo(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> None:
129 """Register event with undo function.
131 Parameters
132 ----------
133 name : `str`
134 Name of the event.
135 undoFunc : func
136 Function to undo this event.
137 args : `tuple`
138 Positional arguments to `undoFunc`.
139 **kwargs
140 Keyword arguments to `undoFunc`.
141 """
142 self._log.append(self.Event(name, undoFunc, args, kwargs))
144 @contextlib.contextmanager
145 def undoWith(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> Iterator[None]:
146 """Register undo function if nested operation succeeds.
148 Calls `registerUndo`.
150 This can be used to wrap individual undo-able statements within a
151 DatastoreTransaction block. Multiple statements that can fail
152 separately should not be part of the same `undoWith` block.
154 All arguments are forwarded directly to `registerUndo`.
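 A short sketch (illustrative only; ``print`` stands in for a real undo
 callable). The undo function is registered only because the wrapped
 block completes without raising:
 >>> txn = DatastoreTransaction()
 >>> with txn.undoWith("example", print, "undoing example"):
 ...     pass
 >>> txn.rollback()
 undoing example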
155 """
156 try:
157 yield None
158 except BaseException:
159 raise
160 else:
161 self.registerUndo(name, undoFunc, *args, **kwargs)
163 def rollback(self) -> None:
164 """Roll back all events in this transaction."""
165 log = logging.getLogger(__name__)
166 while self._log:
167 ev = self._log.pop()
168 try:
169 log.debug(
170 "Rolling back transaction: %s: %s(%s,%s)",
171 ev.name,
172 ev.undoFunc,
173 ",".join(str(a) for a in ev.args),
174 ",".join(f"{k}={v}" for k, v in ev.kwargs.items()),
175 )
176 except Exception:
177 # In case we had a problem in stringification of arguments
178 log.warning("Rolling back transaction: %s", ev.name)
179 try:
180 ev.undoFunc(*ev.args, **ev.kwargs)
181 except BaseException as e:
182 # Deliberately swallow error that may occur in unrolling
183 log.warning("Exception: %s caught while unrolling: %s", e, ev.name)
184 pass
186 def commit(self) -> None:
187 """Commit this transaction."""
188 if self.parent is None:
189 # Just forget about the events, they have already happened.
190 return
191 else:
192 # We may still want the events from this transaction as part of
193 # the parent.
194 self.parent._log.extend(self._log)
197@dataclasses.dataclass
198class DatasetRefURIs(abc.Sequence):
199 """Represents the primary and component ResourcePath(s) associated with a
200 DatasetRef.
202 This is used in places where its members used to be represented as a tuple
203 `(primaryURI, componentURIs)`. To maintain backward compatibility this
204 inherits from Sequence and so instances can be treated as a two-item
205 tuple.
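 Examples
 --------
 Illustration of the tuple-like behavior (real instances are normally
 returned by `Datastore.getURIs` rather than constructed directly):
 >>> uris = DatasetRefURIs()
 >>> primary, components = uris
 >>> (primary, components)
 (None, {})
 >>> len(uris)
 2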
206 """
208 def __init__(
209 self,
210 primaryURI: Optional[ResourcePath] = None,
211 componentURIs: Optional[Dict[str, ResourcePath]] = None,
212 ):
214 self.primaryURI = primaryURI
215 """The URI to the primary artifact associated with this dataset. If the
216 dataset was disassembled within the datastore this may be `None`.
217 """
219 self.componentURIs = componentURIs or {}
220 """The URIs to any components associated with the dataset artifact
221 indexed by component name. This can be empty if there are no
222 components.
223 """
225 def __getitem__(self, index: Any) -> Any:
226 """Get primaryURI and componentURIs by index.
228 Provides support for tuple-like access.
229 """
230 if index == 0:
231 return self.primaryURI
232 elif index == 1:
233 return self.componentURIs
234 raise IndexError("list index out of range")
236 def __len__(self) -> int:
237 """Get the number of data members.
239 Provides support for tuple-like access.
240 """
241 return 2
243 def __repr__(self) -> str:
244 return f"DatasetRefURIs({repr(self.primaryURI)}, {repr(self.componentURIs)})"
247class Datastore(metaclass=ABCMeta):
248 """Datastore interface.
250 Parameters
251 ----------
252 config : `DatastoreConfig` or `str`
253 Load configuration either from an existing config instance or by
254 referring to a configuration file.
255 bridgeManager : `DatastoreRegistryBridgeManager`
256 Object that manages the interface between `Registry` and datastores.
257 butlerRoot : `str`, optional
258 New datastore root to use to override the configuration value.
259 """
261 defaultConfigFile: ClassVar[Optional[str]] = None
262 """Path to configuration defaults. Accessed within the ``config`` resource
263 or relative to a search path. Can be None if no defaults specified.
264 """
266 containerKey: ClassVar[Optional[str]] = None
267 """Name of the key containing a list of subconfigurations that also
268 need to be merged with defaults and will likely use different Python
269 datastore classes (but all using DatastoreConfig). Assumed to be a
270 list of configurations that can be represented in a DatastoreConfig
271 and containing a "cls" definition. None indicates that no containers
272 are expected in this Datastore."""
274 isEphemeral: bool = False
275 """Indicate whether this Datastore is ephemeral or not. An ephemeral
276 datastore is one where the contents of the datastore will not exist
277 across process restarts. This value can change per-instance."""
279 config: DatastoreConfig
280 """Configuration used to create Datastore."""
282 name: str
283 """Label associated with this Datastore."""
285 storageClassFactory: StorageClassFactory
286 """Factory for creating storage class instances from name."""
288 constraints: Constraints
289 """Constraints to apply when putting datasets into the datastore."""
291 # MyPy does not like for this to be annotated as any kind of type, because
292 # it can't do static checking on type variables that can change at runtime.
293 IngestPrepData: ClassVar[Any] = IngestPrepData
294 """Helper base class for ingest implementations.
295 """
297 @classmethod
298 @abstractmethod
299 def setConfigRoot(cls, root: str, config: Config, full: Config, overwrite: bool = True) -> None:
300 """Set filesystem-dependent config options for this datastore.
302 The options will be appropriate for a new empty repository with the
303 given root.
305 Parameters
306 ----------
307 root : `str`
308 Filesystem path to the root of the data repository.
309 config : `Config`
310 A `Config` to update. Only the subset understood by
311 this component will be updated. Will not expand
312 defaults.
313 full : `Config`
314 A complete config with all defaults expanded that can be
315 converted to a `DatastoreConfig`. Read-only and will not be
316 modified by this method.
317 Repository-specific options that should not be obtained
318 from defaults when Butler instances are constructed
319 should be copied from ``full`` to ``config``.
320 overwrite : `bool`, optional
321 If `False`, do not modify a value in ``config`` if the value
322 already exists. Default is always to overwrite with the provided
323 ``root``.
325 Notes
326 -----
327 If a keyword is explicitly defined in the supplied ``config`` it
328 will not be overridden by this method if ``overwrite`` is `False`.
329 This allows explicit values set in external configs to be retained.
330 """
331 raise NotImplementedError()
333 @staticmethod
334 def fromConfig(
335 config: Config,
336 bridgeManager: DatastoreRegistryBridgeManager,
337 butlerRoot: Optional[ResourcePathExpression] = None,
338 ) -> "Datastore":
339 """Create datastore from type specified in config file.
341 Parameters
342 ----------
343 config : `Config`
344 Configuration instance.
345 bridgeManager : `DatastoreRegistryBridgeManager`
346 Object that manages the interface between `Registry` and
347 datastores.
348 butlerRoot : `str`, optional
349 Butler root directory.
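 Examples
 --------
 A minimal sketch, assuming a datastore configuration file and an
 existing bridge manager (both names below are placeholders):
 >>> config = DatastoreConfig("datastore.yaml")  # doctest: +SKIP
 >>> datastore = Datastore.fromConfig(config, bridgeManager)  # doctest: +SKIP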
350 """
351 cls = doImportType(config["datastore", "cls"])
352 if not issubclass(cls, Datastore):
353 raise TypeError(f"Imported child class {config['datastore', 'cls']} is not a Datastore")
354 return cls(config=config, bridgeManager=bridgeManager, butlerRoot=butlerRoot)
356 def __init__(
357 self,
358 config: Union[Config, str],
359 bridgeManager: DatastoreRegistryBridgeManager,
360 butlerRoot: Optional[ResourcePathExpression] = None,
361 ):
362 self.config = DatastoreConfig(config)
363 self.name = "ABCDataStore"
364 self._transaction: Optional[DatastoreTransaction] = None
366 # All Datastores need storage classes and constraints
367 self.storageClassFactory = StorageClassFactory()
369 # And read the constraints list
370 constraintsConfig = self.config.get("constraints")
371 self.constraints = Constraints(constraintsConfig, universe=bridgeManager.universe)
373 def __str__(self) -> str:
374 return self.name
376 def __repr__(self) -> str:
377 return self.name
379 @property
380 def names(self) -> Tuple[str, ...]:
381 """Names associated with this datastore returned as a list.
383 Can be different to ``name`` for a chaining datastore.
384 """
385 # Default implementation returns solely the name itself
386 return (self.name,)
388 @contextlib.contextmanager
389 def transaction(self) -> Iterator[DatastoreTransaction]:
390 """Context manager supporting `Datastore` transactions.
392 Transactions can be nested, and are to be used in combination with
393 `Registry.transaction`.
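 A typical pattern (sketch only; ``datastore``, ``obj`` and ``ref`` are
 placeholders for a concrete datastore, an in-memory dataset and a
 resolved `DatasetRef`). If the block raises, registered undo actions
 are rolled back; otherwise the nested transaction is committed:
 >>> with datastore.transaction():  # doctest: +SKIP
 ...     datastore.put(obj, ref)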
394 """
395 self._transaction = DatastoreTransaction(self._transaction)
396 try:
397 yield self._transaction
398 except BaseException:
399 self._transaction.rollback()
400 raise
401 else:
402 self._transaction.commit()
403 self._transaction = self._transaction.parent
405 @abstractmethod
406 def knows(self, ref: DatasetRef) -> bool:
407 """Check if the dataset is known to the datastore.
409 Does not check for existence of any artifact.
411 Parameters
412 ----------
413 ref : `DatasetRef`
414 Reference to the required dataset.
416 Returns
417 -------
418 exists : `bool`
419 `True` if the dataset is known to the datastore.
420 """
421 raise NotImplementedError()
423 def mexists(
424 self, refs: Iterable[DatasetRef], artifact_existence: Optional[Dict[ResourcePath, bool]] = None
425 ) -> Dict[DatasetRef, bool]:
426 """Check the existence of multiple datasets at once.
428 Parameters
429 ----------
430 refs : iterable of `DatasetRef`
431 The datasets to be checked.
432 artifact_existence : `dict` [`lsst.resources.ResourcePath`, `bool`]
433 Optional mapping of datastore artifact to existence. Updated by
434 this method with details of all artifacts tested. Can be `None`
435 if the caller is not interested.
437 Returns
438 -------
439 existence : `dict` of [`DatasetRef`, `bool`]
440 Mapping from dataset to boolean indicating existence.
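 Examples
 --------
 A sketch (``datastore`` and ``refs`` are placeholders):
 >>> existence = datastore.mexists(refs)  # doctest: +SKIP
 >>> missing = [ref for ref, found in existence.items() if not found]  # doctest: +SKIP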
441 """
442 existence: Dict[DatasetRef, bool] = {}
443 # Non-optimized default.
444 for ref in refs:
445 existence[ref] = self.exists(ref)
446 return existence
448 @abstractmethod
449 def exists(self, datasetRef: DatasetRef) -> bool:
450 """Check if the dataset exists in the datastore.
452 Parameters
453 ----------
454 datasetRef : `DatasetRef`
455 Reference to the required dataset.
457 Returns
458 -------
459 exists : `bool`
460 `True` if the entity exists in the `Datastore`.
461 """
462 raise NotImplementedError("Must be implemented by subclass")
464 @abstractmethod
465 def get(self, datasetRef: DatasetRef, parameters: Optional[Mapping[str, Any]] = None) -> Any:
466 """Load an `InMemoryDataset` from the store.
468 Parameters
469 ----------
470 datasetRef : `DatasetRef`
471 Reference to the required Dataset.
472 parameters : `dict`
473 `StorageClass`-specific parameters that specify a slice of the
474 Dataset to be loaded.
476 Returns
477 -------
478 inMemoryDataset : `object`
479 Requested Dataset or slice thereof as an InMemoryDataset.
480 """
481 raise NotImplementedError("Must be implemented by subclass")
483 @abstractmethod
484 def put(self, inMemoryDataset: Any, datasetRef: DatasetRef) -> None:
485 """Write a `InMemoryDataset` with a given `DatasetRef` to the store.
487 Parameters
488 ----------
489 inMemoryDataset : `object`
490 The Dataset to store.
491 datasetRef : `DatasetRef`
492 Reference to the associated Dataset.
493 """
494 raise NotImplementedError("Must be implemented by subclass")
496 def _overrideTransferMode(self, *datasets: FileDataset, transfer: Optional[str] = None) -> Optional[str]:
497 """Allow ingest transfer mode to be defaulted based on datasets.
499 Parameters
500 ----------
501 datasets : `FileDataset`
502 Each positional argument is a struct containing information about
503 a file to be ingested, including its path (either absolute or
504 relative to the datastore root, if applicable), a complete
505 `DatasetRef` (with ``dataset_id not None``), and optionally a
506 formatter class or its fully-qualified string name. If a formatter
507 is not provided, this method should populate that attribute with
508 the formatter the datastore would use for `put`. Subclasses are
509 also permitted to modify the path attribute (typically to put it
510 in what the datastore considers its standard form).
511 transfer : `str`, optional
512 How (and whether) the dataset should be added to the datastore.
513 See `ingest` for details of transfer modes.
515 Returns
516 -------
517 newTransfer : `str`
518 Transfer mode to use. Will be identical to the supplied transfer
519 mode unless "auto" is used.
520 """
521 if transfer != "auto":
522 return transfer
523 raise RuntimeError(f"{transfer} is not allowed without specialization.")
525 def _prepIngest(self, *datasets: FileDataset, transfer: Optional[str] = None) -> IngestPrepData:
526 """Process datasets to identify which ones can be ingested.
528 Parameters
529 ----------
530 datasets : `FileDataset`
531 Each positional argument is a struct containing information about
532 a file to be ingested, including its path (either absolute or
533 relative to the datastore root, if applicable), a complete
534 `DatasetRef` (with ``dataset_id not None``), and optionally a
535 formatter class or its fully-qualified string name. If a formatter
536 is not provided, this method should populate that attribute with
537 the formatter the datastore would use for `put`. Subclasses are
538 also permitted to modify the path attribute (typically to put it
539 in what the datastore considers its standard form).
540 transfer : `str`, optional
541 How (and whether) the dataset should be added to the datastore.
542 See `ingest` for details of transfer modes.
544 Returns
545 -------
546 data : `IngestPrepData`
547 An instance of a subclass of `IngestPrepData`, used to pass
548 arbitrary data from `_prepIngest` to `_finishIngest`. This should
549 include only the datasets this datastore can actually ingest;
550 others should be silently ignored (`Datastore.ingest` will inspect
551 `IngestPrepData.refs` and raise `DatasetTypeNotSupportedError` if
552 necessary).
554 Raises
555 ------
556 NotImplementedError
557 Raised if the datastore does not support the given transfer mode
558 (including the case where ingest is not supported at all).
559 FileNotFoundError
560 Raised if one of the given files does not exist.
561 FileExistsError
562 Raised if transfer is not `None` but the (internal) location the
563 file would be moved to is already occupied.
565 Notes
566 -----
567 This method (along with `_finishIngest`) should be implemented by
568 subclasses to provide ingest support instead of implementing `ingest`
569 directly.
571 `_prepIngest` should not modify the data repository or given files in
572 any way; all changes should be deferred to `_finishIngest`.
574 When possible, exceptions should be raised in `_prepIngest` instead of
575 `_finishIngest`. `NotImplementedError` exceptions that indicate that
576 the transfer mode is not supported must be raised by `_prepIngest`
577 instead of `_finishIngest`.
578 """
579 raise NotImplementedError(f"Datastore {self} does not support direct file-based ingest.")
581 def _finishIngest(
582 self, prepData: IngestPrepData, *, transfer: Optional[str] = None, record_validation_info: bool = True
583 ) -> None:
584 """Complete an ingest operation.
586 Parameters
587 ----------
588 data : `IngestPrepData`
589 An instance of a subclass of `IngestPrepData`. Guaranteed to be
590 the direct result of a call to `_prepIngest` on this datastore.
591 transfer : `str`, optional
592 How (and whether) the dataset should be added to the datastore.
593 See `ingest` for details of transfer modes.
594 record_validation_info : `bool`, optional
595 If `True`, the default, the datastore can record validation
596 information associated with the file. If `False` the datastore
597 will not attempt to track any information such as checksums
598 or file sizes. This can be useful if such information is tracked
599 in an external system or if the file is to be compressed in place.
600 It is up to the datastore whether this parameter is relevant.
602 Raises
603 ------
604 FileNotFoundError
605 Raised if one of the given files does not exist.
606 FileExistsError
607 Raised if transfer is not `None` but the (internal) location the
608 file would be moved to is already occupied.
610 Notes
611 -----
612 This method (along with `_prepIngest`) should be implemented by
613 subclasses to provide ingest support instead of implementing `ingest`
614 directly.
615 """
616 raise NotImplementedError(f"Datastore {self} does not support direct file-based ingest.")
618 def ingest(
619 self, *datasets: FileDataset, transfer: Optional[str] = None, record_validation_info: bool = True
620 ) -> None:
621 """Ingest one or more files into the datastore.
623 Parameters
624 ----------
625 datasets : `FileDataset`
626 Each positional argument is a struct containing information about
627 a file to be ingested, including its path (either absolute or
628 relative to the datastore root, if applicable), a complete
629 `DatasetRef` (with ``dataset_id not None``), and optionally a
630 formatter class or its fully-qualified string name. If a formatter
631 is not provided, the one the datastore would use for ``put`` on
632 that dataset is assumed.
633 transfer : `str`, optional
634 How (and whether) the dataset should be added to the datastore.
635 If `None` (default), the file must already be in a location
636 appropriate for the datastore (e.g. within its root directory),
637 and will not be modified. Other choices include "move", "copy",
638 "link", "symlink", "relsymlink", and "hardlink". "link" is a
639 special transfer mode that will first try to make a hardlink and
640 if that fails a symlink will be used instead. "relsymlink" creates
641 a relative symlink rather than use an absolute path.
642 Most datastores do not support all transfer modes.
643 "auto" is a special option that will let the
644 data store choose the most natural option for itself.
645 record_validation_info : `bool`, optional
646 If `True`, the default, the datastore can record validation
647 information associated with the file. If `False` the datastore
648 will not attempt to track any information such as checksums
649 or file sizes. This can be useful if such information is tracked
650 in an external system or if the file is to be compressed in place.
651 It is up to the datastore whether this parameter is relevant.
653 Raises
654 ------
655 NotImplementedError
656 Raised if the datastore does not support the given transfer mode
657 (including the case where ingest is not supported at all).
658 DatasetTypeNotSupportedError
659 Raised if one or more files to be ingested have a dataset type that
660 is not supported by the datastore.
661 FileNotFoundError
662 Raised if one of the given files does not exist.
663 FileExistsError
664 Raised if transfer is not `None` but the (internal) location the
665 file would be moved to is already occupied.
667 Notes
668 -----
669 Subclasses should implement `_prepIngest` and `_finishIngest` instead
670 of implementing `ingest` directly. Datastores that hold and
671 delegate to child datastores may want to call those methods as well.
673 Subclasses are encouraged to document their supported transfer modes
674 in their class documentation.
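 Examples
 --------
 A sketch of a single-file ingest (``datastore`` and ``ref`` are
 placeholders; the path and transfer mode are purely illustrative):
 >>> dataset = FileDataset(path="files/data.fits", refs=ref)  # doctest: +SKIP
 >>> datastore.ingest(dataset, transfer="copy")  # doctest: +SKIP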
675 """
676 # Allow a datastore to select a default transfer mode
677 transfer = self._overrideTransferMode(*datasets, transfer=transfer)
678 prepData = self._prepIngest(*datasets, transfer=transfer)
679 refs = {ref.id: ref for dataset in datasets for ref in dataset.refs}
680 if None in refs:
681 # Find the file for the error message. There may be multiple
682 # bad refs so look for all of them.
683 unresolved_paths = {}
684 for dataset in datasets:
685 unresolved = []
686 for ref in dataset.refs:
687 if ref.id is None:
688 unresolved.append(ref)
689 if unresolved:
690 unresolved_paths[dataset.path] = unresolved
691 raise RuntimeError(
692 "Attempt to ingest unresolved DatasetRef from: "
693 + ",".join(f"{p}: ({[str(r) for r in ref]})" for p, ref in unresolved_paths.items())
694 )
695 if refs.keys() != prepData.refs.keys():
696 unsupported = refs.keys() - prepData.refs.keys()
697 # Group unsupported refs by DatasetType for an informative
698 # but still concise error message.
699 byDatasetType = defaultdict(list)
700 for datasetId in unsupported:
701 ref = refs[datasetId]
702 byDatasetType[ref.datasetType].append(ref)
703 raise DatasetTypeNotSupportedError(
704 "DatasetType(s) not supported in ingest: "
705 + ", ".join(f"{k.name} ({len(v)} dataset(s))" for k, v in byDatasetType.items())
706 )
707 self._finishIngest(prepData, transfer=transfer, record_validation_info=record_validation_info)
709 def transfer_from(
710 self,
711 source_datastore: Datastore,
712 refs: Iterable[DatasetRef],
713 local_refs: Optional[Iterable[DatasetRef]] = None,
714 transfer: str = "auto",
715 artifact_existence: Optional[Dict[ResourcePath, bool]] = None,
716 ) -> None:
717 """Transfer dataset artifacts from another datastore to this one.
719 Parameters
720 ----------
721 source_datastore : `Datastore`
722 The datastore from which to transfer artifacts. That datastore
723 must be compatible with this datastore receiving the artifacts.
724 refs : iterable of `DatasetRef`
725 The datasets to transfer from the source datastore.
726 local_refs : iterable of `DatasetRef`, optional
727 The dataset refs as known to the registry associated with this
728 datastore. Can be `None` if the source and target datastores
729 are using UUIDs.
730 transfer : `str`, optional
731 How (and whether) the dataset should be added to the datastore.
732 Choices include "move", "copy",
733 "link", "symlink", "relsymlink", and "hardlink". "link" is a
734 special transfer mode that will first try to make a hardlink and
735 if that fails a symlink will be used instead. "relsymlink" creates
736 a relative symlink rather than use an absolute path.
737 Most datastores do not support all transfer modes.
738 "auto" (the default) is a special option that will let the
739 data store choose the most natural option for itself.
740 If the source location and transfer location are identical the
741 transfer mode will be ignored.
742 artifact_existence : `dict` [`lsst.resources.ResourcePath`, `bool`]
743 Optional mapping of datastore artifact to existence. Updated by
744 this method with details of all artifacts tested. Can be `None`
745 if the caller is not interested.
747 Raises
748 ------
749 TypeError
750 Raised if the two datastores are not compatible.
751 """
752 if type(self) is not type(source_datastore):
753 raise TypeError(
754 f"Datastore mismatch between this datastore ({type(self)}) and the "
755 f"source datastore ({type(source_datastore)})."
756 )
758 raise NotImplementedError(f"Datastore {type(self)} must implement a transfer_from method.")
760 def getManyURIs(
761 self,
762 refs: Iterable[DatasetRef],
763 predict: bool = False,
764 allow_missing: bool = False,
765 ) -> Dict[DatasetRef, DatasetRefURIs]:
766 """Return URIs associated with many datasets.
768 Parameters
769 ----------
770 refs : iterable of `DatasetIdRef`
771 References to the required datasets.
772 predict : `bool`, optional
773 If the datastore does not know about a dataset, should it
774 return a predicted URI or not?
775 allow_missing : `bool`
776 If `False`, and `predict` is `False`, will raise if a `DatasetRef`
777 does not exist.
779 Returns
780 -------
781 URIs : `dict` of [`DatasetRef`, `DatasetRefURIs`]
782 A dict of primary and component URIs, indexed by the passed-in
783 refs.
785 Raises
786 ------
787 FileNotFoundError
788 A URI has been requested for a dataset that does not exist and
789 guessing is not allowed.
791 Notes
792 -----
793 In file-based datastores, `getManyURIs` does not check that the files
794 really exist; it assumes that if the datastore is aware of a file
795 then it exists.
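 Examples
 --------
 A sketch (``datastore`` and ``refs`` are placeholders):
 >>> uris = datastore.getManyURIs(refs, allow_missing=True)  # doctest: +SKIP
 >>> for ref, ref_uris in uris.items():  # doctest: +SKIP
 ...     print(ref, ref_uris.primaryURI, ref_uris.componentURIs)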
796 """
797 uris: Dict[DatasetRef, DatasetRefURIs] = {}
798 missing_refs = []
799 for ref in refs:
800 try:
801 uris[ref] = self.getURIs(ref, predict=predict)
802 except FileNotFoundError:
803 missing_refs.append(ref)
804 if missing_refs and not allow_missing:
805 raise FileNotFoundError(
806 "Missing {} refs from datastore out of {} and predict=False.".format(
807 num_missing := len(missing_refs), num_missing + len(uris)
808 )
809 )
810 return uris
812 @abstractmethod
813 def getURIs(self, datasetRef: DatasetRef, predict: bool = False) -> DatasetRefURIs:
814 """Return URIs associated with dataset.
816 Parameters
817 ----------
818 datasetRef : `DatasetRef`
819 Reference to the required dataset.
820 predict : `bool`, optional
821 If the datastore does not know about the dataset, should it
822 return a predicted URI or not?
824 Returns
825 -------
826 uris : `DatasetRefURIs`
827 The URI to the primary artifact associated with this dataset (if
828 the dataset was disassembled within the datastore this may be
829 `None`), and the URIs to any components associated with the dataset
830 artifact (which can be empty if there are no components).
831 """
832 raise NotImplementedError()
834 @abstractmethod
835 def getURI(self, datasetRef: DatasetRef, predict: bool = False) -> ResourcePath:
836 """URI to the Dataset.
838 Parameters
839 ----------
840 datasetRef : `DatasetRef`
841 Reference to the required Dataset.
842 predict : `bool`
843 If `True` attempt to predict the URI for a dataset if it does
844 not exist in datastore.
846 Returns
847 -------
848 uri : `lsst.resources.ResourcePath`
849 URI pointing to the Dataset within the datastore. If the
850 Dataset does not exist in the datastore, the URI may be a guess.
851 If the datastore does not have entities that relate well
852 to the concept of a URI the returned URI string will be
853 descriptive. The returned URI is not guaranteed to be obtainable.
855 Raises
856 ------
857 FileNotFoundError
858 A URI has been requested for a dataset that does not exist and
859 guessing is not allowed.
860 """
861 raise NotImplementedError("Must be implemented by subclass")
863 @abstractmethod
864 def retrieveArtifacts(
865 self,
866 refs: Iterable[DatasetRef],
867 destination: ResourcePath,
868 transfer: str = "auto",
869 preserve_path: bool = True,
870 overwrite: bool = False,
871 ) -> List[ResourcePath]:
872 """Retrieve the artifacts associated with the supplied refs.
874 Parameters
875 ----------
876 refs : iterable of `DatasetRef`
877 The datasets for which artifacts are to be retrieved.
878 A single ref can result in multiple artifacts. The refs must
879 be resolved.
880 destination : `lsst.resources.ResourcePath`
881 Location to write the artifacts.
882 transfer : `str`, optional
883 Method to use to transfer the artifacts. Must be one of the options
884 supported by `lsst.resources.ResourcePath.transfer_from()`.
885 "move" is not allowed.
886 preserve_path : `bool`, optional
887 If `True` the full path of the artifact within the datastore
888 is preserved. If `False` the final file component of the path
889 is used.
890 overwrite : `bool`, optional
891 If `True` allow transfers to overwrite existing files at the
892 destination.
894 Returns
895 -------
896 targets : `list` of `lsst.resources.ResourcePath`
897 URIs of file artifacts in destination location. Order is not
898 preserved.
900 Notes
901 -----
902 For non-file datastores the artifacts written to the destination
903 may not match the representation inside the datastore. For example
904 a hierarchical data structure in a NoSQL database may well be stored
905 as a JSON file.
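 Examples
 --------
 A sketch (``datastore`` and ``refs`` are placeholders; the destination
 directory is illustrative):
 >>> from lsst.resources import ResourcePath  # doctest: +SKIP
 >>> targets = datastore.retrieveArtifacts(  # doctest: +SKIP
 ...     refs, ResourcePath("retrieved/"), transfer="copy", preserve_path=False
 ... )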
906 """
907 raise NotImplementedError()
909 @abstractmethod
910 def remove(self, datasetRef: DatasetRef) -> None:
911 """Indicate to the Datastore that a Dataset can be removed.
913 Parameters
914 ----------
915 datasetRef : `DatasetRef`
916 Reference to the required Dataset.
918 Raises
919 ------
920 FileNotFoundError
921 When Dataset does not exist.
923 Notes
924 -----
925 Some Datastores may implement this method as a silent no-op to
926 disable Dataset deletion through standard interfaces.
927 """
928 raise NotImplementedError("Must be implemented by subclass")
930 @abstractmethod
931 def forget(self, refs: Iterable[DatasetRef]) -> None:
932 """Indicate to the Datastore that it should remove all records of the
933 given datasets, without actually deleting them.
935 Parameters
936 ----------
937 refs : `Iterable` [ `DatasetRef` ]
938 References to the datasets being forgotten.
940 Notes
941 -----
942 Asking a datastore to forget a `DatasetRef` it does not hold should be
943 a silent no-op, not an error.
944 """
945 raise NotImplementedError("Must be implemented by subclass")
947 @abstractmethod
948 def trash(self, ref: Union[DatasetRef, Iterable[DatasetRef]], ignore_errors: bool = True) -> None:
949 """Indicate to the Datastore that a Dataset can be moved to the trash.
951 Parameters
952 ----------
953 ref : `DatasetRef` or iterable thereof
954 Reference(s) to the required Dataset.
955 ignore_errors : `bool`, optional
956 Determine whether errors should be ignored. When multiple
957 refs are being trashed there will be no per-ref check.
959 Raises
960 ------
961 FileNotFoundError
962 When Dataset does not exist and errors are not ignored. Only
963 checked if a single ref is supplied (and not in a list).
965 Notes
966 -----
967 Some Datastores may implement this method as a silent no-op to
968 disable Dataset deletion through standard interfaces.
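 Examples
 --------
 A sketch of the usual two-step removal (``datastore`` and ``refs`` are
 placeholders); trashed datasets are only deleted when the trash is
 emptied:
 >>> datastore.trash(refs)  # doctest: +SKIP
 >>> datastore.emptyTrash()  # doctest: +SKIP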
969 """
970 raise NotImplementedError("Must be implemented by subclass")
972 @abstractmethod
973 def emptyTrash(self, ignore_errors: bool = True) -> None:
974 """Remove all datasets from the trash.
976 Parameters
977 ----------
978 ignore_errors : `bool`, optional
979 Determine whether errors should be ignored.
981 Notes
982 -----
983 Some Datastores may implement this method as a silent no-op to
984 disable Dataset deletion through standard interfaces.
985 """
986 raise NotImplementedError("Must be implemented by subclass")
988 @abstractmethod
989 def transfer(self, inputDatastore: Datastore, datasetRef: DatasetRef) -> None:
990 """Transfer a dataset from another datastore to this datastore.
992 Parameters
993 ----------
994 inputDatastore : `Datastore`
995 The external `Datastore` from which to retrieve the Dataset.
996 datasetRef : `DatasetRef`
997 Reference to the required Dataset.
998 """
999 raise NotImplementedError("Must be implemented by subclass")
1001 def export(
1002 self, refs: Iterable[DatasetRef], *, directory: Optional[str] = None, transfer: Optional[str] = None
1003 ) -> Iterable[FileDataset]:
1004 """Export datasets for transfer to another data repository.
1006 Parameters
1007 ----------
1008 refs : iterable of `DatasetRef`
1009 Dataset references to be exported.
1010 directory : `str`, optional
1011 Path to a directory that should contain files corresponding to
1012 output datasets. Ignored if ``transfer`` is `None`.
1013 transfer : `str`, optional
1014 Mode that should be used to move datasets out of the repository.
1015 Valid options are the same as those of the ``transfer`` argument
1016 to ``ingest``, and datastores may similarly signal that a transfer
1017 mode is not supported by raising `NotImplementedError`.
1019 Returns
1020 -------
1021 datasets : iterable of `FileDataset`
1022 Structs containing information about the exported datasets, in the
1023 same order as ``refs``.
1025 Raises
1026 ------
1027 NotImplementedError
1028 Raised if the given transfer mode is not supported.
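 Examples
 --------
 A sketch (``datastore`` and ``refs`` are placeholders; the directory
 and transfer mode are illustrative):
 >>> exported = list(  # doctest: +SKIP
 ...     datastore.export(refs, directory="export_dir", transfer="copy")
 ... )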
1029 """
1030 raise NotImplementedError(f"Transfer mode {transfer} not supported.")
1032 @abstractmethod
1033 def validateConfiguration(
1034 self, entities: Iterable[Union[DatasetRef, DatasetType, StorageClass]], logFailures: bool = False
1035 ) -> None:
1036 """Validate some of the configuration for this datastore.
1038 Parameters
1039 ----------
1040 entities : iterable of `DatasetRef`, `DatasetType`, or `StorageClass`
1041 Entities to test against this configuration. Can be differing
1042 types.
1043 logFailures : `bool`, optional
1044 If `True`, output a log message for every validation error
1045 detected.
1047 Raises
1048 ------
1049 DatastoreValidationError
1050 Raised if there is a validation problem with a configuration.
1052 Notes
1053 -----
1054 Which parts of the configuration are validated is at the discretion
1055 of each Datastore implementation.
1056 """
1057 raise NotImplementedError("Must be implemented by subclass")
1059 @abstractmethod
1060 def validateKey(self, lookupKey: LookupKey, entity: Union[DatasetRef, DatasetType, StorageClass]) -> None:
1061 """Validate a specific look up key with supplied entity.
1063 Parameters
1064 ----------
1065 lookupKey : `LookupKey`
1066 Key to use to retrieve information from the datastore
1067 configuration.
1068 entity : `DatasetRef`, `DatasetType`, or `StorageClass`
1069 Entity to compare with configuration retrieved using the
1070 specified lookup key.
1072 Raises
1073 ------
1074 DatastoreValidationError
1075 Raised if there is a problem with the combination of entity
1076 and lookup key.
1078 Notes
1079 -----
1080 Bypasses the normal selection priorities by allowing a key that
1081 would normally not be selected to be validated.
1082 """
1083 raise NotImplementedError("Must be implemented by subclass")
1085 @abstractmethod
1086 def getLookupKeys(self) -> Set[LookupKey]:
1087 """Return all the lookup keys relevant to this datastore.
1089 Returns
1090 -------
1091 keys : `set` of `LookupKey`
1092 The keys stored internally for looking up information based
1093 on `DatasetType` name or `StorageClass`.
1094 """
1095 raise NotImplementedError("Must be implemented by subclass")
1097 def needs_expanded_data_ids(
1098 self,
1099 transfer: Optional[str],
1100 entity: Optional[Union[DatasetRef, DatasetType, StorageClass]] = None,
1101 ) -> bool:
1102 """Test whether this datastore needs expanded data IDs to ingest.
1104 Parameters
1105 ----------
1106 transfer : `str` or `None`
1107 Transfer mode for ingest.
1108 entity : `DatasetRef`, `DatasetType`, or `StorageClass`, optional
1109 Object representing what will be ingested. If not provided (or not
1110 specific enough), `True` may be returned even if expanded data
1111 IDs aren't necessary.
1113 Returns
1114 -------
1115 needed : `bool`
1116 If `True`, expanded data IDs may be needed. `False` only if
1117 expansion definitely isn't necessary.
1118 """
1119 return True
1121 @abstractmethod
1122 def import_records(
1123 self,
1124 data: Mapping[str, DatastoreRecordData],
1125 ) -> None:
1126 """Import datastore location and record data from an in-memory data
1127 structure.
1129 Parameters
1130 ----------
1131 data : `Mapping` [ `str`, `DatastoreRecordData` ]
1132 Datastore records indexed by datastore name. May contain data for
1133 other `Datastore` instances (generally because they are chained to
1134 this one), which should be ignored.
1136 Notes
1137 -----
1138 Implementations should generally not check that any external resources
1139 (e.g. files) referred to by these records actually exist, for
1140 performance reasons; we expect higher-level code to guarantee that they
1141 do.
1143 Implementations are responsible for calling
1144 `DatastoreRegistryBridge.insert` on all datasets in ``data.locations``
1145 where the key is in `names`, as well as loading any opaque table data.
1146 """
1147 raise NotImplementedError()
1149 @abstractmethod
1150 def export_records(
1151 self,
1152 refs: Iterable[DatasetIdRef],
1153 ) -> Mapping[str, DatastoreRecordData]:
1154 """Export datastore records and locations to an in-memory data
1155 structure.
1157 Parameters
1158 ----------
1159 refs : `Iterable` [ `DatasetIdRef` ]
1160 Datasets to save. This may include datasets not known to this
1161 datastore, which should be ignored.
1163 Returns
1164 -------
1165 data : `Mapping` [ `str`, `DatastoreRecordData` ]
1166 Exported datastore records indexed by datastore name.
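 Examples
 --------
 A sketch of moving records between two datastores (both names are
 placeholders):
 >>> records = source_datastore.export_records(refs)  # doctest: +SKIP
 >>> target_datastore.import_records(records)  # doctest: +SKIP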
1167 """
1168 raise NotImplementedError()