# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
22"""Support for generic data stores."""
24from __future__ import annotations
26__all__ = ("DatastoreConfig", "Datastore", "DatastoreValidationError", "DatasetRefURIs")
28import contextlib
29import dataclasses
30import logging
31from abc import ABCMeta, abstractmethod
32from collections import abc, defaultdict
33from typing import (
34 TYPE_CHECKING,
35 Any,
36 Callable,
37 ClassVar,
38 Dict,
39 Iterable,
40 Iterator,
41 List,
42 Mapping,
43 Optional,
44 Set,
45 Tuple,
46 Type,
47 Union,
48)
50from lsst.utils import doImportType
52from .config import Config, ConfigSubset
53from .constraints import Constraints
54from .exceptions import DatasetTypeNotSupportedError, ValidationError
55from .fileDataset import FileDataset
56from .storageClass import StorageClassFactory

if TYPE_CHECKING:
    from lsst.resources import ResourcePath, ResourcePathExpression

    from ..registry.interfaces import DatasetIdRef, DatastoreRegistryBridgeManager
    from .configSupport import LookupKey
    from .datasets import DatasetRef, DatasetType
    from .datastoreRecordData import DatastoreRecordData
    from .storageClass import StorageClass


class DatastoreConfig(ConfigSubset):
    """Configuration for Datastores."""

    component = "datastore"
    requiredKeys = ("cls",)
    defaultConfigFile = "datastore.yaml"


class DatastoreValidationError(ValidationError):
    """There is a problem with the Datastore configuration."""

    pass


@dataclasses.dataclass(frozen=True)
class Event:
    __slots__ = {"name", "undoFunc", "args", "kwargs"}
    name: str
    undoFunc: Callable
    args: tuple
    kwargs: dict


class IngestPrepData:
    """A helper base class for `Datastore` ingest implementations.

    Datastore implementations will generally need a custom implementation of
    this class.

    Should be accessed as ``Datastore.IngestPrepData`` instead of via direct
    import.

    Parameters
    ----------
    refs : iterable of `DatasetRef`
        References for the datasets that can be ingested by this datastore.
    """

    def __init__(self, refs: Iterable[DatasetRef]):
        self.refs = {ref.id: ref for ref in refs}


class DatastoreTransaction:
    """Keeps a log of `Datastore` activity and allows rollback.

    Parameters
    ----------
    parent : `DatastoreTransaction`, optional
        The parent transaction (if any).
    """

    Event: ClassVar[Type] = Event

    parent: Optional[DatastoreTransaction]
    """The parent transaction. (`DatastoreTransaction`, optional)"""

    def __init__(self, parent: Optional[DatastoreTransaction] = None):
        self.parent = parent
        self._log: List[Event] = []

    def registerUndo(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> None:
        """Register event with undo function.

        Parameters
        ----------
        name : `str`
            Name of the event.
        undoFunc : func
            Function to undo this event.
        args : `tuple`
            Positional arguments to `undoFunc`.
        **kwargs
            Keyword arguments to `undoFunc`.
        """
        self._log.append(self.Event(name, undoFunc, args, kwargs))

    @contextlib.contextmanager
    def undoWith(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> Iterator[None]:
        """Register undo function if nested operation succeeds.

        Calls `registerUndo`.

        This can be used to wrap individual undo-able statements within a
        DatastoreTransaction block. Multiple statements that can fail
        separately should not be part of the same `undoWith` block.

        All arguments are forwarded directly to `registerUndo`.
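
        Examples
        --------
        A minimal illustrative sketch; the file path, the hypothetical
        ``write_sidecar`` helper, and the use of `os.remove` as the undo
        function are assumptions for the example, not part of this API::

            import os

            txn = DatastoreTransaction()
            with txn.undoWith("write sidecar", os.remove, "/tmp/sidecar.json"):
                write_sidecar("/tmp/sidecar.json")  # hypothetical helper
            # If write_sidecar() raised, nothing is registered; otherwise a
            # later txn.rollback() calls os.remove("/tmp/sidecar.json").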
155 """
156 try:
157 yield None
158 except BaseException:
159 raise
160 else:
161 self.registerUndo(name, undoFunc, *args, **kwargs)
163 def rollback(self) -> None:
164 """Roll back all events in this transaction."""
165 log = logging.getLogger(__name__)
166 while self._log:
167 ev = self._log.pop()
168 try:
169 log.debug(
170 "Rolling back transaction: %s: %s(%s,%s)",
171 ev.name,
172 ev.undoFunc,
173 ",".join(str(a) for a in ev.args),
174 ",".join(f"{k}={v}" for k, v in ev.kwargs.items()),
175 )
176 except Exception:
177 # In case we had a problem in stringification of arguments
178 log.warning("Rolling back transaction: %s", ev.name)
179 try:
180 ev.undoFunc(*ev.args, **ev.kwargs)
181 except BaseException as e:
182 # Deliberately swallow error that may occur in unrolling
183 log.warning("Exception: %s caught while unrolling: %s", e, ev.name)
184 pass
186 def commit(self) -> None:
187 """Commit this transaction."""
188 if self.parent is None:
189 # Just forget about the events, they have already happened.
190 return
191 else:
            # We may still want to roll back events from this transaction
            # as part of the parent.
            self.parent._log.extend(self._log)


@dataclasses.dataclass
class DatasetRefURIs(abc.Sequence):
    """Represents the primary and component ResourcePath(s) associated with a
    DatasetRef.

    This is used in places where its members used to be represented as a tuple
    `(primaryURI, componentURIs)`. To maintain backward compatibility this
    inherits from Sequence and so instances can be treated as a two-item
    tuple.
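
    Examples
    --------
    An illustrative sketch only; ``primary_uri`` and ``wcs_uri`` stand in
    for `~lsst.resources.ResourcePath` instances obtained elsewhere::

        uris = DatasetRefURIs(primary_uri, {"wcs": wcs_uri})
        primaryURI, componentURIs = uris  # tuple-like unpacking still works
        assert uris[0] is uris.primaryURI
        assert len(uris) == 2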
206 """
208 def __init__(
209 self,
210 primaryURI: Optional[ResourcePath] = None,
211 componentURIs: Optional[Dict[str, ResourcePath]] = None,
212 ):
214 self.primaryURI = primaryURI
215 """The URI to the primary artifact associated with this dataset. If the
216 dataset was disassembled within the datastore this may be `None`.
217 """
219 self.componentURIs = componentURIs or {}
220 """The URIs to any components associated with the dataset artifact
221 indexed by component name. This can be empty if there are no
222 components.
223 """
225 def __getitem__(self, index: Any) -> Any:
226 """Get primaryURI and componentURIs by index.
228 Provides support for tuple-like access.
229 """
230 if index == 0:
231 return self.primaryURI
232 elif index == 1:
233 return self.componentURIs
234 raise IndexError("list index out of range")
236 def __len__(self) -> int:
237 """Get the number of data members.
239 Provides support for tuple-like access.
240 """
241 return 2
243 def __repr__(self) -> str:
244 return f"DatasetRefURIs({repr(self.primaryURI)}, {repr(self.componentURIs)})"


class Datastore(metaclass=ABCMeta):
    """Datastore interface.

    Parameters
    ----------
    config : `DatastoreConfig` or `str`
        Load configuration either from an existing config instance or by
        referring to a configuration file.
    bridgeManager : `DatastoreRegistryBridgeManager`
        Object that manages the interface between `Registry` and datastores.
    butlerRoot : `str`, optional
        New datastore root to use to override the configuration value.
    """

    defaultConfigFile: ClassVar[Optional[str]] = None
    """Path to configuration defaults. Accessed within the ``config`` resource
    or relative to a search path. Can be None if no defaults specified.
    """

    containerKey: ClassVar[Optional[str]] = None
    """Name of the key containing a list of subconfigurations that also
    need to be merged with defaults and will likely use different Python
    datastore classes (but all using DatastoreConfig). Assumed to be a
    list of configurations that can be represented in a DatastoreConfig
    and containing a "cls" definition. None indicates that no containers
    are expected in this Datastore."""

    isEphemeral: bool = False
    """Indicate whether this Datastore is ephemeral or not. An ephemeral
    datastore is one where the contents of the datastore will not exist
    across process restarts. This value can change per-instance."""

    config: DatastoreConfig
    """Configuration used to create Datastore."""

    name: str
    """Label associated with this Datastore."""

    storageClassFactory: StorageClassFactory
    """Factory for creating storage class instances from name."""

    constraints: Constraints
    """Constraints to apply when putting datasets into the datastore."""

    # MyPy does not like for this to be annotated as any kind of type, because
    # it can't do static checking on type variables that can change at runtime.
    IngestPrepData: ClassVar[Any] = IngestPrepData
    """Helper base class for ingest implementations.
    """

    @classmethod
    @abstractmethod
    def setConfigRoot(cls, root: str, config: Config, full: Config, overwrite: bool = True) -> None:
        """Set filesystem-dependent config options for this datastore.

        The options will be appropriate for a new empty repository with the
        given root.

        Parameters
        ----------
        root : `str`
            Filesystem path to the root of the data repository.
        config : `Config`
            A `Config` to update. Only the subset understood by
            this component will be updated. Will not expand
            defaults.
        full : `Config`
            A complete config with all defaults expanded that can be
            converted to a `DatastoreConfig`. Read-only and will not be
            modified by this method.
            Repository-specific options that should not be obtained
            from defaults when Butler instances are constructed
            should be copied from ``full`` to ``config``.
        overwrite : `bool`, optional
            If `False`, do not modify a value in ``config`` if the value
            already exists. Default is always to overwrite with the provided
            ``root``.

        Notes
        -----
        If a keyword is explicitly defined in the supplied ``config`` it
        will not be overridden by this method if ``overwrite`` is `False`.
        This allows explicit values set in external configs to be retained.
        """
        raise NotImplementedError()

    @staticmethod
    def fromConfig(
        config: Config,
        bridgeManager: DatastoreRegistryBridgeManager,
        butlerRoot: Optional[ResourcePathExpression] = None,
    ) -> "Datastore":
        """Create datastore from type specified in config file.

        Parameters
        ----------
        config : `Config`
            Configuration instance.
        bridgeManager : `DatastoreRegistryBridgeManager`
            Object that manages the interface between `Registry` and
            datastores.
        butlerRoot : `str`, optional
            Butler root directory.
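
        Examples
        --------
        A hedged sketch of typical use; it assumes ``config`` already
        contains a ``datastore.cls`` entry and that ``bridgeManager`` and
        ``root`` were obtained elsewhere::

            datastore = Datastore.fromConfig(config, bridgeManager, butlerRoot=root)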
350 """
351 cls = doImportType(config["datastore", "cls"])
352 if not issubclass(cls, Datastore):
353 raise TypeError(f"Imported child class {config['datastore', 'cls']} is not a Datastore")
354 return cls(config=config, bridgeManager=bridgeManager, butlerRoot=butlerRoot)
356 def __init__(
357 self,
358 config: Union[Config, str],
359 bridgeManager: DatastoreRegistryBridgeManager,
360 butlerRoot: Optional[ResourcePathExpression] = None,
361 ):
362 self.config = DatastoreConfig(config)
363 self.name = "ABCDataStore"
364 self._transaction: Optional[DatastoreTransaction] = None
366 # All Datastores need storage classes and constraints
367 self.storageClassFactory = StorageClassFactory()
369 # And read the constraints list
370 constraintsConfig = self.config.get("constraints")
371 self.constraints = Constraints(constraintsConfig, universe=bridgeManager.universe)
373 def __str__(self) -> str:
374 return self.name
376 def __repr__(self) -> str:
377 return self.name

    @property
    def names(self) -> Tuple[str, ...]:
        """Names associated with this datastore returned as a tuple.

        Can be different to ``name`` for a chaining datastore.
        """
        # Default implementation returns solely the name itself
        return (self.name,)

    @contextlib.contextmanager
    def transaction(self) -> Iterator[DatastoreTransaction]:
        """Context manager supporting `Datastore` transactions.

        Transactions can be nested, and are to be used in combination with
        `Registry.transaction`.
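
        Examples
        --------
        Illustrative sketch only; ``datastore`` is assumed to be a concrete
        `Datastore` instance and ``cleanup`` a callable defined elsewhere::

            with datastore.transaction() as txn:
                txn.registerUndo("cleanup", cleanup)
                ...  # datastore operations; cleanup() runs only on rollback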
394 """
395 self._transaction = DatastoreTransaction(self._transaction)
396 try:
397 yield self._transaction
398 except BaseException:
399 self._transaction.rollback()
400 raise
401 else:
402 self._transaction.commit()
403 self._transaction = self._transaction.parent
405 @abstractmethod
406 def knows(self, ref: DatasetRef) -> bool:
407 """Check if the dataset is known to the datastore.
409 Does not check for existence of any artifact.
411 Parameters
412 ----------
413 ref : `DatasetRef`
414 Reference to the required dataset.
416 Returns
417 -------
418 exists : `bool`
419 `True` if the dataset is known to the datastore.
420 """
421 raise NotImplementedError()

    def knows_these(self, refs: Iterable[DatasetRef]) -> dict[DatasetRef, bool]:
        """Check which of the given datasets are known to this datastore.

        This is like ``mexists()`` but does not check that the file exists.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            The datasets to check.

        Returns
        -------
        exists : `dict`[`DatasetRef`, `bool`]
            Mapping of dataset to boolean indicating whether the dataset
            is known to the datastore.
        """
        # Non-optimized default calls knows() repeatedly.
        return {ref: self.knows(ref) for ref in refs}

    def mexists(
        self, refs: Iterable[DatasetRef], artifact_existence: Optional[Dict[ResourcePath, bool]] = None
    ) -> Dict[DatasetRef, bool]:
        """Check the existence of multiple datasets at once.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            The datasets to be checked.
        artifact_existence : `dict` [`lsst.resources.ResourcePath`, `bool`]
            Optional mapping of datastore artifact to existence. Updated by
            this method with details of all artifacts tested. Can be `None`
            if the caller is not interested.

        Returns
        -------
        existence : `dict` of [`DatasetRef`, `bool`]
            Mapping from dataset to boolean indicating existence.
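
        Examples
        --------
        Illustrative sketch, assuming ``datastore`` is a concrete `Datastore`
        and ``refs`` an iterable of resolved `DatasetRef` instances::

            existence = datastore.mexists(refs)
            missing = [ref for ref, found in existence.items() if not found]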
460 """
461 existence: Dict[DatasetRef, bool] = {}
462 # Non-optimized default.
463 for ref in refs:
464 existence[ref] = self.exists(ref)
465 return existence
467 @abstractmethod
468 def exists(self, datasetRef: DatasetRef) -> bool:
469 """Check if the dataset exists in the datastore.
471 Parameters
472 ----------
473 datasetRef : `DatasetRef`
474 Reference to the required dataset.
476 Returns
477 -------
478 exists : `bool`
479 `True` if the entity exists in the `Datastore`.
480 """
481 raise NotImplementedError("Must be implemented by subclass")
483 @abstractmethod
    def get(
        self,
        datasetRef: DatasetRef,
        parameters: Optional[Mapping[str, Any]] = None,
        storageClass: Optional[Union[StorageClass, str]] = None,
    ) -> Any:
490 """Load an `InMemoryDataset` from the store.
492 Parameters
493 ----------
494 datasetRef : `DatasetRef`
495 Reference to the required Dataset.
496 parameters : `dict`
497 `StorageClass`-specific parameters that specify a slice of the
498 Dataset to be loaded.
499 storageClass : `StorageClass` or `str`, optional
500 The storage class to be used to override the Python type
501 returned by this method. By default the returned type matches
502 the dataset type definition for this dataset. Specifying a
503 read `StorageClass` can force a different type to be returned.
504 This type must be compatible with the original type.
506 Returns
507 -------
508 inMemoryDataset : `object`
509 Requested Dataset or slice thereof as an InMemoryDataset.
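
        Examples
        --------
        Illustrative sketch only; ``datastore`` is a concrete `Datastore`,
        ``ref`` a resolved `DatasetRef`, and the ``bbox`` parameter is a
        hypothetical `StorageClass`-specific parameter::

            obj = datastore.get(ref)
            cutout = datastore.get(ref, parameters={"bbox": bbox})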
510 """
511 raise NotImplementedError("Must be implemented by subclass")
513 @abstractmethod
514 def put(self, inMemoryDataset: Any, datasetRef: DatasetRef) -> None:
515 """Write a `InMemoryDataset` with a given `DatasetRef` to the store.
517 Parameters
518 ----------
519 inMemoryDataset : `object`
520 The Dataset to store.
521 datasetRef : `DatasetRef`
522 Reference to the associated Dataset.
523 """
524 raise NotImplementedError("Must be implemented by subclass")

    def _overrideTransferMode(self, *datasets: FileDataset, transfer: Optional[str] = None) -> Optional[str]:
        """Allow ingest transfer mode to be defaulted based on datasets.

        Parameters
        ----------
        datasets : `FileDataset`
            Each positional argument is a struct containing information about
            a file to be ingested, including its path (either absolute or
            relative to the datastore root, if applicable), a complete
            `DatasetRef` (with ``dataset_id not None``), and optionally a
            formatter class or its fully-qualified string name. If a formatter
            is not provided, this method should populate that attribute with
            the formatter the datastore would use for `put`. Subclasses are
            also permitted to modify the path attribute (typically to put it
            in what the datastore considers its standard form).
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            See `ingest` for details of transfer modes.

        Returns
        -------
        newTransfer : `str`
            Transfer mode to use. Will be identical to the supplied transfer
            mode unless "auto" is used.
        """
        if transfer != "auto":
            return transfer
        raise RuntimeError(f"{transfer} is not allowed without specialization.")

    def _prepIngest(self, *datasets: FileDataset, transfer: Optional[str] = None) -> IngestPrepData:
        """Process datasets to identify which ones can be ingested.

        Parameters
        ----------
        datasets : `FileDataset`
            Each positional argument is a struct containing information about
            a file to be ingested, including its path (either absolute or
            relative to the datastore root, if applicable), a complete
            `DatasetRef` (with ``dataset_id not None``), and optionally a
            formatter class or its fully-qualified string name. If a formatter
            is not provided, this method should populate that attribute with
            the formatter the datastore would use for `put`. Subclasses are
            also permitted to modify the path attribute (typically to put it
            in what the datastore considers its standard form).
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            See `ingest` for details of transfer modes.

        Returns
        -------
        data : `IngestPrepData`
            An instance of a subclass of `IngestPrepData`, used to pass
            arbitrary data from `_prepIngest` to `_finishIngest`. This should
            include only the datasets this datastore can actually ingest;
            others should be silently ignored (`Datastore.ingest` will inspect
            `IngestPrepData.refs` and raise `DatasetTypeNotSupportedError` if
            necessary).

        Raises
        ------
        NotImplementedError
            Raised if the datastore does not support the given transfer mode
            (including the case where ingest is not supported at all).
        FileNotFoundError
            Raised if one of the given files does not exist.
        FileExistsError
            Raised if transfer is not `None` but the (internal) location the
            file would be moved to is already occupied.

        Notes
        -----
        This method (along with `_finishIngest`) should be implemented by
        subclasses to provide ingest support instead of implementing `ingest`
        directly.

        `_prepIngest` should not modify the data repository or given files in
        any way; all changes should be deferred to `_finishIngest`.

        When possible, exceptions should be raised in `_prepIngest` instead of
        `_finishIngest`. `NotImplementedError` exceptions that indicate that
        the transfer mode is not supported must be raised by `_prepIngest`
        instead of `_finishIngest`.
        """
        raise NotImplementedError(f"Datastore {self} does not support direct file-based ingest.")

    def _finishIngest(
        self, prepData: IngestPrepData, *, transfer: Optional[str] = None, record_validation_info: bool = True
    ) -> None:
        """Complete an ingest operation.

        Parameters
        ----------
        prepData : `IngestPrepData`
            An instance of a subclass of `IngestPrepData`. Guaranteed to be
            the direct result of a call to `_prepIngest` on this datastore.
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            See `ingest` for details of transfer modes.
        record_validation_info : `bool`, optional
            If `True`, the default, the datastore can record validation
            information associated with the file. If `False` the datastore
            will not attempt to track any information such as checksums
            or file sizes. This can be useful if such information is tracked
            in an external system or if the file is to be compressed in place.
            It is up to the datastore whether this parameter is relevant.

        Raises
        ------
        FileNotFoundError
            Raised if one of the given files does not exist.
        FileExistsError
            Raised if transfer is not `None` but the (internal) location the
            file would be moved to is already occupied.

        Notes
        -----
        This method (along with `_prepIngest`) should be implemented by
        subclasses to provide ingest support instead of implementing `ingest`
        directly.
        """
        raise NotImplementedError(f"Datastore {self} does not support direct file-based ingest.")

    def ingest(
        self, *datasets: FileDataset, transfer: Optional[str] = None, record_validation_info: bool = True
    ) -> None:
        """Ingest one or more files into the datastore.

        Parameters
        ----------
        datasets : `FileDataset`
            Each positional argument is a struct containing information about
            a file to be ingested, including its path (either absolute or
            relative to the datastore root, if applicable), a complete
            `DatasetRef` (with ``dataset_id not None``), and optionally a
            formatter class or its fully-qualified string name. If a formatter
            is not provided, the one the datastore would use for ``put`` on
            that dataset is assumed.
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            If `None` (default), the file must already be in a location
            appropriate for the datastore (e.g. within its root directory),
            and will not be modified. Other choices include "move", "copy",
            "link", "symlink", "relsymlink", and "hardlink". "link" is a
            special transfer mode that will first try to make a hardlink and
            if that fails a symlink will be used instead. "relsymlink" creates
            a relative symlink rather than use an absolute path.
            Most datastores do not support all transfer modes.
            "auto" is a special option that will let the
            data store choose the most natural option for itself.
        record_validation_info : `bool`, optional
            If `True`, the default, the datastore can record validation
            information associated with the file. If `False` the datastore
            will not attempt to track any information such as checksums
            or file sizes. This can be useful if such information is tracked
            in an external system or if the file is to be compressed in place.
            It is up to the datastore whether this parameter is relevant.

        Raises
        ------
        NotImplementedError
            Raised if the datastore does not support the given transfer mode
            (including the case where ingest is not supported at all).
        DatasetTypeNotSupportedError
            Raised if one or more files to be ingested have a dataset type that
            is not supported by the datastore.
        FileNotFoundError
            Raised if one of the given files does not exist.
        FileExistsError
            Raised if transfer is not `None` but the (internal) location the
            file would be moved to is already occupied.

        Notes
        -----
        Subclasses should implement `_prepIngest` and `_finishIngest` instead
        of implementing `ingest` directly. Datastores that hold and
        delegate to child datastores may want to call those methods as well.

        Subclasses are encouraged to document their supported transfer modes
        in their class documentation.
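
        Examples
        --------
        A hedged sketch of a single-file ingest; ``datastore``, the file path
        and the "copy" transfer mode are assumptions for the example, and
        ``ref`` must be a resolved `DatasetRef` with a dataset ID::

            datastore.ingest(
                FileDataset(path="data/raw_0001.fits", refs=[ref]),
                transfer="copy",
            )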
705 """
706 # Allow a datastore to select a default transfer mode
707 transfer = self._overrideTransferMode(*datasets, transfer=transfer)
708 prepData = self._prepIngest(*datasets, transfer=transfer)
709 refs = {ref.id: ref for dataset in datasets for ref in dataset.refs}
710 if None in refs:
711 # Find the file for the error message. There may be multiple
712 # bad refs so look for all of them.
713 unresolved_paths = {}
714 for dataset in datasets:
715 unresolved = []
716 for ref in dataset.refs:
717 if ref.id is None:
718 unresolved.append(ref)
719 if unresolved:
720 unresolved_paths[dataset.path] = unresolved
721 raise RuntimeError(
722 "Attempt to ingest unresolved DatasetRef from: "
723 + ",".join(f"{p}: ({[str(r) for r in ref]})" for p, ref in unresolved_paths.items())
724 )
725 if refs.keys() != prepData.refs.keys():
726 unsupported = refs.keys() - prepData.refs.keys()
727 # Group unsupported refs by DatasetType for an informative
728 # but still concise error message.
729 byDatasetType = defaultdict(list)
730 for datasetId in unsupported:
731 ref = refs[datasetId]
732 byDatasetType[ref.datasetType].append(ref)
733 raise DatasetTypeNotSupportedError(
734 "DatasetType(s) not supported in ingest: "
735 + ", ".join(f"{k.name} ({len(v)} dataset(s))" for k, v in byDatasetType.items())
736 )
737 self._finishIngest(prepData, transfer=transfer, record_validation_info=record_validation_info)

    def transfer_from(
        self,
        source_datastore: Datastore,
        refs: Iterable[DatasetRef],
        local_refs: Optional[Iterable[DatasetRef]] = None,
        transfer: str = "auto",
        artifact_existence: Optional[Dict[ResourcePath, bool]] = None,
    ) -> None:
        """Transfer dataset artifacts from another datastore to this one.

        Parameters
        ----------
        source_datastore : `Datastore`
            The datastore from which to transfer artifacts. That datastore
            must be compatible with this datastore receiving the artifacts.
        refs : iterable of `DatasetRef`
            The datasets to transfer from the source datastore.
        local_refs : iterable of `DatasetRef`, optional
            The dataset refs associated with the registry associated with
            this datastore. Can be `None` if the source and target datastore
            are using UUIDs.
        transfer : `str`, optional
            How (and whether) the dataset should be added to the datastore.
            Choices include "move", "copy",
            "link", "symlink", "relsymlink", and "hardlink". "link" is a
            special transfer mode that will first try to make a hardlink and
            if that fails a symlink will be used instead. "relsymlink" creates
            a relative symlink rather than use an absolute path.
            Most datastores do not support all transfer modes.
            "auto" (the default) is a special option that will let the
            data store choose the most natural option for itself.
            If the source location and transfer location are identical the
            transfer mode will be ignored.
        artifact_existence : `dict` [`lsst.resources.ResourcePath`, `bool`]
            Optional mapping of datastore artifact to existence. Updated by
            this method with details of all artifacts tested. Can be `None`
            if the caller is not interested.

        Raises
        ------
        TypeError
            Raised if the two datastores are not compatible.
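
        Examples
        --------
        Illustrative sketch only; ``source`` and ``target`` are assumed to be
        compatible concrete `Datastore` instances sharing UUID-based refs::

            target.transfer_from(source, refs, transfer="copy")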
781 """
782 if type(self) is not type(source_datastore):
783 raise TypeError(
784 f"Datastore mismatch between this datastore ({type(self)}) and the "
785 f"source datastore ({type(source_datastore)})."
786 )
788 raise NotImplementedError(f"Datastore {type(self)} must implement a transfer_from method.")

    def getManyURIs(
        self,
        refs: Iterable[DatasetRef],
        predict: bool = False,
        allow_missing: bool = False,
    ) -> Dict[DatasetRef, DatasetRefURIs]:
        """Return URIs associated with many datasets.

        Parameters
        ----------
        refs : iterable of `DatasetIdRef`
            References to the required datasets.
        predict : `bool`, optional
            If the datastore does not know about a dataset, should it
            return a predicted URI or not?
        allow_missing : `bool`
            If `False`, and `predict` is `False`, will raise if a `DatasetRef`
            does not exist.

        Returns
        -------
        URIs : `dict` of [`DatasetRef`, `DatasetRefURIs`]
            A dict of primary and component URIs, indexed by the passed-in
            refs.

        Raises
        ------
        FileNotFoundError
            A URI has been requested for a dataset that does not exist and
            guessing is not allowed.

        Notes
        -----
        In file-based datastores, ``getManyURIs`` does not check that the
        file is really there; it assumes that if the datastore is aware of
        the file then it exists.
        """
        uris: Dict[DatasetRef, DatasetRefURIs] = {}
        missing_refs = []
        for ref in refs:
            try:
                uris[ref] = self.getURIs(ref, predict=predict)
            except FileNotFoundError:
                missing_refs.append(ref)
        if missing_refs and not allow_missing:
            raise FileNotFoundError(
                "Missing {} refs from datastore out of {} and predict=False.".format(
                    num_missing := len(missing_refs), num_missing + len(uris)
                )
            )
        return uris

    @abstractmethod
    def getURIs(self, datasetRef: DatasetRef, predict: bool = False) -> DatasetRefURIs:
        """Return URIs associated with dataset.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required dataset.
        predict : `bool`, optional
            If the datastore does not know about the dataset, should it
            return a predicted URI or not?

        Returns
        -------
        uris : `DatasetRefURIs`
            The URI to the primary artifact associated with this dataset (if
            the dataset was disassembled within the datastore this may be
            `None`), and the URIs to any components associated with the dataset
            artifact. (can be empty if there are no components).
        """
        raise NotImplementedError()

    @abstractmethod
    def getURI(self, datasetRef: DatasetRef, predict: bool = False) -> ResourcePath:
        """URI to the Dataset.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required Dataset.
        predict : `bool`
            If `True` attempt to predict the URI for a dataset if it does
            not exist in datastore.

        Returns
        -------
        uri : `lsst.resources.ResourcePath`
            URI pointing to the Dataset within the datastore. If the
            Dataset does not exist in the datastore, the URI may be a guess.
            If the datastore does not have entities that relate well
            to the concept of a URI the returned URI string will be
            descriptive. The returned URI is not guaranteed to be obtainable.

        Raises
        ------
        FileNotFoundError
            A URI has been requested for a dataset that does not exist and
            guessing is not allowed.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def retrieveArtifacts(
        self,
        refs: Iterable[DatasetRef],
        destination: ResourcePath,
        transfer: str = "auto",
        preserve_path: bool = True,
        overwrite: bool = False,
    ) -> List[ResourcePath]:
        """Retrieve the artifacts associated with the supplied refs.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            The datasets for which artifacts are to be retrieved.
            A single ref can result in multiple artifacts. The refs must
            be resolved.
        destination : `lsst.resources.ResourcePath`
            Location to write the artifacts.
        transfer : `str`, optional
            Method to use to transfer the artifacts. Must be one of the options
            supported by `lsst.resources.ResourcePath.transfer_from()`.
            "move" is not allowed.
        preserve_path : `bool`, optional
            If `True` the full path of the artifact within the datastore
            is preserved. If `False` the final file component of the path
            is used.
        overwrite : `bool`, optional
            If `True` allow transfers to overwrite existing files at the
            destination.

        Returns
        -------
        targets : `list` of `lsst.resources.ResourcePath`
            URIs of file artifacts in destination location. Order is not
            preserved.

        Notes
        -----
        For non-file datastores the artifacts written to the destination
        may not match the representation inside the datastore. For example
        a hierarchical data structure in a NoSQL database may well be stored
        as a JSON file.
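
        Examples
        --------
        Illustrative sketch, assuming ``datastore`` is a concrete `Datastore`
        and the destination directory is an assumption for the example::

            from lsst.resources import ResourcePath

            destination = ResourcePath("retrieved/", forceDirectory=True)
            targets = datastore.retrieveArtifacts(refs, destination, transfer="copy")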
936 """
937 raise NotImplementedError()

    @abstractmethod
    def remove(self, datasetRef: DatasetRef) -> None:
        """Indicate to the Datastore that a Dataset can be removed.

        Parameters
        ----------
        datasetRef : `DatasetRef`
            Reference to the required Dataset.

        Raises
        ------
        FileNotFoundError
            When Dataset does not exist.

        Notes
        -----
        Some Datastores may implement this method as a silent no-op to
        disable Dataset deletion through standard interfaces.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def forget(self, refs: Iterable[DatasetRef]) -> None:
        """Indicate to the Datastore that it should remove all records of the
        given datasets, without actually deleting them.

        Parameters
        ----------
        refs : `Iterable` [ `DatasetRef` ]
            References to the datasets being forgotten.

        Notes
        -----
        Asking a datastore to forget a `DatasetRef` it does not hold should be
        a silent no-op, not an error.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def trash(self, ref: Union[DatasetRef, Iterable[DatasetRef]], ignore_errors: bool = True) -> None:
        """Indicate to the Datastore that a Dataset can be moved to the trash.

        Parameters
        ----------
        ref : `DatasetRef` or iterable thereof
            Reference(s) to the required Dataset.
        ignore_errors : `bool`, optional
            Determine whether errors should be ignored. When multiple
            refs are being trashed there will be no per-ref check.

        Raises
        ------
        FileNotFoundError
            When Dataset does not exist and errors are not ignored. Only
            checked if a single ref is supplied (and not in a list).

        Notes
        -----
        Some Datastores may implement this method as a silent no-op to
        disable Dataset deletion through standard interfaces.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def emptyTrash(self, ignore_errors: bool = True) -> None:
        """Remove all datasets from the trash.

        Parameters
        ----------
        ignore_errors : `bool`, optional
            Determine whether errors should be ignored.

        Notes
        -----
        Some Datastores may implement this method as a silent no-op to
        disable Dataset deletion through standard interfaces.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def transfer(self, inputDatastore: Datastore, datasetRef: DatasetRef) -> None:
        """Transfer a dataset from another datastore to this datastore.

        Parameters
        ----------
        inputDatastore : `Datastore`
            The external `Datastore` from which to retrieve the Dataset.
        datasetRef : `DatasetRef`
            Reference to the required Dataset.
        """
        raise NotImplementedError("Must be implemented by subclass")

    def export(
        self,
        refs: Iterable[DatasetRef],
        *,
        directory: Optional[ResourcePathExpression] = None,
        transfer: Optional[str] = "auto",
    ) -> Iterable[FileDataset]:
        """Export datasets for transfer to another data repository.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            Dataset references to be exported.
        directory : `str`, optional
            Path to a directory that should contain files corresponding to
            output datasets. Ignored if ``transfer`` is explicitly `None`.
        transfer : `str`, optional
            Mode that should be used to move datasets out of the repository.
            Valid options are the same as those of the ``transfer`` argument
            to ``ingest``, and datastores may similarly signal that a transfer
            mode is not supported by raising `NotImplementedError`. If "auto"
            is given and no ``directory`` is specified, `None` will be
            implied.

        Returns
        -------
        datasets : iterable of `FileDataset`
            Structs containing information about the exported datasets, in the
            same order as ``refs``.

        Raises
        ------
        NotImplementedError
            Raised if the given transfer mode is not supported.
        """
        raise NotImplementedError(f"Transfer mode {transfer} not supported.")

    @abstractmethod
    def validateConfiguration(
        self, entities: Iterable[Union[DatasetRef, DatasetType, StorageClass]], logFailures: bool = False
    ) -> None:
        """Validate some of the configuration for this datastore.

        Parameters
        ----------
        entities : iterable of `DatasetRef`, `DatasetType`, or `StorageClass`
            Entities to test against this configuration. Can be differing
            types.
        logFailures : `bool`, optional
            If `True`, output a log message for every validation error
            detected.

        Raises
        ------
        DatastoreValidationError
            Raised if there is a validation problem with a configuration.

        Notes
        -----
        Which parts of the configuration are validated is at the discretion
        of each Datastore implementation.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def validateKey(self, lookupKey: LookupKey, entity: Union[DatasetRef, DatasetType, StorageClass]) -> None:
        """Validate a specific look up key with supplied entity.

        Parameters
        ----------
        lookupKey : `LookupKey`
            Key to use to retrieve information from the datastore
            configuration.
        entity : `DatasetRef`, `DatasetType`, or `StorageClass`
            Entity to compare with configuration retrieved using the
            specified lookup key.

        Raises
        ------
        DatastoreValidationError
            Raised if there is a problem with the combination of entity
            and lookup key.

        Notes
        -----
        Bypasses the normal selection priorities by allowing a key that
        would normally not be selected to be validated.
        """
        raise NotImplementedError("Must be implemented by subclass")

    @abstractmethod
    def getLookupKeys(self) -> Set[LookupKey]:
        """Return all the lookup keys relevant to this datastore.

        Returns
        -------
        keys : `set` of `LookupKey`
            The keys stored internally for looking up information based
            on `DatasetType` name or `StorageClass`.
        """
        raise NotImplementedError("Must be implemented by subclass")

    def needs_expanded_data_ids(
        self,
        transfer: Optional[str],
        entity: Optional[Union[DatasetRef, DatasetType, StorageClass]] = None,
    ) -> bool:
        """Test whether this datastore needs expanded data IDs to ingest.

        Parameters
        ----------
        transfer : `str` or `None`
            Transfer mode for ingest.
        entity, optional
            Object representing what will be ingested. If not provided (or not
            specific enough), `True` may be returned even if expanded data
            IDs aren't necessary.

        Returns
        -------
        needed : `bool`
            If `True`, expanded data IDs may be needed. `False` only if
            expansion definitely isn't necessary.
        """
        return True

    @abstractmethod
    def import_records(
        self,
        data: Mapping[str, DatastoreRecordData],
    ) -> None:
        """Import datastore location and record data from an in-memory data
        structure.

        Parameters
        ----------
        data : `Mapping` [ `str`, `DatastoreRecordData` ]
            Datastore records indexed by datastore name. May contain data for
            other `Datastore` instances (generally because they are chained to
            this one), which should be ignored.

        Notes
        -----
        Implementations should generally not check that any external resources
        (e.g. files) referred to by these records actually exist, for
        performance reasons; we expect higher-level code to guarantee that they
        do.

        Implementations are responsible for calling
        `DatastoreRegistryBridge.insert` on all datasets in ``data.locations``
        where the key is in `names`, as well as loading any opaque table data.
        """
        raise NotImplementedError()

    @abstractmethod
    def export_records(
        self,
        refs: Iterable[DatasetIdRef],
    ) -> Mapping[str, DatastoreRecordData]:
        """Export datastore records and locations to an in-memory data
        structure.

        Parameters
        ----------
        refs : `Iterable` [ `DatasetIdRef` ]
            Datasets to save. This may include datasets not known to this
            datastore, which should be ignored.

        Returns
        -------
        data : `Mapping` [ `str`, `DatastoreRecordData` ]
            Exported datastore records indexed by datastore name.
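
        Examples
        --------
        Illustrative sketch of the intended pairing with `import_records`;
        ``source`` and ``target`` are assumed concrete `Datastore` instances::

            records = source.export_records(refs)
            target.import_records(records)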
1203 """
1204 raise NotImplementedError()