Coverage for python/lsst/daf/butler/core/datastore.py: 61%
250 statements
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22"""Support for generic data stores."""
24from __future__ import annotations
26__all__ = ("DatastoreConfig", "Datastore", "DatastoreValidationError", "DatasetRefURIs", "NullDatastore")
28import contextlib
29import dataclasses
30import logging
31import time
32from abc import ABCMeta, abstractmethod
33from collections import abc, defaultdict
34from collections.abc import Callable, Iterable, Iterator, Mapping
35from typing import TYPE_CHECKING, Any, ClassVar
37from lsst.utils import doImportType
39from .config import Config, ConfigSubset
40from .constraints import Constraints
41from .exceptions import DatasetTypeNotSupportedError, ValidationError
42from .fileDataset import FileDataset
43from .storageClass import StorageClassFactory
45if TYPE_CHECKING:
46 from lsst.resources import ResourcePath, ResourcePathExpression
48 from ..registry.interfaces import DatasetIdRef, DatastoreRegistryBridgeManager
49 from .configSupport import LookupKey
50 from .datasets import DatasetRef, DatasetType
51 from .datastoreRecordData import DatastoreRecordData
52 from .storageClass import StorageClass
54_LOG = logging.getLogger(__name__)
57class DatastoreConfig(ConfigSubset):
58 """Configuration for Datastores."""
60 component = "datastore"
61 requiredKeys = ("cls",)
62 defaultConfigFile = "datastore.yaml"
65class DatastoreValidationError(ValidationError):
66 """There is a problem with the Datastore configuration."""
68 pass
71@dataclasses.dataclass(frozen=True)
72class Event:
73 """Representation of an event that can be rolled back."""
75 __slots__ = {"name", "undoFunc", "args", "kwargs"}
76 name: str
77 undoFunc: Callable
78 args: tuple
79 kwargs: dict
82class IngestPrepData:
83 """A helper base class for `Datastore` ingest implementations.
85 Datastore implementations will generally need a custom implementation of
86 this class.
88 Should be accessed as ``Datastore.IngestPrepData`` instead of via direct
89 import.
91 Parameters
92 ----------
93 refs : iterable of `DatasetRef`
94 References for the datasets that can be ingested by this datastore.
95 """
97 def __init__(self, refs: Iterable[DatasetRef]):
98 self.refs = {ref.id: ref for ref in refs}
101class DatastoreTransaction:
102 """Keeps a log of `Datastore` activity and allow rollback.
104 Parameters
105 ----------
106 parent : `DatastoreTransaction`, optional
107 The parent transaction (if any).
108 """
110 Event: ClassVar[type] = Event
112 parent: DatastoreTransaction | None
113 """The parent transaction. (`DatastoreTransaction`, optional)"""
115 def __init__(self, parent: DatastoreTransaction | None = None):
116 self.parent = parent
117 self._log: list[Event] = []
119 def registerUndo(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> None:
120 """Register event with undo function.
122 Parameters
123 ----------
124 name : `str`
125 Name of the event.
126 undoFunc : `~collections.abc.Callable`
127 Function to undo this event.
128 args : `tuple`
129 Positional arguments to `undoFunc`.
130 **kwargs
131 Keyword arguments to `undoFunc`.
132 """
133 self._log.append(self.Event(name, undoFunc, args, kwargs))
135 @contextlib.contextmanager
136 def undoWith(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> Iterator[None]:
137 """Register undo function if nested operation succeeds.
139 Calls `registerUndo`.
141 This can be used to wrap individual undo-able statements within a
142 DatastoreTransaction block. Multiple statements that can fail
143 separately should not be part of the same `undoWith` block.
145 All arguments are forwarded directly to `registerUndo`.
146 """
147 try:
148 yield None
149 except BaseException:
150 raise
151 else:
152 self.registerUndo(name, undoFunc, *args, **kwargs)
154 def rollback(self) -> None:
155 """Roll back all events in this transaction."""
156 log = logging.getLogger(__name__)
157 while self._log:
158 ev = self._log.pop()
159 try:
160 log.debug(
161 "Rolling back transaction: %s: %s(%s,%s)",
162 ev.name,
163 ev.undoFunc,
164 ",".join(str(a) for a in ev.args),
165 ",".join(f"{k}={v}" for k, v in ev.kwargs.items()),
166 )
167 except Exception:
168 # In case we had a problem in stringification of arguments
169 log.warning("Rolling back transaction: %s", ev.name)
170 try:
171 ev.undoFunc(*ev.args, **ev.kwargs)
172 except BaseException as e:
173 # Deliberately swallow error that may occur in unrolling
174 log.warning("Exception: %s caught while unrolling: %s", e, ev.name)
175 pass
177 def commit(self) -> None:
178 """Commit this transaction."""
179 if self.parent is None:
180 # Just forget about the events, they have already happened.
181 return
182 else:
183 # We may still want to roll back events from this transaction as
184 # part of the parent.
185 self.parent._log.extend(self._log)
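# Illustrative sketch (not part of the original module): pairing an action
# with its undo via DatastoreTransaction.undoWith and rolling everything back
# on failure. The file paths and the copy operation are hypothetical
# stand-ins for a datastore artifact write.
def _example_transaction_rollback(src: str, dst: str) -> None:
    import os
    import shutil

    txn = DatastoreTransaction()
    try:
        # The undo is registered only if the wrapped statement succeeds.
        with txn.undoWith("copy artifact", os.remove, dst):
            shutil.copy(src, dst)
        raise RuntimeError("simulated failure after the copy")
    except BaseException:
        # Undo callbacks run in reverse order of registration.
        txn.rollback()
        raise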
188@dataclasses.dataclass
189class DatasetRefURIs(abc.Sequence):
190 """Represents the primary and component ResourcePath(s) associated with a
191 DatasetRef.
193 This is used in places where its members used to be represented as a tuple
194 `(primaryURI, componentURIs)`. To maintain backward compatibility this
195 inherits from Sequence and so instances can be treated as a two-item
196 tuple.
197 """
199 def __init__(
200 self,
201 primaryURI: ResourcePath | None = None,
202 componentURIs: dict[str, ResourcePath] | None = None,
203 ):
204 self.primaryURI = primaryURI
205 """The URI to the primary artifact associated with this dataset. If the
206 dataset was disassembled within the datastore this may be `None`.
207 """
209 self.componentURIs = componentURIs or {}
210 """The URIs to any components associated with the dataset artifact
211 indexed by component name. This can be empty if there are no
212 components.
213 """
215 def __getitem__(self, index: Any) -> Any:
216 """Get primaryURI and componentURIs by index.
218 Provides support for tuple-like access.
219 """
220 if index == 0:
221 return self.primaryURI
222 elif index == 1:
223 return self.componentURIs
224 raise IndexError("list index out of range")
226 def __len__(self) -> int:
227 """Get the number of data members.
229 Provides support for tuple-like access.
230 """
231 return 2
233 def __repr__(self) -> str:
234 return f"DatasetRefURIs({repr(self.primaryURI)}, {repr(self.componentURIs)})"
237class Datastore(metaclass=ABCMeta):
238 """Datastore interface.
240 Parameters
241 ----------
242 config : `DatastoreConfig` or `str`
243 Load configuration either from an existing config instance or by
244 referring to a configuration file.
245 bridgeManager : `DatastoreRegistryBridgeManager`
246 Object that manages the interface between `Registry` and datastores.
247 butlerRoot : `str`, optional
248 New datastore root to use to override the configuration value.
249 """
251 defaultConfigFile: ClassVar[str | None] = None
252 """Path to configuration defaults. Accessed within the ``config`` resource
253 or relative to a search path. Can be None if no defaults specified.
254 """
256 containerKey: ClassVar[str | None] = None
257 """Name of the key containing a list of subconfigurations that also
258 need to be merged with defaults and will likely use different Python
259 datastore classes (but all using DatastoreConfig). Assumed to be a
260 list of configurations that can be represented in a DatastoreConfig
261 and containing a "cls" definition. None indicates that no containers
262 are expected in this Datastore."""
264 isEphemeral: bool = False
265 """Indicate whether this Datastore is ephemeral or not. An ephemeral
266 datastore is one where the contents of the datastore will not exist
267 across process restarts. This value can change per-instance."""
269 config: DatastoreConfig
270 """Configuration used to create Datastore."""
272 name: str
273 """Label associated with this Datastore."""
275 storageClassFactory: StorageClassFactory
276 """Factory for creating storage class instances from name."""
278 constraints: Constraints
279 """Constraints to apply when putting datasets into the datastore."""
281 # MyPy does not like for this to be annotated as any kind of type, because
282 # it can't do static checking on type variables that can change at runtime.
283 IngestPrepData: ClassVar[Any] = IngestPrepData
284 """Helper base class for ingest implementations.
285 """
287 @classmethod
288 @abstractmethod
289 def setConfigRoot(cls, root: str, config: Config, full: Config, overwrite: bool = True) -> None:
290 """Set filesystem-dependent config options for this datastore.
292 The options will be appropriate for a new empty repository with the
293 given root.
295 Parameters
296 ----------
297 root : `str`
298 Filesystem path to the root of the data repository.
299 config : `Config`
300 A `Config` to update. Only the subset understood by
301 this component will be updated. Will not expand
302 defaults.
303 full : `Config`
304 A complete config with all defaults expanded that can be
305 converted to a `DatastoreConfig`. Read-only and will not be
306 modified by this method.
307 Repository-specific options that should not be obtained
308 from defaults when Butler instances are constructed
309 should be copied from ``full`` to ``config``.
310 overwrite : `bool`, optional
311 If `False`, do not modify a value in ``config`` if the value
312 already exists. Default is always to overwrite with the provided
313 ``root``.
315 Notes
316 -----
317 If a keyword is explicitly defined in the supplied ``config`` it
318 will not be overridden by this method if ``overwrite`` is `False`.
319 This allows explicit values set in external configs to be retained.
320 """
321 raise NotImplementedError()
323 @staticmethod
324 def fromConfig(
325 config: Config,
326 bridgeManager: DatastoreRegistryBridgeManager,
327 butlerRoot: ResourcePathExpression | None = None,
328 ) -> Datastore:
329 """Create datastore from type specified in config file.
331 Parameters
332 ----------
333 config : `Config` or `~lsst.resources.ResourcePathExpression`
334 Configuration instance.
335 bridgeManager : `DatastoreRegistryBridgeManager`
336 Object that manages the interface between `Registry` and
337 datastores.
338 butlerRoot : `str`, optional
339 Butler root directory.
340 """
341 cls = doImportType(config["datastore", "cls"])
342 if not issubclass(cls, Datastore):
343 raise TypeError(f"Imported child class {config['datastore', 'cls']} is not a Datastore")
344 return cls(config=config, bridgeManager=bridgeManager, butlerRoot=butlerRoot)
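# Illustrative sketch (not part of the original module): typical construction
# of a concrete datastore from configuration. The config file name and the
# ``bridgeManager`` object are hypothetical; Butler normally supplies both.
#
#     config = DatastoreConfig("datastore.yaml")  # must define datastore.cls
#     datastore = Datastore.fromConfig(config, bridgeManager=bridgeManager)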
346 def __init__(
347 self,
348 config: Config | ResourcePathExpression,
349 bridgeManager: DatastoreRegistryBridgeManager,
350 butlerRoot: ResourcePathExpression | None = None,
351 ):
352 self.config = DatastoreConfig(config)
353 self.name = "ABCDataStore"
354 self._transaction: DatastoreTransaction | None = None
356 # All Datastores need storage classes and constraints
357 self.storageClassFactory = StorageClassFactory()
359 # And read the constraints list
360 constraintsConfig = self.config.get("constraints")
361 self.constraints = Constraints(constraintsConfig, universe=bridgeManager.universe)
363 def __str__(self) -> str:
364 return self.name
366 def __repr__(self) -> str:
367 return self.name
369 @property
370 def names(self) -> tuple[str, ...]:
371 """Names associated with this datastore returned as a list.
373 Can be different to ``name`` for a chaining datastore.
374 """
375 # Default implementation returns solely the name itself
376 return (self.name,)
378 @property
379 def roots(self) -> dict[str, ResourcePath | None]:
380 """Return the root URIs for each named datastore.
382 Mapping from datastore name to root URI. The URI can be `None`
383 if a datastore has no concept of a root URI.
384 (`dict` [`str`, `ResourcePath` | `None`])
385 """
386 return {self.name: None}
388 @contextlib.contextmanager
389 def transaction(self) -> Iterator[DatastoreTransaction]:
390 """Context manager supporting `Datastore` transactions.
392 Transactions can be nested, and are to be used in combination with
393 `Registry.transaction`.
394 """
395 self._transaction = DatastoreTransaction(self._transaction)
396 try:
397 yield self._transaction
398 except BaseException:
399 self._transaction.rollback()
400 raise
401 else:
402 self._transaction.commit()
403 self._transaction = self._transaction.parent
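# Illustrative sketch (not part of the original module): typical use of the
# transaction context manager by calling code; ``datastore``, ``obj`` and
# ``ref`` are hypothetical.
#
#     with datastore.transaction():
#         datastore.put(obj, ref)
#
# If an exception escapes the block, the datastore rolls back any registered
# undo actions before re-raising; otherwise the transaction is committed.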
405 @abstractmethod
406 def knows(self, ref: DatasetRef) -> bool:
407 """Check if the dataset is known to the datastore.
409 Does not check for existence of any artifact.
411 Parameters
412 ----------
413 ref : `DatasetRef`
414 Reference to the required dataset.
416 Returns
417 -------
418 exists : `bool`
419 `True` if the dataset is known to the datastore.
420 """
421 raise NotImplementedError()
423 def knows_these(self, refs: Iterable[DatasetRef]) -> dict[DatasetRef, bool]:
424 """Check which of the given datasets are known to this datastore.
426 This is like ``mexists()`` but does not check that the files exist.
428 Parameters
429 ----------
430 refs : iterable of `DatasetRef`
431 The datasets to check.
433 Returns
434 -------
435 exists : `dict`[`DatasetRef`, `bool`]
436 Mapping of dataset to boolean indicating whether the dataset
437 is known to the datastore.
438 """
439 # Non-optimized default calls knows() repeatedly.
440 return {ref: self.knows(ref) for ref in refs}
442 def mexists(
443 self, refs: Iterable[DatasetRef], artifact_existence: dict[ResourcePath, bool] | None = None
444 ) -> dict[DatasetRef, bool]:
445 """Check the existence of multiple datasets at once.
447 Parameters
448 ----------
449 refs : iterable of `DatasetRef`
450 The datasets to be checked.
451 artifact_existence : `dict` [`lsst.resources.ResourcePath`, `bool`]
452 Optional mapping of datastore artifact to existence. Updated by
453 this method with details of all artifacts tested. Can be `None`
454 if the caller is not interested.
456 Returns
457 -------
458 existence : `dict` of [`DatasetRef`, `bool`]
459 Mapping from dataset to boolean indicating existence.
460 """
461 existence: dict[DatasetRef, bool] = {}
462 # Non-optimized default.
463 for ref in refs:
464 existence[ref] = self.exists(ref)
465 return existence
467 @abstractmethod
468 def exists(self, datasetRef: DatasetRef) -> bool:
469 """Check if the dataset exists in the datastore.
471 Parameters
472 ----------
473 datasetRef : `DatasetRef`
474 Reference to the required dataset.
476 Returns
477 -------
478 exists : `bool`
479 `True` if the entity exists in the `Datastore`.
480 """
481 raise NotImplementedError("Must be implemented by subclass")
483 @abstractmethod
484 def get(
485 self,
486 datasetRef: DatasetRef,
487 parameters: Mapping[str, Any] | None = None,
488 storageClass: StorageClass | str | None = None,
489 ) -> Any:
490 """Load an `InMemoryDataset` from the store.
492 Parameters
493 ----------
494 datasetRef : `DatasetRef`
495 Reference to the required Dataset.
496 parameters : `dict`
497 `StorageClass`-specific parameters that specify a slice of the
498 Dataset to be loaded.
499 storageClass : `StorageClass` or `str`, optional
500 The storage class to be used to override the Python type
501 returned by this method. By default the returned type matches
502 the dataset type definition for this dataset. Specifying a
503 read `StorageClass` can force a different type to be returned.
504 This type must be compatible with the original type.
506 Returns
507 -------
508 inMemoryDataset : `object`
509 Requested Dataset or slice thereof as an InMemoryDataset.
510 """
511 raise NotImplementedError("Must be implemented by subclass")
513 @abstractmethod
514 def put(self, inMemoryDataset: Any, datasetRef: DatasetRef) -> None:
515 """Write a `InMemoryDataset` with a given `DatasetRef` to the store.
517 Parameters
518 ----------
519 inMemoryDataset : `object`
520 The Dataset to store.
521 datasetRef : `DatasetRef`
522 Reference to the associated Dataset.
523 """
524 raise NotImplementedError("Must be implemented by subclass")
526 def _overrideTransferMode(self, *datasets: FileDataset, transfer: str | None = None) -> str | None:
527 """Allow ingest transfer mode to be defaulted based on datasets.
529 Parameters
530 ----------
531 datasets : `FileDataset`
532 Each positional argument is a struct containing information about
533 a file to be ingested, including its path (either absolute or
534 relative to the datastore root, if applicable), a complete
535 `DatasetRef` (with ``dataset_id not None``), and optionally a
536 formatter class or its fully-qualified string name. If a formatter
537 is not provided, this method should populate that attribute with
538 the formatter the datastore would use for `put`. Subclasses are
539 also permitted to modify the path attribute (typically to put it
540 in what the datastore considers its standard form).
541 transfer : `str`, optional
542 How (and whether) the dataset should be added to the datastore.
543 See `ingest` for details of transfer modes.
545 Returns
546 -------
547 newTransfer : `str`
548 Transfer mode to use. Will be identical to the supplied transfer
549 mode unless "auto" is used.
550 """
551 if transfer != "auto":
552 return transfer
553 raise RuntimeError(f"{transfer} is not allowed without specialization.")
555 def _prepIngest(self, *datasets: FileDataset, transfer: str | None = None) -> IngestPrepData:
556 """Process datasets to identify which ones can be ingested.
558 Parameters
559 ----------
560 datasets : `FileDataset`
561 Each positional argument is a struct containing information about
562 a file to be ingested, including its path (either absolute or
563 relative to the datastore root, if applicable), a complete
564 `DatasetRef` (with ``dataset_id not None``), and optionally a
565 formatter class or its fully-qualified string name. If a formatter
566 is not provided, this method should populate that attribute with
567 the formatter the datastore would use for `put`. Subclasses are
568 also permitted to modify the path attribute (typically to put it
569 in what the datastore considers its standard form).
570 transfer : `str`, optional
571 How (and whether) the dataset should be added to the datastore.
572 See `ingest` for details of transfer modes.
574 Returns
575 -------
576 data : `IngestPrepData`
577 An instance of a subclass of `IngestPrepData`, used to pass
578 arbitrary data from `_prepIngest` to `_finishIngest`. This should
579 include only the datasets this datastore can actually ingest;
580 others should be silently ignored (`Datastore.ingest` will inspect
581 `IngestPrepData.refs` and raise `DatasetTypeNotSupportedError` if
582 necessary).
584 Raises
585 ------
586 NotImplementedError
587 Raised if the datastore does not support the given transfer mode
588 (including the case where ingest is not supported at all).
589 FileNotFoundError
590 Raised if one of the given files does not exist.
591 FileExistsError
592 Raised if transfer is not `None` but the (internal) location the
593 file would be moved to is already occupied.
595 Notes
596 -----
597 This method (along with `_finishIngest`) should be implemented by
598 subclasses to provide ingest support instead of implementing `ingest`
599 directly.
601 `_prepIngest` should not modify the data repository or given files in
602 any way; all changes should be deferred to `_finishIngest`.
604 When possible, exceptions should be raised in `_prepIngest` instead of
605 `_finishIngest`. `NotImplementedError` exceptions that indicate that
606 the transfer mode is not supported must be raised by `_prepIngest`
607 instead of `_finishIngest`.
608 """
609 raise NotImplementedError(f"Datastore {self} does not support direct file-based ingest.")
611 def _finishIngest(
612 self, prepData: IngestPrepData, *, transfer: str | None = None, record_validation_info: bool = True
613 ) -> None:
614 """Complete an ingest operation.
616 Parameters
617 ----------
618 prepData : `IngestPrepData`
619 An instance of a subclass of `IngestPrepData`. Guaranteed to be
620 the direct result of a call to `_prepIngest` on this datastore.
621 transfer : `str`, optional
622 How (and whether) the dataset should be added to the datastore.
623 See `ingest` for details of transfer modes.
624 record_validation_info : `bool`, optional
625 If `True`, the default, the datastore can record validation
626 information associated with the file. If `False` the datastore
627 will not attempt to track any information such as checksums
628 or file sizes. This can be useful if such information is tracked
629 in an external system or if the file is to be compressed in place.
630 It is up to the datastore whether this parameter is relevant.
632 Raises
633 ------
634 FileNotFoundError
635 Raised if one of the given files does not exist.
636 FileExistsError
637 Raised if transfer is not `None` but the (internal) location the
638 file would be moved to is already occupied.
640 Notes
641 -----
642 This method (along with `_prepIngest`) should be implemented by
643 subclasses to provide ingest support instead of implementing `ingest`
644 directly.
645 """
646 raise NotImplementedError(f"Datastore {self} does not support direct file-based ingest.")
648 def ingest(
649 self, *datasets: FileDataset, transfer: str | None = None, record_validation_info: bool = True
650 ) -> None:
651 """Ingest one or more files into the datastore.
653 Parameters
654 ----------
655 datasets : `FileDataset`
656 Each positional argument is a struct containing information about
657 a file to be ingested, including its path (either absolute or
658 relative to the datastore root, if applicable), a complete
659 `DatasetRef` (with ``dataset_id not None``), and optionally a
660 formatter class or its fully-qualified string name. If a formatter
661 is not provided, the one the datastore would use for ``put`` on
662 that dataset is assumed.
663 transfer : `str`, optional
664 How (and whether) the dataset should be added to the datastore.
665 If `None` (default), the file must already be in a location
666 appropriate for the datastore (e.g. within its root directory),
667 and will not be modified. Other choices include "move", "copy",
668 "link", "symlink", "relsymlink", and "hardlink". "link" is a
669 special transfer mode that will first try to make a hardlink and
670 if that fails a symlink will be used instead. "relsymlink" creates
671 a relative symlink rather than use an absolute path.
672 Most datastores do not support all transfer modes.
673 "auto" is a special option that will let the
674 data store choose the most natural option for itself.
675 record_validation_info : `bool`, optional
676 If `True`, the default, the datastore can record validation
677 information associated with the file. If `False` the datastore
678 will not attempt to track any information such as checksums
679 or file sizes. This can be useful if such information is tracked
680 in an external system or if the file is to be compressed in place.
681 It is up to the datastore whether this parameter is relevant.
683 Raises
684 ------
685 NotImplementedError
686 Raised if the datastore does not support the given transfer mode
687 (including the case where ingest is not supported at all).
688 DatasetTypeNotSupportedError
689 Raised if one or more files to be ingested have a dataset type that
690 is not supported by the datastore.
691 FileNotFoundError
692 Raised if one of the given files does not exist.
693 FileExistsError
694 Raised if transfer is not `None` but the (internal) location the
695 file would be moved to is already occupied.
697 Notes
698 -----
699 Subclasses should implement `_prepIngest` and `_finishIngest` instead
700 of implementing `ingest` directly. Datastores that hold and
701 delegate to child datastores may want to call those methods as well.
703 Subclasses are encouraged to document their supported transfer modes
704 in their class documentation.
705 """
706 # Allow a datastore to select a default transfer mode
707 transfer = self._overrideTransferMode(*datasets, transfer=transfer)
708 prepData = self._prepIngest(*datasets, transfer=transfer)
709 refs = {ref.id: ref for dataset in datasets for ref in dataset.refs}
710 if refs.keys() != prepData.refs.keys():
711 unsupported = refs.keys() - prepData.refs.keys()
712 # Group unsupported refs by DatasetType for an informative
713 # but still concise error message.
714 byDatasetType = defaultdict(list)
715 for datasetId in unsupported:
716 ref = refs[datasetId]
717 byDatasetType[ref.datasetType].append(ref)
718 raise DatasetTypeNotSupportedError(
719 "DatasetType(s) not supported in ingest: "
720 + ", ".join(f"{k.name} ({len(v)} dataset(s))" for k, v in byDatasetType.items())
721 )
722 self._finishIngest(prepData, transfer=transfer, record_validation_info=record_validation_info)
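# Illustrative sketch (not part of the original module): how calling code
# might ingest an existing file; the path and ``ref`` are hypothetical and
# the datastore must support the chosen transfer mode.
#
#     dataset = FileDataset(path="data/raw_0001.fits", refs=[ref])
#     datastore.ingest(dataset, transfer="copy")
#
# Subclasses provide ingest support by implementing ``_prepIngest`` (checks,
# no side effects) and ``_finishIngest`` (performs the transfer) rather than
# overriding ``ingest`` itself.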
724 def transfer_from(
725 self,
726 source_datastore: Datastore,
727 refs: Iterable[DatasetRef],
728 transfer: str = "auto",
729 artifact_existence: dict[ResourcePath, bool] | None = None,
730 ) -> tuple[set[DatasetRef], set[DatasetRef]]:
731 """Transfer dataset artifacts from another datastore to this one.
733 Parameters
734 ----------
735 source_datastore : `Datastore`
736 The datastore from which to transfer artifacts. That datastore
737 must be compatible with this datastore receiving the artifacts.
738 refs : iterable of `DatasetRef`
739 The datasets to transfer from the source datastore.
740 transfer : `str`, optional
741 How (and whether) the dataset should be added to the datastore.
742 Choices include "move", "copy",
743 "link", "symlink", "relsymlink", and "hardlink". "link" is a
744 special transfer mode that will first try to make a hardlink and
745 if that fails a symlink will be used instead. "relsymlink" creates
746 a relative symlink rather than use an absolute path.
747 Most datastores do not support all transfer modes.
748 "auto" (the default) is a special option that will let the
749 data store choose the most natural option for itself.
750 If the source location and transfer location are identical the
751 transfer mode will be ignored.
752 artifact_existence : `dict` [`lsst.resources.ResourcePath`, `bool`]
753 Optional mapping of datastore artifact to existence. Updated by
754 this method with details of all artifacts tested. Can be `None`
755 if the caller is not interested.
757 Returns
758 -------
759 accepted : `set` [`DatasetRef`]
760 The datasets that were transferred.
761 rejected : `set` [`DatasetRef`]
762 The datasets that were rejected due to a constraints violation.
764 Raises
765 ------
766 TypeError
767 Raised if the two datastores are not compatible.
768 """
769 if type(self) is not type(source_datastore):
770 raise TypeError(
771 f"Datastore mismatch between this datastore ({type(self)}) and the "
772 f"source datastore ({type(source_datastore)})."
773 )
775 raise NotImplementedError(f"Datastore {type(self)} must implement a transfer_from method.")
777 def getManyURIs(
778 self,
779 refs: Iterable[DatasetRef],
780 predict: bool = False,
781 allow_missing: bool = False,
782 ) -> dict[DatasetRef, DatasetRefURIs]:
783 """Return URIs associated with many datasets.
785 Parameters
786 ----------
787 refs : iterable of `DatasetRef`
788 References to the required datasets.
789 predict : `bool`, optional
790 If `True`, allow URIs to be returned of datasets that have not
791 been written.
792 allow_missing : `bool`
793 If `False`, and ``predict`` is `False`, will raise if a
794 `DatasetRef` does not exist.
796 Returns
797 -------
798 URIs : `dict` of [`DatasetRef`, `DatasetRefURIs`]
799 A dict of primary and component URIs, indexed by the passed-in
800 refs.
802 Raises
803 ------
804 FileNotFoundError
805 A URI has been requested for a dataset that does not exist and
806 guessing is not allowed.
808 Notes
809 -----
810 In file-based datastores, getManyURIs does not check that the files are
811 really there: if the datastore is aware of a file it is assumed to
812 exist.
813 """
814 uris: dict[DatasetRef, DatasetRefURIs] = {}
815 missing_refs = []
816 for ref in refs:
817 try:
818 uris[ref] = self.getURIs(ref, predict=predict)
819 except FileNotFoundError:
820 missing_refs.append(ref)
821 if missing_refs and not allow_missing:
822 raise FileNotFoundError(
823 "Missing {} refs from datastore out of {} and predict=False.".format(
824 num_missing := len(missing_refs), num_missing + len(uris)
825 )
826 )
827 return uris
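# Illustrative sketch (not part of the original module): retrieving URIs for
# several refs at once while tolerating datasets this datastore has not
# stored; ``datastore`` and ``refs`` are hypothetical.
#
#     uris = datastore.getManyURIs(refs, predict=False, allow_missing=True)
#     for ref, ref_uris in uris.items():
#         print(ref, ref_uris.primaryURI, ref_uris.componentURIs)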
829 @abstractmethod
830 def getURIs(self, datasetRef: DatasetRef, predict: bool = False) -> DatasetRefURIs:
831 """Return URIs associated with dataset.
833 Parameters
834 ----------
835 datasetRef : `DatasetRef`
836 Reference to the required dataset.
837 predict : `bool`, optional
838 If the datastore does not know about the dataset, should it
839 return a predicted URI or not?
841 Returns
842 -------
843 uris : `DatasetRefURIs`
844 The URI to the primary artifact associated with this dataset (if
845 the dataset was disassembled within the datastore this may be
846 `None`), and the URIs to any components associated with the dataset
847 artifact (can be empty if there are no components).
848 """
849 raise NotImplementedError()
851 @abstractmethod
852 def getURI(self, datasetRef: DatasetRef, predict: bool = False) -> ResourcePath:
853 """URI to the Dataset.
855 Parameters
856 ----------
857 datasetRef : `DatasetRef`
858 Reference to the required Dataset.
859 predict : `bool`
860 If `True` attempt to predict the URI for a dataset if it does
861 not exist in datastore.
863 Returns
864 -------
865 uri : `lsst.resources.ResourcePath`
866 URI pointing to the Dataset within the datastore. If the
867 Dataset does not exist in the datastore, the URI may be a guess.
868 If the datastore does not have entities that relate well
869 to the concept of a URI the returned URI string will be
870 descriptive. The returned URI is not guaranteed to be obtainable.
872 Raises
873 ------
874 FileNotFoundError
875 A URI has been requested for a dataset that does not exist and
876 guessing is not allowed.
877 """
878 raise NotImplementedError("Must be implemented by subclass")
880 @abstractmethod
881 def retrieveArtifacts(
882 self,
883 refs: Iterable[DatasetRef],
884 destination: ResourcePath,
885 transfer: str = "auto",
886 preserve_path: bool = True,
887 overwrite: bool = False,
888 ) -> list[ResourcePath]:
889 """Retrieve the artifacts associated with the supplied refs.
891 Parameters
892 ----------
893 refs : iterable of `DatasetRef`
894 The datasets for which artifacts are to be retrieved.
895 A single ref can result in multiple artifacts. The refs must
896 be resolved.
897 destination : `lsst.resources.ResourcePath`
898 Location to write the artifacts.
899 transfer : `str`, optional
900 Method to use to transfer the artifacts. Must be one of the options
901 supported by `lsst.resources.ResourcePath.transfer_from()`.
902 "move" is not allowed.
903 preserve_path : `bool`, optional
904 If `True` the full path of the artifact within the datastore
905 is preserved. If `False` the final file component of the path
906 is used.
907 overwrite : `bool`, optional
908 If `True` allow transfers to overwrite existing files at the
909 destination.
911 Returns
912 -------
913 targets : `list` of `lsst.resources.ResourcePath`
914 URIs of file artifacts in destination location. Order is not
915 preserved.
917 Notes
918 -----
919 For non-file datastores the artifacts written to the destination
920 may not match the representation inside the datastore. For example
921 a hierarchical data structure in a NoSQL database may well be stored
922 as a JSON file.
923 """
924 raise NotImplementedError()
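# Illustrative sketch (not part of the original module): copying the file
# artifacts for some refs into a local directory; ``datastore`` and ``refs``
# are hypothetical.
#
#     from lsst.resources import ResourcePath
#     destination = ResourcePath("exported_artifacts/", forceDirectory=True)
#     paths = datastore.retrieveArtifacts(refs, destination, transfer="copy")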
926 @abstractmethod
927 def remove(self, datasetRef: DatasetRef) -> None:
928 """Indicate to the Datastore that a Dataset can be removed.
930 Parameters
931 ----------
932 datasetRef : `DatasetRef`
933 Reference to the required Dataset.
935 Raises
936 ------
937 FileNotFoundError
938 When Dataset does not exist.
940 Notes
941 -----
942 Some Datastores may implement this method as a silent no-op to
943 disable Dataset deletion through standard interfaces.
944 """
945 raise NotImplementedError("Must be implemented by subclass")
947 @abstractmethod
948 def forget(self, refs: Iterable[DatasetRef]) -> None:
949 """Indicate to the Datastore that it should remove all records of the
950 given datasets, without actually deleting them.
952 Parameters
953 ----------
954 refs : `~collections.abc.Iterable` [ `DatasetRef` ]
955 References to the datasets being forgotten.
957 Notes
958 -----
959 Asking a datastore to forget a `DatasetRef` it does not hold should be
960 a silent no-op, not an error.
961 """
962 raise NotImplementedError("Must be implemented by subclass")
964 @abstractmethod
965 def trash(self, ref: DatasetRef | Iterable[DatasetRef], ignore_errors: bool = True) -> None:
966 """Indicate to the Datastore that a Dataset can be moved to the trash.
968 Parameters
969 ----------
970 ref : `DatasetRef` or iterable thereof
971 Reference(s) to the required Dataset.
972 ignore_errors : `bool`, optional
973 Determine whether errors should be ignored. When multiple
974 refs are being trashed there will be no per-ref check.
976 Raises
977 ------
978 FileNotFoundError
979 When Dataset does not exist and errors are not ignored. Only
980 checked if a single ref is supplied (and not in a list).
982 Notes
983 -----
984 Some Datastores may implement this method as a silent no-op to
985 disable Dataset deletion through standard interfaces.
986 """
987 raise NotImplementedError("Must be implemented by subclass")
989 @abstractmethod
990 def emptyTrash(self, ignore_errors: bool = True) -> None:
991 """Remove all datasets from the trash.
993 Parameters
994 ----------
995 ignore_errors : `bool`, optional
996 Determine whether errors should be ignored.
998 Notes
999 -----
1000 Some Datastores may implement this method as a silent no-op to
1001 disable Dataset deletion through standard interfaces.
1002 """
1003 raise NotImplementedError("Must be implemented by subclass")
1005 @abstractmethod
1006 def transfer(self, inputDatastore: Datastore, datasetRef: DatasetRef) -> None:
1007 """Transfer a dataset from another datastore to this datastore.
1009 Parameters
1010 ----------
1011 inputDatastore : `Datastore`
1012 The external `Datastore` from which to retrieve the Dataset.
1013 datasetRef : `DatasetRef`
1014 Reference to the required Dataset.
1015 """
1016 raise NotImplementedError("Must be implemented by subclass")
1018 def export(
1019 self,
1020 refs: Iterable[DatasetRef],
1021 *,
1022 directory: ResourcePathExpression | None = None,
1023 transfer: str | None = "auto",
1024 ) -> Iterable[FileDataset]:
1025 """Export datasets for transfer to another data repository.
1027 Parameters
1028 ----------
1029 refs : iterable of `DatasetRef`
1030 Dataset references to be exported.
1031 directory : `str`, optional
1032 Path to a directory that should contain files corresponding to
1033 output datasets. Ignored if ``transfer`` is explicitly `None`.
1034 transfer : `str`, optional
1035 Mode that should be used to move datasets out of the repository.
1036 Valid options are the same as those of the ``transfer`` argument
1037 to ``ingest``, and datastores may similarly signal that a transfer
1038 mode is not supported by raising `NotImplementedError`. If "auto"
1039 is given and no ``directory`` is specified, `None` will be
1040 implied.
1042 Returns
1043 -------
1044 datasets : iterable of `FileDataset`
1045 Structs containing information about the exported datasets, in the
1046 same order as ``refs``.
1048 Raises
1049 ------
1050 NotImplementedError
1051 Raised if the given transfer mode is not supported.
1052 """
1053 raise NotImplementedError(f"Transfer mode {transfer} not supported.")
1055 @abstractmethod
1056 def validateConfiguration(
1057 self, entities: Iterable[DatasetRef | DatasetType | StorageClass], logFailures: bool = False
1058 ) -> None:
1059 """Validate some of the configuration for this datastore.
1061 Parameters
1062 ----------
1063 entities : iterable of `DatasetRef`, `DatasetType`, or `StorageClass`
1064 Entities to test against this configuration. Can be differing
1065 types.
1066 logFailures : `bool`, optional
1067 If `True`, output a log message for every validation error
1068 detected.
1070 Raises
1071 ------
1072 DatastoreValidationError
1073 Raised if there is a validation problem with a configuration.
1075 Notes
1076 -----
1077 Which parts of the configuration are validated is at the discretion
1078 of each Datastore implementation.
1079 """
1080 raise NotImplementedError("Must be implemented by subclass")
1082 @abstractmethod
1083 def validateKey(self, lookupKey: LookupKey, entity: DatasetRef | DatasetType | StorageClass) -> None:
1084 """Validate a specific look up key with supplied entity.
1086 Parameters
1087 ----------
1088 lookupKey : `LookupKey`
1089 Key to use to retrieve information from the datastore
1090 configuration.
1091 entity : `DatasetRef`, `DatasetType`, or `StorageClass`
1092 Entity to compare with configuration retrieved using the
1093 specified lookup key.
1095 Raises
1096 ------
1097 DatastoreValidationError
1098 Raised if there is a problem with the combination of entity
1099 and lookup key.
1101 Notes
1102 -----
1103 Bypasses the normal selection priorities by allowing a key that
1104 would normally not be selected to be validated.
1105 """
1106 raise NotImplementedError("Must be implemented by subclass")
1108 @abstractmethod
1109 def getLookupKeys(self) -> set[LookupKey]:
1110 """Return all the lookup keys relevant to this datastore.
1112 Returns
1113 -------
1114 keys : `set` of `LookupKey`
1115 The keys stored internally for looking up information based
1116 on `DatasetType` name or `StorageClass`.
1117 """
1118 raise NotImplementedError("Must be implemented by subclass")
1120 def needs_expanded_data_ids(
1121 self,
1122 transfer: str | None,
1123 entity: DatasetRef | DatasetType | StorageClass | None = None,
1124 ) -> bool:
1125 """Test whether this datastore needs expanded data IDs to ingest.
1127 Parameters
1128 ----------
1129 transfer : `str` or `None`
1130 Transfer mode for ingest.
1131 entity : `DatasetRef`, `DatasetType`, or `StorageClass`, optional
1132 Object representing what will be ingested. If not provided (or not
1133 specific enough), `True` may be returned even if expanded data
1134 IDs aren't necessary.
1136 Returns
1137 -------
1138 needed : `bool`
1139 If `True`, expanded data IDs may be needed. `False` only if
1140 expansion definitely isn't necessary.
1141 """
1142 return True
1144 @abstractmethod
1145 def import_records(
1146 self,
1147 data: Mapping[str, DatastoreRecordData],
1148 ) -> None:
1149 """Import datastore location and record data from an in-memory data
1150 structure.
1152 Parameters
1153 ----------
1154 data : `~collections.abc.Mapping` [ `str`, `DatastoreRecordData` ]
1155 Datastore records indexed by datastore name. May contain data for
1156 other `Datastore` instances (generally because they are chained to
1157 this one), which should be ignored.
1159 Notes
1160 -----
1161 Implementations should generally not check that any external resources
1162 (e.g. files) referred to by these records actually exist, for
1163 performance reasons; we expect higher-level code to guarantee that they
1164 do.
1166 Implementations are responsible for calling
1167 `DatastoreRegistryBridge.insert` on all datasets in ``data.locations``
1168 where the key is in `names`, as well as loading any opaque table data.
1170 Implementations may assume that datasets are either fully present or
1171 not at all (single-component exports are not permitted).
1172 """
1173 raise NotImplementedError()
1175 @abstractmethod
1176 def export_records(
1177 self,
1178 refs: Iterable[DatasetIdRef],
1179 ) -> Mapping[str, DatastoreRecordData]:
1180 """Export datastore records and locations to an in-memory data
1181 structure.
1183 Parameters
1184 ----------
1185 refs : `~collections.abc.Iterable` [ `DatasetIdRef` ]
1186 Datasets to save. This may include datasets not known to this
1187 datastore, which should be ignored. May not include component
1188 datasets.
1190 Returns
1191 -------
1192 data : `~collections.abc.Mapping` [ `str`, `DatastoreRecordData` ]
1193 Exported datastore records indexed by datastore name.
1194 """
1195 raise NotImplementedError()
1197 def set_retrieve_dataset_type_method(self, method: Callable[[str], DatasetType | None] | None) -> None:
1198 """Specify a method that can be used by datastore to retrieve
1199 registry-defined dataset type.
1201 Parameters
1202 ----------
1203 method : `~collections.abc.Callable` | `None`
1204 Method that takes a name of the dataset type and returns a
1205 corresponding `DatasetType` instance as defined in Registry. If
1206 the dataset type name is not known to the registry, `None` is returned.
1208 Notes
1209 -----
1210 This method is only needed for a Datastore supporting a "trusted" mode
1211 when it does not have access to datastore records and needs to
1212 guess the dataset location based on the stored dataset type.
1213 """
1214 pass
1217class NullDatastore(Datastore):
1218 """A datastore that implements the `Datastore` API but always fails when
1219 it accepts any request.
1220 """
1222 @classmethod
1223 def setConfigRoot(cls, root: str, config: Config, full: Config, overwrite: bool = True) -> None:
1224 # Nothing to do. This is not a real Datastore.
1225 pass
1227 def __init__(
1228 self,
1229 config: Config | ResourcePathExpression | None,
1230 bridgeManager: DatastoreRegistryBridgeManager | None,
1231 butlerRoot: ResourcePathExpression | None = None,
1232 ):
1233 # Name ourselves with the timestamp at which the datastore
1234 # was created.
1235 self.name = f"{type(self).__name__}@{time.time()}"
1236 _LOG.debug("Creating datastore %s", self.name)
1238 return
1240 def knows(self, ref: DatasetRef) -> bool:
1241 return False
1243 def exists(self, datasetRef: DatasetRef) -> bool:
1244 return False
1246 def get(
1247 self,
1248 datasetRef: DatasetRef,
1249 parameters: Mapping[str, Any] | None = None,
1250 storageClass: StorageClass | str | None = None,
1251 ) -> Any:
1252 raise FileNotFoundError("This is a no-op datastore that can not access a real datastore")
1254 def put(self, inMemoryDataset: Any, datasetRef: DatasetRef) -> None:
1255 raise NotImplementedError("This is a no-op datastore that can not access a real datastore")
1257 def ingest(
1258 self, *datasets: FileDataset, transfer: str | None = None, record_validation_info: bool = True
1259 ) -> None:
1260 raise NotImplementedError("This is a no-op datastore that can not access a real datastore")
1262 def transfer_from(
1263 self,
1264 source_datastore: Datastore,
1265 refs: Iterable[DatasetRef],
1266 transfer: str = "auto",
1267 artifact_existence: dict[ResourcePath, bool] | None = None,
1268 ) -> tuple[set[DatasetRef], set[DatasetRef]]:
1269 raise NotImplementedError("This is a no-op datastore that can not access a real datastore")
1271 def getURIs(self, datasetRef: DatasetRef, predict: bool = False) -> DatasetRefURIs:
1272 raise FileNotFoundError("This is a no-op datastore that can not access a real datastore")
1274 def getURI(self, datasetRef: DatasetRef, predict: bool = False) -> ResourcePath:
1275 raise FileNotFoundError("This is a no-op datastore that can not access a real datastore")
1277 def retrieveArtifacts(
1278 self,
1279 refs: Iterable[DatasetRef],
1280 destination: ResourcePath,
1281 transfer: str = "auto",
1282 preserve_path: bool = True,
1283 overwrite: bool = False,
1284 ) -> list[ResourcePath]:
1285 raise NotImplementedError("This is a no-op datastore that can not access a real datastore")
1287 def remove(self, datasetRef: DatasetRef) -> None:
1288 raise NotImplementedError("This is a no-op datastore that can not access a real datastore")
1290 def forget(self, refs: Iterable[DatasetRef]) -> None:
1291 raise NotImplementedError("This is a no-op datastore that can not access a real datastore")
1293 def trash(self, ref: DatasetRef | Iterable[DatasetRef], ignore_errors: bool = True) -> None:
1294 raise NotImplementedError("This is a no-op datastore that can not access a real datastore")
1296 def emptyTrash(self, ignore_errors: bool = True) -> None:
1297 raise NotImplementedError("This is a no-op datastore that can not access a real datastore")
1299 def transfer(self, inputDatastore: Datastore, datasetRef: DatasetRef) -> None:
1300 raise NotImplementedError("This is a no-op datastore that can not access a real datastore")
1302 def export(
1303 self,
1304 refs: Iterable[DatasetRef],
1305 *,
1306 directory: ResourcePathExpression | None = None,
1307 transfer: str | None = "auto",
1308 ) -> Iterable[FileDataset]:
1309 raise NotImplementedError("This is a no-op datastore that can not access a real datastore")
1311 def validateConfiguration(
1312 self, entities: Iterable[DatasetRef | DatasetType | StorageClass], logFailures: bool = False
1313 ) -> None:
1314 # No configuration so always validates.
1315 pass
1317 def validateKey(self, lookupKey: LookupKey, entity: DatasetRef | DatasetType | StorageClass) -> None:
1318 pass
1320 def getLookupKeys(self) -> set[LookupKey]:
1321 raise NotImplementedError("This is a no-op datastore that can not access a real datastore")
1323 def import_records(
1324 self,
1325 data: Mapping[str, DatastoreRecordData],
1326 ) -> None:
1327 raise NotImplementedError("This is a no-op datastore that can not access a real datastore")
1329 def export_records(
1330 self,
1331 refs: Iterable[DatasetIdRef],
1332 ) -> Mapping[str, DatastoreRecordData]:
1333 raise NotImplementedError("This is a no-op datastore that can not access a real datastore")
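# Illustrative sketch (not part of the original module): a NullDatastore can
# stand in where a Datastore instance is required but no artifacts should
# ever be read or written.
def _example_null_datastore() -> None:
    store = NullDatastore(config=None, bridgeManager=None)
    # It never knows about any dataset and reports no usable root URI.
    assert store.knows_these([]) == {}
    assert store.roots == {store.name: None}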