Coverage for python/lsst/daf/butler/core/datastore.py: 61%
250 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-10-02 08:00 +0000
« prev ^ index » next coverage.py v7.3.1, created at 2023-10-02 08:00 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28"""Support for generic data stores."""
30from __future__ import annotations
32__all__ = ("DatastoreConfig", "Datastore", "DatastoreValidationError", "DatasetRefURIs", "NullDatastore")
34import contextlib
35import dataclasses
36import logging
37import time
38from abc import ABCMeta, abstractmethod
39from collections import abc, defaultdict
40from collections.abc import Callable, Iterable, Iterator, Mapping
41from typing import TYPE_CHECKING, Any, ClassVar
43from lsst.utils import doImportType
45from .config import Config, ConfigSubset
46from .constraints import Constraints
47from .exceptions import DatasetTypeNotSupportedError, ValidationError
48from .fileDataset import FileDataset
49from .storageClass import StorageClassFactory
51if TYPE_CHECKING:
52 from lsst.resources import ResourcePath, ResourcePathExpression
54 from ..registry.interfaces import DatasetIdRef, DatastoreRegistryBridgeManager
55 from .configSupport import LookupKey
56 from .datasets import DatasetRef, DatasetType
57 from .datastoreRecordData import DatastoreRecordData
58 from .storageClass import StorageClass
60_LOG = logging.getLogger(__name__)
63class DatastoreConfig(ConfigSubset):
64 """Configuration for Datastores."""
66 component = "datastore"
67 requiredKeys = ("cls",)
68 defaultConfigFile = "datastore.yaml"
71class DatastoreValidationError(ValidationError):
72 """There is a problem with the Datastore configuration."""
74 pass
77@dataclasses.dataclass(frozen=True)
78class Event:
79 """Representation of an event that can be rolled back."""
81 __slots__ = {"name", "undoFunc", "args", "kwargs"}
82 name: str
83 undoFunc: Callable
84 args: tuple
85 kwargs: dict
88class IngestPrepData:
89 """A helper base class for `Datastore` ingest implementations.
91 Datastore implementations will generally need a custom implementation of
92 this class.
94 Should be accessed as ``Datastore.IngestPrepData`` instead of via direct
95 import.
97 Parameters
98 ----------
99 refs : iterable of `DatasetRef`
100 References for the datasets that can be ingested by this datastore.
101 """
103 def __init__(self, refs: Iterable[DatasetRef]):
104 self.refs = {ref.id: ref for ref in refs}
107class DatastoreTransaction:
108 """Keeps a log of `Datastore` activity and allow rollback.
110 Parameters
111 ----------
112 parent : `DatastoreTransaction`, optional
113 The parent transaction (if any)
114 """
116 Event: ClassVar[type] = Event
118 parent: DatastoreTransaction | None
119 """The parent transaction. (`DatastoreTransaction`, optional)"""
121 def __init__(self, parent: DatastoreTransaction | None = None):
122 self.parent = parent
123 self._log: list[Event] = []
125 def registerUndo(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> None:
126 """Register event with undo function.
128 Parameters
129 ----------
130 name : `str`
131 Name of the event.
132 undoFunc : func
133 Function to undo this event.
134 args : `tuple`
135 Positional arguments to `undoFunc`.
136 **kwargs
137 Keyword arguments to `undoFunc`.
138 """
139 self._log.append(self.Event(name, undoFunc, args, kwargs))
141 @contextlib.contextmanager
142 def undoWith(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> Iterator[None]:
143 """Register undo function if nested operation succeeds.
145 Calls `registerUndo`.
147 This can be used to wrap individual undo-able statements within a
148 DatastoreTransaction block. Multiple statements that can fail
149 separately should not be part of the same `undoWith` block.
151 All arguments are forwarded directly to `registerUndo`.
152 """
153 try:
154 yield None
155 except BaseException:
156 raise
157 else:
158 self.registerUndo(name, undoFunc, *args, **kwargs)
160 def rollback(self) -> None:
161 """Roll back all events in this transaction."""
162 log = logging.getLogger(__name__)
163 while self._log:
164 ev = self._log.pop()
165 try:
166 log.debug(
167 "Rolling back transaction: %s: %s(%s,%s)",
168 ev.name,
169 ev.undoFunc,
170 ",".join(str(a) for a in ev.args),
171 ",".join(f"{k}={v}" for k, v in ev.kwargs.items()),
172 )
173 except Exception:
174 # In case we had a problem in stringification of arguments
175 log.warning("Rolling back transaction: %s", ev.name)
176 try:
177 ev.undoFunc(*ev.args, **ev.kwargs)
178 except BaseException as e:
179 # Deliberately swallow error that may occur in unrolling
180 log.warning("Exception: %s caught while unrolling: %s", e, ev.name)
181 pass
183 def commit(self) -> None:
184 """Commit this transaction."""
185 if self.parent is None:
186 # Just forget about the events, they have already happened.
187 return
188 else:
189 # We may still want to events from this transaction as part of
190 # the parent.
191 self.parent._log.extend(self._log)
194@dataclasses.dataclass
195class DatasetRefURIs(abc.Sequence):
196 """Represents the primary and component ResourcePath(s) associated with a
197 DatasetRef.
199 This is used in places where its members used to be represented as a tuple
200 `(primaryURI, componentURIs)`. To maintain backward compatibility this
201 inherits from Sequence and so instances can be treated as a two-item
202 tuple.
203 """
205 def __init__(
206 self,
207 primaryURI: ResourcePath | None = None,
208 componentURIs: dict[str, ResourcePath] | None = None,
209 ):
210 self.primaryURI = primaryURI
211 """The URI to the primary artifact associated with this dataset. If the
212 dataset was disassembled within the datastore this may be `None`.
213 """
215 self.componentURIs = componentURIs or {}
216 """The URIs to any components associated with the dataset artifact
217 indexed by component name. This can be empty if there are no
218 components.
219 """
221 def __getitem__(self, index: Any) -> Any:
222 """Get primaryURI and componentURIs by index.
224 Provides support for tuple-like access.
225 """
226 if index == 0:
227 return self.primaryURI
228 elif index == 1:
229 return self.componentURIs
230 raise IndexError("list index out of range")
232 def __len__(self) -> int:
233 """Get the number of data members.
235 Provides support for tuple-like access.
236 """
237 return 2
239 def __repr__(self) -> str:
240 return f"DatasetRefURIs({repr(self.primaryURI)}, {repr(self.componentURIs)})"
243class Datastore(metaclass=ABCMeta):
244 """Datastore interface.
246 Parameters
247 ----------
248 config : `DatastoreConfig` or `str`
249 Load configuration either from an existing config instance or by
250 referring to a configuration file.
251 bridgeManager : `DatastoreRegistryBridgeManager`
252 Object that manages the interface between `Registry` and datastores.
253 butlerRoot : `str`, optional
254 New datastore root to use to override the configuration value.
255 """
257 defaultConfigFile: ClassVar[str | None] = None
258 """Path to configuration defaults. Accessed within the ``config`` resource
259 or relative to a search path. Can be None if no defaults specified.
260 """
262 containerKey: ClassVar[str | None] = None
263 """Name of the key containing a list of subconfigurations that also
264 need to be merged with defaults and will likely use different Python
265 datastore classes (but all using DatastoreConfig). Assumed to be a
266 list of configurations that can be represented in a DatastoreConfig
267 and containing a "cls" definition. None indicates that no containers
268 are expected in this Datastore."""
270 isEphemeral: bool = False
271 """Indicate whether this Datastore is ephemeral or not. An ephemeral
272 datastore is one where the contents of the datastore will not exist
273 across process restarts. This value can change per-instance."""
275 config: DatastoreConfig
276 """Configuration used to create Datastore."""
278 name: str
279 """Label associated with this Datastore."""
281 storageClassFactory: StorageClassFactory
282 """Factory for creating storage class instances from name."""
284 constraints: Constraints
285 """Constraints to apply when putting datasets into the datastore."""
287 # MyPy does not like for this to be annotated as any kind of type, because
288 # it can't do static checking on type variables that can change at runtime.
289 IngestPrepData: ClassVar[Any] = IngestPrepData
290 """Helper base class for ingest implementations.
291 """
293 @classmethod
294 @abstractmethod
295 def setConfigRoot(cls, root: str, config: Config, full: Config, overwrite: bool = True) -> None:
296 """Set filesystem-dependent config options for this datastore.
298 The options will be appropriate for a new empty repository with the
299 given root.
301 Parameters
302 ----------
303 root : `str`
304 Filesystem path to the root of the data repository.
305 config : `Config`
306 A `Config` to update. Only the subset understood by
307 this component will be updated. Will not expand
308 defaults.
309 full : `Config`
310 A complete config with all defaults expanded that can be
311 converted to a `DatastoreConfig`. Read-only and will not be
312 modified by this method.
313 Repository-specific options that should not be obtained
314 from defaults when Butler instances are constructed
315 should be copied from ``full`` to ``config``.
316 overwrite : `bool`, optional
317 If `False`, do not modify a value in ``config`` if the value
318 already exists. Default is always to overwrite with the provided
319 ``root``.
321 Notes
322 -----
323 If a keyword is explicitly defined in the supplied ``config`` it
324 will not be overridden by this method if ``overwrite`` is `False`.
325 This allows explicit values set in external configs to be retained.
326 """
327 raise NotImplementedError()
329 @staticmethod
330 def fromConfig(
331 config: Config,
332 bridgeManager: DatastoreRegistryBridgeManager,
333 butlerRoot: ResourcePathExpression | None = None,
334 ) -> Datastore:
335 """Create datastore from type specified in config file.
337 Parameters
338 ----------
339 config : `Config` or `~lsst.resources.ResourcePathExpression`
340 Configuration instance.
341 bridgeManager : `DatastoreRegistryBridgeManager`
342 Object that manages the interface between `Registry` and
343 datastores.
344 butlerRoot : `str`, optional
345 Butler root directory.
346 """
347 cls = doImportType(config["datastore", "cls"])
348 if not issubclass(cls, Datastore):
349 raise TypeError(f"Imported child class {config['datastore', 'cls']} is not a Datastore")
350 return cls(config=config, bridgeManager=bridgeManager, butlerRoot=butlerRoot)
352 def __init__(
353 self,
354 config: Config | ResourcePathExpression,
355 bridgeManager: DatastoreRegistryBridgeManager,
356 butlerRoot: ResourcePathExpression | None = None,
357 ):
358 self.config = DatastoreConfig(config)
359 self.name = "ABCDataStore"
360 self._transaction: DatastoreTransaction | None = None
362 # All Datastores need storage classes and constraints
363 self.storageClassFactory = StorageClassFactory()
365 # And read the constraints list
366 constraintsConfig = self.config.get("constraints")
367 self.constraints = Constraints(constraintsConfig, universe=bridgeManager.universe)
369 def __str__(self) -> str:
370 return self.name
372 def __repr__(self) -> str:
373 return self.name
375 @property
376 def names(self) -> tuple[str, ...]:
377 """Names associated with this datastore returned as a list.
379 Can be different to ``name`` for a chaining datastore.
380 """
381 # Default implementation returns solely the name itself
382 return (self.name,)
384 @property
385 def roots(self) -> dict[str, ResourcePath | None]:
386 """Return the root URIs for each named datastore.
388 Mapping from datastore name to root URI. The URI can be `None`
389 if a datastore has no concept of a root URI.
390 (`dict` [`str`, `ResourcePath` | `None`])
391 """
392 return {self.name: None}
394 @contextlib.contextmanager
395 def transaction(self) -> Iterator[DatastoreTransaction]:
396 """Context manager supporting `Datastore` transactions.
398 Transactions can be nested, and are to be used in combination with
399 `Registry.transaction`.
400 """
401 self._transaction = DatastoreTransaction(self._transaction)
402 try:
403 yield self._transaction
404 except BaseException:
405 self._transaction.rollback()
406 raise
407 else:
408 self._transaction.commit()
409 self._transaction = self._transaction.parent
411 @abstractmethod
412 def knows(self, ref: DatasetRef) -> bool:
413 """Check if the dataset is known to the datastore.
415 Does not check for existence of any artifact.
417 Parameters
418 ----------
419 ref : `DatasetRef`
420 Reference to the required dataset.
422 Returns
423 -------
424 exists : `bool`
425 `True` if the dataset is known to the datastore.
426 """
427 raise NotImplementedError()
429 def knows_these(self, refs: Iterable[DatasetRef]) -> dict[DatasetRef, bool]:
430 """Check which of the given datasets are known to this datastore.
432 This is like ``mexist()`` but does not check that the file exists.
434 Parameters
435 ----------
436 refs : iterable `DatasetRef`
437 The datasets to check.
439 Returns
440 -------
441 exists : `dict`[`DatasetRef`, `bool`]
442 Mapping of dataset to boolean indicating whether the dataset
443 is known to the datastore.
444 """
445 # Non-optimized default calls knows() repeatedly.
446 return {ref: self.knows(ref) for ref in refs}
448 def mexists(
449 self, refs: Iterable[DatasetRef], artifact_existence: dict[ResourcePath, bool] | None = None
450 ) -> dict[DatasetRef, bool]:
451 """Check the existence of multiple datasets at once.
453 Parameters
454 ----------
455 refs : iterable of `DatasetRef`
456 The datasets to be checked.
457 artifact_existence : `dict` [`lsst.resources.ResourcePath`, `bool`]
458 Optional mapping of datastore artifact to existence. Updated by
459 this method with details of all artifacts tested. Can be `None`
460 if the caller is not interested.
462 Returns
463 -------
464 existence : `dict` of [`DatasetRef`, `bool`]
465 Mapping from dataset to boolean indicating existence.
466 """
467 existence: dict[DatasetRef, bool] = {}
468 # Non-optimized default.
469 for ref in refs:
470 existence[ref] = self.exists(ref)
471 return existence
473 @abstractmethod
474 def exists(self, datasetRef: DatasetRef) -> bool:
475 """Check if the dataset exists in the datastore.
477 Parameters
478 ----------
479 datasetRef : `DatasetRef`
480 Reference to the required dataset.
482 Returns
483 -------
484 exists : `bool`
485 `True` if the entity exists in the `Datastore`.
486 """
487 raise NotImplementedError("Must be implemented by subclass")
489 @abstractmethod
490 def get(
491 self,
492 datasetRef: DatasetRef,
493 parameters: Mapping[str, Any] | None = None,
494 storageClass: StorageClass | str | None = None,
495 ) -> Any:
496 """Load an `InMemoryDataset` from the store.
498 Parameters
499 ----------
500 datasetRef : `DatasetRef`
501 Reference to the required Dataset.
502 parameters : `dict`
503 `StorageClass`-specific parameters that specify a slice of the
504 Dataset to be loaded.
505 storageClass : `StorageClass` or `str`, optional
506 The storage class to be used to override the Python type
507 returned by this method. By default the returned type matches
508 the dataset type definition for this dataset. Specifying a
509 read `StorageClass` can force a different type to be returned.
510 This type must be compatible with the original type.
512 Returns
513 -------
514 inMemoryDataset : `object`
515 Requested Dataset or slice thereof as an InMemoryDataset.
516 """
517 raise NotImplementedError("Must be implemented by subclass")
519 @abstractmethod
520 def put(self, inMemoryDataset: Any, datasetRef: DatasetRef) -> None:
521 """Write a `InMemoryDataset` with a given `DatasetRef` to the store.
523 Parameters
524 ----------
525 inMemoryDataset : `object`
526 The Dataset to store.
527 datasetRef : `DatasetRef`
528 Reference to the associated Dataset.
529 """
530 raise NotImplementedError("Must be implemented by subclass")
532 def _overrideTransferMode(self, *datasets: FileDataset, transfer: str | None = None) -> str | None:
533 """Allow ingest transfer mode to be defaulted based on datasets.
535 Parameters
536 ----------
537 datasets : `FileDataset`
538 Each positional argument is a struct containing information about
539 a file to be ingested, including its path (either absolute or
540 relative to the datastore root, if applicable), a complete
541 `DatasetRef` (with ``dataset_id not None``), and optionally a
542 formatter class or its fully-qualified string name. If a formatter
543 is not provided, this method should populate that attribute with
544 the formatter the datastore would use for `put`. Subclasses are
545 also permitted to modify the path attribute (typically to put it
546 in what the datastore considers its standard form).
547 transfer : `str`, optional
548 How (and whether) the dataset should be added to the datastore.
549 See `ingest` for details of transfer modes.
551 Returns
552 -------
553 newTransfer : `str`
554 Transfer mode to use. Will be identical to the supplied transfer
555 mode unless "auto" is used.
556 """
557 if transfer != "auto":
558 return transfer
559 raise RuntimeError(f"{transfer} is not allowed without specialization.")
561 def _prepIngest(self, *datasets: FileDataset, transfer: str | None = None) -> IngestPrepData:
562 """Process datasets to identify which ones can be ingested.
564 Parameters
565 ----------
566 datasets : `FileDataset`
567 Each positional argument is a struct containing information about
568 a file to be ingested, including its path (either absolute or
569 relative to the datastore root, if applicable), a complete
570 `DatasetRef` (with ``dataset_id not None``), and optionally a
571 formatter class or its fully-qualified string name. If a formatter
572 is not provided, this method should populate that attribute with
573 the formatter the datastore would use for `put`. Subclasses are
574 also permitted to modify the path attribute (typically to put it
575 in what the datastore considers its standard form).
576 transfer : `str`, optional
577 How (and whether) the dataset should be added to the datastore.
578 See `ingest` for details of transfer modes.
580 Returns
581 -------
582 data : `IngestPrepData`
583 An instance of a subclass of `IngestPrepData`, used to pass
584 arbitrary data from `_prepIngest` to `_finishIngest`. This should
585 include only the datasets this datastore can actually ingest;
586 others should be silently ignored (`Datastore.ingest` will inspect
587 `IngestPrepData.refs` and raise `DatasetTypeNotSupportedError` if
588 necessary).
590 Raises
591 ------
592 NotImplementedError
593 Raised if the datastore does not support the given transfer mode
594 (including the case where ingest is not supported at all).
595 FileNotFoundError
596 Raised if one of the given files does not exist.
597 FileExistsError
598 Raised if transfer is not `None` but the (internal) location the
599 file would be moved to is already occupied.
601 Notes
602 -----
603 This method (along with `_finishIngest`) should be implemented by
604 subclasses to provide ingest support instead of implementing `ingest`
605 directly.
607 `_prepIngest` should not modify the data repository or given files in
608 any way; all changes should be deferred to `_finishIngest`.
610 When possible, exceptions should be raised in `_prepIngest` instead of
611 `_finishIngest`. `NotImplementedError` exceptions that indicate that
612 the transfer mode is not supported must be raised by `_prepIngest`
613 instead of `_finishIngest`.
614 """
615 raise NotImplementedError(f"Datastore {self} does not support direct file-based ingest.")
617 def _finishIngest(
618 self, prepData: IngestPrepData, *, transfer: str | None = None, record_validation_info: bool = True
619 ) -> None:
620 """Complete an ingest operation.
622 Parameters
623 ----------
624 data : `IngestPrepData`
625 An instance of a subclass of `IngestPrepData`. Guaranteed to be
626 the direct result of a call to `_prepIngest` on this datastore.
627 transfer : `str`, optional
628 How (and whether) the dataset should be added to the datastore.
629 See `ingest` for details of transfer modes.
630 record_validation_info : `bool`, optional
631 If `True`, the default, the datastore can record validation
632 information associated with the file. If `False` the datastore
633 will not attempt to track any information such as checksums
634 or file sizes. This can be useful if such information is tracked
635 in an external system or if the file is to be compressed in place.
636 It is up to the datastore whether this parameter is relevant.
638 Raises
639 ------
640 FileNotFoundError
641 Raised if one of the given files does not exist.
642 FileExistsError
643 Raised if transfer is not `None` but the (internal) location the
644 file would be moved to is already occupied.
646 Notes
647 -----
648 This method (along with `_prepIngest`) should be implemented by
649 subclasses to provide ingest support instead of implementing `ingest`
650 directly.
651 """
652 raise NotImplementedError(f"Datastore {self} does not support direct file-based ingest.")
654 def ingest(
655 self, *datasets: FileDataset, transfer: str | None = None, record_validation_info: bool = True
656 ) -> None:
657 """Ingest one or more files into the datastore.
659 Parameters
660 ----------
661 datasets : `FileDataset`
662 Each positional argument is a struct containing information about
663 a file to be ingested, including its path (either absolute or
664 relative to the datastore root, if applicable), a complete
665 `DatasetRef` (with ``dataset_id not None``), and optionally a
666 formatter class or its fully-qualified string name. If a formatter
667 is not provided, the one the datastore would use for ``put`` on
668 that dataset is assumed.
669 transfer : `str`, optional
670 How (and whether) the dataset should be added to the datastore.
671 If `None` (default), the file must already be in a location
672 appropriate for the datastore (e.g. within its root directory),
673 and will not be modified. Other choices include "move", "copy",
674 "link", "symlink", "relsymlink", and "hardlink". "link" is a
675 special transfer mode that will first try to make a hardlink and
676 if that fails a symlink will be used instead. "relsymlink" creates
677 a relative symlink rather than use an absolute path.
678 Most datastores do not support all transfer modes.
679 "auto" is a special option that will let the
680 data store choose the most natural option for itself.
681 record_validation_info : `bool`, optional
682 If `True`, the default, the datastore can record validation
683 information associated with the file. If `False` the datastore
684 will not attempt to track any information such as checksums
685 or file sizes. This can be useful if such information is tracked
686 in an external system or if the file is to be compressed in place.
687 It is up to the datastore whether this parameter is relevant.
689 Raises
690 ------
691 NotImplementedError
692 Raised if the datastore does not support the given transfer mode
693 (including the case where ingest is not supported at all).
694 DatasetTypeNotSupportedError
695 Raised if one or more files to be ingested have a dataset type that
696 is not supported by the datastore.
697 FileNotFoundError
698 Raised if one of the given files does not exist.
699 FileExistsError
700 Raised if transfer is not `None` but the (internal) location the
701 file would be moved to is already occupied.
703 Notes
704 -----
705 Subclasses should implement `_prepIngest` and `_finishIngest` instead
706 of implementing `ingest` directly. Datastores that hold and
707 delegate to child datastores may want to call those methods as well.
709 Subclasses are encouraged to document their supported transfer modes
710 in their class documentation.
711 """
712 # Allow a datastore to select a default transfer mode
713 transfer = self._overrideTransferMode(*datasets, transfer=transfer)
714 prepData = self._prepIngest(*datasets, transfer=transfer)
715 refs = {ref.id: ref for dataset in datasets for ref in dataset.refs}
716 if refs.keys() != prepData.refs.keys():
717 unsupported = refs.keys() - prepData.refs.keys()
718 # Group unsupported refs by DatasetType for an informative
719 # but still concise error message.
720 byDatasetType = defaultdict(list)
721 for datasetId in unsupported:
722 ref = refs[datasetId]
723 byDatasetType[ref.datasetType].append(ref)
724 raise DatasetTypeNotSupportedError(
725 "DatasetType(s) not supported in ingest: "
726 + ", ".join(f"{k.name} ({len(v)} dataset(s))" for k, v in byDatasetType.items())
727 )
728 self._finishIngest(prepData, transfer=transfer, record_validation_info=record_validation_info)
730 def transfer_from(
731 self,
732 source_datastore: Datastore,
733 refs: Iterable[DatasetRef],
734 transfer: str = "auto",
735 artifact_existence: dict[ResourcePath, bool] | None = None,
736 ) -> tuple[set[DatasetRef], set[DatasetRef]]:
737 """Transfer dataset artifacts from another datastore to this one.
739 Parameters
740 ----------
741 source_datastore : `Datastore`
742 The datastore from which to transfer artifacts. That datastore
743 must be compatible with this datastore receiving the artifacts.
744 refs : iterable of `DatasetRef`
745 The datasets to transfer from the source datastore.
746 transfer : `str`, optional
747 How (and whether) the dataset should be added to the datastore.
748 Choices include "move", "copy",
749 "link", "symlink", "relsymlink", and "hardlink". "link" is a
750 special transfer mode that will first try to make a hardlink and
751 if that fails a symlink will be used instead. "relsymlink" creates
752 a relative symlink rather than use an absolute path.
753 Most datastores do not support all transfer modes.
754 "auto" (the default) is a special option that will let the
755 data store choose the most natural option for itself.
756 If the source location and transfer location are identical the
757 transfer mode will be ignored.
758 artifact_existence : `dict` [`lsst.resources.ResourcePath`, `bool`]
759 Optional mapping of datastore artifact to existence. Updated by
760 this method with details of all artifacts tested. Can be `None`
761 if the caller is not interested.
763 Returns
764 -------
765 accepted : `set` [`DatasetRef`]
766 The datasets that were transferred.
767 rejected : `set` [`DatasetRef`]
768 The datasets that were rejected due to a constraints violation.
770 Raises
771 ------
772 TypeError
773 Raised if the two datastores are not compatible.
774 """
775 if type(self) is not type(source_datastore):
776 raise TypeError(
777 f"Datastore mismatch between this datastore ({type(self)}) and the "
778 f"source datastore ({type(source_datastore)})."
779 )
781 raise NotImplementedError(f"Datastore {type(self)} must implement a transfer_from method.")
783 def getManyURIs(
784 self,
785 refs: Iterable[DatasetRef],
786 predict: bool = False,
787 allow_missing: bool = False,
788 ) -> dict[DatasetRef, DatasetRefURIs]:
789 """Return URIs associated with many datasets.
791 Parameters
792 ----------
793 refs : iterable of `DatasetIdRef`
794 References to the required datasets.
795 predict : `bool`, optional
796 If `True`, allow URIs to be returned of datasets that have not
797 been written.
798 allow_missing : `bool`
799 If `False`, and ``predict`` is `False`, will raise if a
800 `DatasetRef` does not exist.
802 Returns
803 -------
804 URIs : `dict` of [`DatasetRef`, `DatasetRefUris`]
805 A dict of primary and component URIs, indexed by the passed-in
806 refs.
808 Raises
809 ------
810 FileNotFoundError
811 A URI has been requested for a dataset that does not exist and
812 guessing is not allowed.
814 Notes
815 -----
816 In file-based datastores, getManyURIs does not check that the file is
817 really there, it's assuming it is if datastore is aware of the file
818 then it actually exists.
819 """
820 uris: dict[DatasetRef, DatasetRefURIs] = {}
821 missing_refs = []
822 for ref in refs:
823 try:
824 uris[ref] = self.getURIs(ref, predict=predict)
825 except FileNotFoundError:
826 missing_refs.append(ref)
827 if missing_refs and not allow_missing:
828 raise FileNotFoundError(
829 "Missing {} refs from datastore out of {} and predict=False.".format(
830 num_missing := len(missing_refs), num_missing + len(uris)
831 )
832 )
833 return uris
835 @abstractmethod
836 def getURIs(self, datasetRef: DatasetRef, predict: bool = False) -> DatasetRefURIs:
837 """Return URIs associated with dataset.
839 Parameters
840 ----------
841 ref : `DatasetRef`
842 Reference to the required dataset.
843 predict : `bool`, optional
844 If the datastore does not know about the dataset, should it
845 return a predicted URI or not?
847 Returns
848 -------
849 uris : `DatasetRefURIs`
850 The URI to the primary artifact associated with this dataset (if
851 the dataset was disassembled within the datastore this may be
852 `None`), and the URIs to any components associated with the dataset
853 artifact. (can be empty if there are no components).
854 """
855 raise NotImplementedError()
857 @abstractmethod
858 def getURI(self, datasetRef: DatasetRef, predict: bool = False) -> ResourcePath:
859 """URI to the Dataset.
861 Parameters
862 ----------
863 datasetRef : `DatasetRef`
864 Reference to the required Dataset.
865 predict : `bool`
866 If `True` attempt to predict the URI for a dataset if it does
867 not exist in datastore.
869 Returns
870 -------
871 uri : `str`
872 URI string pointing to the Dataset within the datastore. If the
873 Dataset does not exist in the datastore, the URI may be a guess.
874 If the datastore does not have entities that relate well
875 to the concept of a URI the returned URI string will be
876 descriptive. The returned URI is not guaranteed to be obtainable.
878 Raises
879 ------
880 FileNotFoundError
881 A URI has been requested for a dataset that does not exist and
882 guessing is not allowed.
883 """
884 raise NotImplementedError("Must be implemented by subclass")
886 @abstractmethod
887 def retrieveArtifacts(
888 self,
889 refs: Iterable[DatasetRef],
890 destination: ResourcePath,
891 transfer: str = "auto",
892 preserve_path: bool = True,
893 overwrite: bool = False,
894 ) -> list[ResourcePath]:
895 """Retrieve the artifacts associated with the supplied refs.
897 Parameters
898 ----------
899 refs : iterable of `DatasetRef`
900 The datasets for which artifacts are to be retrieved.
901 A single ref can result in multiple artifacts. The refs must
902 be resolved.
903 destination : `lsst.resources.ResourcePath`
904 Location to write the artifacts.
905 transfer : `str`, optional
906 Method to use to transfer the artifacts. Must be one of the options
907 supported by `lsst.resources.ResourcePath.transfer_from()`.
908 "move" is not allowed.
909 preserve_path : `bool`, optional
910 If `True` the full path of the artifact within the datastore
911 is preserved. If `False` the final file component of the path
912 is used.
913 overwrite : `bool`, optional
914 If `True` allow transfers to overwrite existing files at the
915 destination.
917 Returns
918 -------
919 targets : `list` of `lsst.resources.ResourcePath`
920 URIs of file artifacts in destination location. Order is not
921 preserved.
923 Notes
924 -----
925 For non-file datastores the artifacts written to the destination
926 may not match the representation inside the datastore. For example
927 a hierarchichal data structure in a NoSQL database may well be stored
928 as a JSON file.
929 """
930 raise NotImplementedError()
932 @abstractmethod
933 def remove(self, datasetRef: DatasetRef) -> None:
934 """Indicate to the Datastore that a Dataset can be removed.
936 Parameters
937 ----------
938 datasetRef : `DatasetRef`
939 Reference to the required Dataset.
941 Raises
942 ------
943 FileNotFoundError
944 When Dataset does not exist.
946 Notes
947 -----
948 Some Datastores may implement this method as a silent no-op to
949 disable Dataset deletion through standard interfaces.
950 """
951 raise NotImplementedError("Must be implemented by subclass")
953 @abstractmethod
954 def forget(self, refs: Iterable[DatasetRef]) -> None:
955 """Indicate to the Datastore that it should remove all records of the
956 given datasets, without actually deleting them.
958 Parameters
959 ----------
960 refs : `~collections.abc.Iterable` [ `DatasetRef` ]
961 References to the datasets being forgotten.
963 Notes
964 -----
965 Asking a datastore to forget a `DatasetRef` it does not hold should be
966 a silent no-op, not an error.
967 """
968 raise NotImplementedError("Must be implemented by subclass")
970 @abstractmethod
971 def trash(self, ref: DatasetRef | Iterable[DatasetRef], ignore_errors: bool = True) -> None:
972 """Indicate to the Datastore that a Dataset can be moved to the trash.
974 Parameters
975 ----------
976 ref : `DatasetRef` or iterable thereof
977 Reference(s) to the required Dataset.
978 ignore_errors : `bool`, optional
979 Determine whether errors should be ignored. When multiple
980 refs are being trashed there will be no per-ref check.
982 Raises
983 ------
984 FileNotFoundError
985 When Dataset does not exist and errors are not ignored. Only
986 checked if a single ref is supplied (and not in a list).
988 Notes
989 -----
990 Some Datastores may implement this method as a silent no-op to
991 disable Dataset deletion through standard interfaces.
992 """
993 raise NotImplementedError("Must be implemented by subclass")
995 @abstractmethod
996 def emptyTrash(self, ignore_errors: bool = True) -> None:
997 """Remove all datasets from the trash.
999 Parameters
1000 ----------
1001 ignore_errors : `bool`, optional
1002 Determine whether errors should be ignored.
1004 Notes
1005 -----
1006 Some Datastores may implement this method as a silent no-op to
1007 disable Dataset deletion through standard interfaces.
1008 """
1009 raise NotImplementedError("Must be implemented by subclass")
1011 @abstractmethod
1012 def transfer(self, inputDatastore: Datastore, datasetRef: DatasetRef) -> None:
1013 """Transfer a dataset from another datastore to this datastore.
1015 Parameters
1016 ----------
1017 inputDatastore : `Datastore`
1018 The external `Datastore` from which to retrieve the Dataset.
1019 datasetRef : `DatasetRef`
1020 Reference to the required Dataset.
1021 """
1022 raise NotImplementedError("Must be implemented by subclass")
1024 def export(
1025 self,
1026 refs: Iterable[DatasetRef],
1027 *,
1028 directory: ResourcePathExpression | None = None,
1029 transfer: str | None = "auto",
1030 ) -> Iterable[FileDataset]:
1031 """Export datasets for transfer to another data repository.
1033 Parameters
1034 ----------
1035 refs : iterable of `DatasetRef`
1036 Dataset references to be exported.
1037 directory : `str`, optional
1038 Path to a directory that should contain files corresponding to
1039 output datasets. Ignored if ``transfer`` is explicitly `None`.
1040 transfer : `str`, optional
1041 Mode that should be used to move datasets out of the repository.
1042 Valid options are the same as those of the ``transfer`` argument
1043 to ``ingest``, and datastores may similarly signal that a transfer
1044 mode is not supported by raising `NotImplementedError`. If "auto"
1045 is given and no ``directory`` is specified, `None` will be
1046 implied.
1048 Returns
1049 -------
1050 dataset : iterable of `DatasetTransfer`
1051 Structs containing information about the exported datasets, in the
1052 same order as ``refs``.
1054 Raises
1055 ------
1056 NotImplementedError
1057 Raised if the given transfer mode is not supported.
1058 """
1059 raise NotImplementedError(f"Transfer mode {transfer} not supported.")
1061 @abstractmethod
1062 def validateConfiguration(
1063 self, entities: Iterable[DatasetRef | DatasetType | StorageClass], logFailures: bool = False
1064 ) -> None:
1065 """Validate some of the configuration for this datastore.
1067 Parameters
1068 ----------
1069 entities : iterable of `DatasetRef`, `DatasetType`, or `StorageClass`
1070 Entities to test against this configuration. Can be differing
1071 types.
1072 logFailures : `bool`, optional
1073 If `True`, output a log message for every validation error
1074 detected.
1076 Raises
1077 ------
1078 DatastoreValidationError
1079 Raised if there is a validation problem with a configuration.
1081 Notes
1082 -----
1083 Which parts of the configuration are validated is at the discretion
1084 of each Datastore implementation.
1085 """
1086 raise NotImplementedError("Must be implemented by subclass")
1088 @abstractmethod
1089 def validateKey(self, lookupKey: LookupKey, entity: DatasetRef | DatasetType | StorageClass) -> None:
1090 """Validate a specific look up key with supplied entity.
1092 Parameters
1093 ----------
1094 lookupKey : `LookupKey`
1095 Key to use to retrieve information from the datastore
1096 configuration.
1097 entity : `DatasetRef`, `DatasetType`, or `StorageClass`
1098 Entity to compare with configuration retrieved using the
1099 specified lookup key.
1101 Raises
1102 ------
1103 DatastoreValidationError
1104 Raised if there is a problem with the combination of entity
1105 and lookup key.
1107 Notes
1108 -----
1109 Bypasses the normal selection priorities by allowing a key that
1110 would normally not be selected to be validated.
1111 """
1112 raise NotImplementedError("Must be implemented by subclass")
1114 @abstractmethod
1115 def getLookupKeys(self) -> set[LookupKey]:
1116 """Return all the lookup keys relevant to this datastore.
1118 Returns
1119 -------
1120 keys : `set` of `LookupKey`
1121 The keys stored internally for looking up information based
1122 on `DatasetType` name or `StorageClass`.
1123 """
1124 raise NotImplementedError("Must be implemented by subclass")
1126 def needs_expanded_data_ids(
1127 self,
1128 transfer: str | None,
1129 entity: DatasetRef | DatasetType | StorageClass | None = None,
1130 ) -> bool:
1131 """Test whether this datastore needs expanded data IDs to ingest.
1133 Parameters
1134 ----------
1135 transfer : `str` or `None`
1136 Transfer mode for ingest.
1137 entity, optional
1138 Object representing what will be ingested. If not provided (or not
1139 specific enough), `True` may be returned even if expanded data
1140 IDs aren't necessary.
1142 Returns
1143 -------
1144 needed : `bool`
1145 If `True`, expanded data IDs may be needed. `False` only if
1146 expansion definitely isn't necessary.
1147 """
1148 return True
1150 @abstractmethod
1151 def import_records(
1152 self,
1153 data: Mapping[str, DatastoreRecordData],
1154 ) -> None:
1155 """Import datastore location and record data from an in-memory data
1156 structure.
1158 Parameters
1159 ----------
1160 data : `~collections.abc.Mapping` [ `str`, `DatastoreRecordData` ]
1161 Datastore records indexed by datastore name. May contain data for
1162 other `Datastore` instances (generally because they are chained to
1163 this one), which should be ignored.
1165 Notes
1166 -----
1167 Implementations should generally not check that any external resources
1168 (e.g. files) referred to by these records actually exist, for
1169 performance reasons; we expect higher-level code to guarantee that they
1170 do.
1172 Implementations are responsible for calling
1173 `DatastoreRegistryBridge.insert` on all datasets in ``data.locations``
1174 where the key is in `names`, as well as loading any opaque table data.
1176 Implementations may assume that datasets are either fully present or
1177 not at all (single-component exports are not permitted).
1178 """
1179 raise NotImplementedError()
1181 @abstractmethod
1182 def export_records(
1183 self,
1184 refs: Iterable[DatasetIdRef],
1185 ) -> Mapping[str, DatastoreRecordData]:
1186 """Export datastore records and locations to an in-memory data
1187 structure.
1189 Parameters
1190 ----------
1191 refs : `~collections.abc.Iterable` [ `DatasetIdRef` ]
1192 Datasets to save. This may include datasets not known to this
1193 datastore, which should be ignored. May not include component
1194 datasets.
1196 Returns
1197 -------
1198 data : `~collections.abc.Mapping` [ `str`, `DatastoreRecordData` ]
1199 Exported datastore records indexed by datastore name.
1200 """
1201 raise NotImplementedError()
1203 def set_retrieve_dataset_type_method(self, method: Callable[[str], DatasetType | None] | None) -> None:
1204 """Specify a method that can be used by datastore to retrieve
1205 registry-defined dataset type.
1207 Parameters
1208 ----------
1209 method : `~collections.abc.Callable` | `None`
1210 Method that takes a name of the dataset type and returns a
1211 corresponding `DatasetType` instance as defined in Registry. If
1212 dataset type name is not known to registry `None` is returned.
1214 Notes
1215 -----
1216 This method is only needed for a Datastore supporting a "trusted" mode
1217 when it does not have an access to datastore records and needs to
1218 guess dataset location based on its stored dataset type.
1219 """
1220 pass
1223class NullDatastore(Datastore):
1224 """A datastore that implements the `Datastore` API but always fails when
1225 it accepts any request.
1226 """
1228 @classmethod
1229 def setConfigRoot(cls, root: str, config: Config, full: Config, overwrite: bool = True) -> None:
1230 # Nothing to do. This is not a real Datastore.
1231 pass
1233 def __init__(
1234 self,
1235 config: Config | ResourcePathExpression | None,
1236 bridgeManager: DatastoreRegistryBridgeManager | None,
1237 butlerRoot: ResourcePathExpression | None = None,
1238 ):
1239 # Name ourselves with the timestamp the datastore
1240 # was created.
1241 self.name = f"{type(self).__name__}@{time.time()}"
1242 _LOG.debug("Creating datastore %s", self.name)
1244 return
1246 def knows(self, ref: DatasetRef) -> bool:
1247 return False
1249 def exists(self, datasetRef: DatasetRef) -> bool:
1250 return False
1252 def get(
1253 self,
1254 datasetRef: DatasetRef,
1255 parameters: Mapping[str, Any] | None = None,
1256 storageClass: StorageClass | str | None = None,
1257 ) -> Any:
1258 raise FileNotFoundError("This is a no-op datastore that can not access a real datastore")
1260 def put(self, inMemoryDataset: Any, datasetRef: DatasetRef) -> None:
1261 raise NotImplementedError("This is a no-op datastore that can not access a real datastore")
1263 def ingest(
1264 self, *datasets: FileDataset, transfer: str | None = None, record_validation_info: bool = True
1265 ) -> None:
1266 raise NotImplementedError("This is a no-op datastore that can not access a real datastore")
1268 def transfer_from(
1269 self,
1270 source_datastore: Datastore,
1271 refs: Iterable[DatasetRef],
1272 transfer: str = "auto",
1273 artifact_existence: dict[ResourcePath, bool] | None = None,
1274 ) -> tuple[set[DatasetRef], set[DatasetRef]]:
1275 raise NotImplementedError("This is a no-op datastore that can not access a real datastore")
1277 def getURIs(self, datasetRef: DatasetRef, predict: bool = False) -> DatasetRefURIs:
1278 raise FileNotFoundError("This is a no-op datastore that can not access a real datastore")
1280 def getURI(self, datasetRef: DatasetRef, predict: bool = False) -> ResourcePath:
1281 raise FileNotFoundError("This is a no-op datastore that can not access a real datastore")
1283 def retrieveArtifacts(
1284 self,
1285 refs: Iterable[DatasetRef],
1286 destination: ResourcePath,
1287 transfer: str = "auto",
1288 preserve_path: bool = True,
1289 overwrite: bool = False,
1290 ) -> list[ResourcePath]:
1291 raise NotImplementedError("This is a no-op datastore that can not access a real datastore")
1293 def remove(self, datasetRef: DatasetRef) -> None:
1294 raise NotImplementedError("This is a no-op datastore that can not access a real datastore")
1296 def forget(self, refs: Iterable[DatasetRef]) -> None:
1297 raise NotImplementedError("This is a no-op datastore that can not access a real datastore")
1299 def trash(self, ref: DatasetRef | Iterable[DatasetRef], ignore_errors: bool = True) -> None:
1300 raise NotImplementedError("This is a no-op datastore that can not access a real datastore")
1302 def emptyTrash(self, ignore_errors: bool = True) -> None:
1303 raise NotImplementedError("This is a no-op datastore that can not access a real datastore")
1305 def transfer(self, inputDatastore: Datastore, datasetRef: DatasetRef) -> None:
1306 raise NotImplementedError("This is a no-op datastore that can not access a real datastore")
1308 def export(
1309 self,
1310 refs: Iterable[DatasetRef],
1311 *,
1312 directory: ResourcePathExpression | None = None,
1313 transfer: str | None = "auto",
1314 ) -> Iterable[FileDataset]:
1315 raise NotImplementedError("This is a no-op datastore that can not access a real datastore")
1317 def validateConfiguration(
1318 self, entities: Iterable[DatasetRef | DatasetType | StorageClass], logFailures: bool = False
1319 ) -> None:
1320 # No configuration so always validates.
1321 pass
1323 def validateKey(self, lookupKey: LookupKey, entity: DatasetRef | DatasetType | StorageClass) -> None:
1324 pass
1326 def getLookupKeys(self) -> set[LookupKey]:
1327 raise NotImplementedError("This is a no-op datastore that can not access a real datastore")
1329 def import_records(
1330 self,
1331 data: Mapping[str, DatastoreRecordData],
1332 ) -> None:
1333 raise NotImplementedError("This is a no-op datastore that can not access a real datastore")
1335 def export_records(
1336 self,
1337 refs: Iterable[DatasetIdRef],
1338 ) -> Mapping[str, DatastoreRecordData]:
1339 raise NotImplementedError("This is a no-op datastore that can not access a real datastore")