Coverage for python/lsst/daf/butler/datastore/_datastore.py: 61%
250 statements
coverage.py v7.3.2, created at 2023-10-12 09:44 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28"""Support for generic data stores."""
30from __future__ import annotations
32__all__ = (
33 "DatastoreConfig",
34 "Datastore",
35 "DatastoreValidationError",
36 "DatasetRefURIs",
37 "NullDatastore",
38 "DatastoreTransaction",
39)
41import contextlib
42import dataclasses
43import logging
44import time
45from abc import ABCMeta, abstractmethod
46from collections import abc, defaultdict
47from collections.abc import Callable, Iterable, Iterator, Mapping
48from typing import TYPE_CHECKING, Any, ClassVar
50from lsst.utils import doImportType
52from .._config import Config, ConfigSubset
53from .._exceptions import DatasetTypeNotSupportedError, ValidationError
54from .._file_dataset import FileDataset
55from .._storage_class import StorageClassFactory
56from .constraints import Constraints
58if TYPE_CHECKING:
59 from lsst.resources import ResourcePath, ResourcePathExpression
61 from .._config_support import LookupKey
62 from .._dataset_ref import DatasetRef
63 from .._dataset_type import DatasetType
64 from .._storage_class import StorageClass
65 from ..registry.interfaces import DatasetIdRef, DatastoreRegistryBridgeManager
66 from .record_data import DatastoreRecordData
68_LOG = logging.getLogger(__name__)
71class DatastoreConfig(ConfigSubset):
72 """Configuration for Datastores."""
74 component = "datastore"
75 requiredKeys = ("cls",)
76 defaultConfigFile = "datastore.yaml"
79class DatastoreValidationError(ValidationError):
80 """There is a problem with the Datastore configuration."""
82 pass
85@dataclasses.dataclass(frozen=True)
86class Event:
87 """Representation of an event that can be rolled back."""
89 __slots__ = {"name", "undoFunc", "args", "kwargs"}
90 name: str
91 undoFunc: Callable
92 args: tuple
93 kwargs: dict
96class IngestPrepData:
97 """A helper base class for `Datastore` ingest implementations.
99 Datastore implementations will generally need a custom implementation of
100 this class.
102 Should be accessed as ``Datastore.IngestPrepData`` instead of via direct
103 import.
105 Parameters
106 ----------
107 refs : iterable of `DatasetRef`
108 References for the datasets that can be ingested by this datastore.
109 """
111 def __init__(self, refs: Iterable[DatasetRef]):
112 self.refs = {ref.id: ref for ref in refs}
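# --- Illustrative sketch (not part of daf_butler): the docstring above says
# datastore implementations generally need their own IngestPrepData subclass.
# This is one hypothetical shape for such a subclass; the extra ``locations``
# attribute is an assumption standing in for whatever per-ref bookkeeping a
# concrete _prepIngest wants to pass to _finishIngest.
class _ExampleIngestPrepData(IngestPrepData):
    def __init__(self, refs: Iterable[DatasetRef], locations: dict):
        super().__init__(refs)
        # Hypothetical extra state carried from _prepIngest to _finishIngest.
        self.locations = locations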
115class DatastoreTransaction:
116 """Keeps a log of `Datastore` activity and allows rollback.
118 Parameters
119 ----------
120 parent : `DatastoreTransaction`, optional
121 The parent transaction (if any).
122 """
124 Event: ClassVar[type] = Event
126 parent: DatastoreTransaction | None
127 """The parent transaction. (`DatastoreTransaction`, optional)"""
129 def __init__(self, parent: DatastoreTransaction | None = None):
130 self.parent = parent
131 self._log: list[Event] = []
133 def registerUndo(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> None:
134 """Register event with undo function.
136 Parameters
137 ----------
138 name : `str`
139 Name of the event.
140 undoFunc : func
141 Function to undo this event.
142 args : `tuple`
143 Positional arguments to `undoFunc`.
144 **kwargs
145 Keyword arguments to `undoFunc`.
146 """
147 self._log.append(self.Event(name, undoFunc, args, kwargs))
149 @contextlib.contextmanager
150 def undoWith(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> Iterator[None]:
151 """Register undo function if nested operation succeeds.
153 Calls `registerUndo`.
155 This can be used to wrap individual undo-able statements within a
156 DatastoreTransaction block. Multiple statements that can fail
157 separately should not be part of the same `undoWith` block.
159 All arguments are forwarded directly to `registerUndo`.
160 """
161 try:
162 yield None
163 except BaseException:
164 raise
165 else:
166 self.registerUndo(name, undoFunc, *args, **kwargs)
168 def rollback(self) -> None:
169 """Roll back all events in this transaction."""
170 log = logging.getLogger(__name__)
171 while self._log:
172 ev = self._log.pop()
173 try:
174 log.debug(
175 "Rolling back transaction: %s: %s(%s,%s)",
176 ev.name,
177 ev.undoFunc,
178 ",".join(str(a) for a in ev.args),
179 ",".join(f"{k}={v}" for k, v in ev.kwargs.items()),
180 )
181 except Exception:
182 # In case we had a problem in stringification of arguments
183 log.warning("Rolling back transaction: %s", ev.name)
184 try:
185 ev.undoFunc(*ev.args, **ev.kwargs)
186 except BaseException as e:
187 # Deliberately swallow error that may occur in unrolling
188 log.warning("Exception: %s caught while unrolling: %s", e, ev.name)
189 pass
191 def commit(self) -> None:
192 """Commit this transaction."""
193 if self.parent is None:
194 # Just forget about the events, they have already happened.
195 return
196 else:
197 # We may still want the events from this transaction to be part of
198 # the parent.
199 self.parent._log.extend(self._log)
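# --- Illustrative sketch (not part of daf_butler): the basic
# DatastoreTransaction pattern. Undo callables are registered after each step
# succeeds; rollback() runs them in reverse order, while commit() either
# forgets them or hands them to a parent transaction. The ``written`` list and
# the "entry-1" value are placeholders for this example.
def _example_transaction_log(written: list[str]) -> None:
    txn = DatastoreTransaction()
    written.append("entry-1")
    txn.registerUndo("record entry", written.remove, "entry-1")
    # On failure the caller would invoke txn.rollback(), which calls
    # written.remove("entry-1"); on success it calls txn.commit().
    txn.commit()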
202@dataclasses.dataclass
203class DatasetRefURIs(abc.Sequence):
204 """Represents the primary and component ResourcePath(s) associated with a
205 DatasetRef.
207 This is used in places where its members used to be represented as a tuple
208 `(primaryURI, componentURIs)`. To maintain backward compatibility this
209 inherits from Sequence and so instances can be treated as a two-item
210 tuple.
211 """
213 def __init__(
214 self,
215 primaryURI: ResourcePath | None = None,
216 componentURIs: dict[str, ResourcePath] | None = None,
217 ):
218 self.primaryURI = primaryURI
219 """The URI to the primary artifact associated with this dataset. If the
220 dataset was disassembled within the datastore this may be `None`.
221 """
223 self.componentURIs = componentURIs or {}
224 """The URIs to any components associated with the dataset artifact
225 indexed by component name. This can be empty if there are no
226 components.
227 """
229 def __getitem__(self, index: Any) -> Any:
230 """Get primaryURI and componentURIs by index.
232 Provides support for tuple-like access.
233 """
234 if index == 0:
235 return self.primaryURI
236 elif index == 1:
237 return self.componentURIs
238 raise IndexError("list index out of range")
240 def __len__(self) -> int:
241 """Get the number of data members.
243 Provides support for tuple-like access.
244 """
245 return 2
247 def __repr__(self) -> str:
248 return f"DatasetRefURIs({repr(self.primaryURI)}, {repr(self.componentURIs)})"
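# --- Illustrative sketch (not part of daf_butler): DatasetRefURIs keeps
# tuple-style access for backwards compatibility, so attribute access and
# two-item sequence unpacking are equivalent views of the same data.
def _example_dataset_ref_uris() -> None:
    uris = DatasetRefURIs(primaryURI=None, componentURIs={})
    primary, components = uris  # sequence-style unpacking
    assert primary is uris.primaryURI and components is uris.componentURIs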
251class Datastore(metaclass=ABCMeta):
252 """Datastore interface.
254 Parameters
255 ----------
256 config : `DatastoreConfig` or `str`
257 Load configuration either from an existing config instance or by
258 referring to a configuration file.
259 bridgeManager : `DatastoreRegistryBridgeManager`
260 Object that manages the interface between `Registry` and datastores.
261 butlerRoot : `str`, optional
262 New datastore root to use to override the configuration value.
263 """
265 defaultConfigFile: ClassVar[str | None] = None
266 """Path to configuration defaults. Accessed within the ``config`` resource
267 or relative to a search path. Can be None if no defaults specified.
268 """
270 containerKey: ClassVar[str | None] = None
271 """Name of the key containing a list of subconfigurations that also
272 need to be merged with defaults and will likely use different Python
273 datastore classes (but all using DatastoreConfig). Assumed to be a
274 list of configurations that can be represented in a DatastoreConfig
275 and containing a "cls" definition. None indicates that no containers
276 are expected in this Datastore."""
278 isEphemeral: bool = False
279 """Indicate whether this Datastore is ephemeral or not. An ephemeral
280 datastore is one where the contents of the datastore will not exist
281 across process restarts. This value can change per-instance."""
283 config: DatastoreConfig
284 """Configuration used to create Datastore."""
286 name: str
287 """Label associated with this Datastore."""
289 storageClassFactory: StorageClassFactory
290 """Factory for creating storage class instances from name."""
292 constraints: Constraints
293 """Constraints to apply when putting datasets into the datastore."""
295 # MyPy does not like for this to be annotated as any kind of type, because
296 # it can't do static checking on type variables that can change at runtime.
297 IngestPrepData: ClassVar[Any] = IngestPrepData
298 """Helper base class for ingest implementations.
299 """
301 @classmethod
302 @abstractmethod
303 def setConfigRoot(cls, root: str, config: Config, full: Config, overwrite: bool = True) -> None:
304 """Set filesystem-dependent config options for this datastore.
306 The options will be appropriate for a new empty repository with the
307 given root.
309 Parameters
310 ----------
311 root : `str`
312 Filesystem path to the root of the data repository.
313 config : `Config`
314 A `Config` to update. Only the subset understood by
315 this component will be updated. Will not expand
316 defaults.
317 full : `Config`
318 A complete config with all defaults expanded that can be
319 converted to a `DatastoreConfig`. Read-only and will not be
320 modified by this method.
321 Repository-specific options that should not be obtained
322 from defaults when Butler instances are constructed
323 should be copied from ``full`` to ``config``.
324 overwrite : `bool`, optional
325 If `False`, do not modify a value in ``config`` if the value
326 already exists. Default is always to overwrite with the provided
327 ``root``.
329 Notes
330 -----
331 If a keyword is explicitly defined in the supplied ``config`` it
332 will not be overridden by this method if ``overwrite`` is `False`.
333 This allows explicit values set in external configs to be retained.
334 """
335 raise NotImplementedError()
337 @staticmethod
338 def fromConfig(
339 config: Config,
340 bridgeManager: DatastoreRegistryBridgeManager,
341 butlerRoot: ResourcePathExpression | None = None,
342 ) -> Datastore:
343 """Create datastore from type specified in config file.
345 Parameters
346 ----------
347 config : `Config` or `~lsst.resources.ResourcePathExpression`
348 Configuration instance.
349 bridgeManager : `DatastoreRegistryBridgeManager`
350 Object that manages the interface between `Registry` and
351 datastores.
352 butlerRoot : `str`, optional
353 Butler root directory.
354 """
355 cls = doImportType(config["datastore", "cls"])
356 if not issubclass(cls, Datastore):
357 raise TypeError(f"Imported child class {config['datastore', 'cls']} is not a Datastore")
358 return cls(config=config, bridgeManager=bridgeManager, butlerRoot=butlerRoot)
360 def __init__(
361 self,
362 config: Config | ResourcePathExpression,
363 bridgeManager: DatastoreRegistryBridgeManager,
364 butlerRoot: ResourcePathExpression | None = None,
365 ):
366 self.config = DatastoreConfig(config)
367 self.name = "ABCDataStore"
368 self._transaction: DatastoreTransaction | None = None
370 # All Datastores need storage classes and constraints
371 self.storageClassFactory = StorageClassFactory()
373 # And read the constraints list
374 constraintsConfig = self.config.get("constraints")
375 self.constraints = Constraints(constraintsConfig, universe=bridgeManager.universe)
377 def __str__(self) -> str:
378 return self.name
380 def __repr__(self) -> str:
381 return self.name
383 @property
384 def names(self) -> tuple[str, ...]:
385 """Names associated with this datastore returned as a tuple.
387 Can be different to ``name`` for a chaining datastore.
388 """
389 # Default implementation returns solely the name itself
390 return (self.name,)
392 @property
393 def roots(self) -> dict[str, ResourcePath | None]:
394 """Return the root URIs for each named datastore.
396 Mapping from datastore name to root URI. The URI can be `None`
397 if a datastore has no concept of a root URI.
398 (`dict` [`str`, `ResourcePath` | `None`])
399 """
400 return {self.name: None}
402 @contextlib.contextmanager
403 def transaction(self) -> Iterator[DatastoreTransaction]:
404 """Context manager supporting `Datastore` transactions.
406 Transactions can be nested, and are to be used in combination with
407 `Registry.transaction`.
408 """
409 self._transaction = DatastoreTransaction(self._transaction)
410 try:
411 yield self._transaction
412 except BaseException:
413 self._transaction.rollback()
414 raise
415 else:
416 self._transaction.commit()
417 self._transaction = self._transaction.parent
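# --- Illustrative sketch (not part of daf_butler): calling code nests
# undo-able work inside transaction() so that a failure rolls back any
# registered undo actions. ``do_write`` and ``undo_write`` are hypothetical
# callables supplied by the caller for this example.
def _example_transaction_usage(datastore: Datastore, do_write: Callable, undo_write: Callable) -> None:
    with datastore.transaction() as txn:
        with txn.undoWith("example write", undo_write):
            do_write()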
419 @abstractmethod
420 def knows(self, ref: DatasetRef) -> bool:
421 """Check if the dataset is known to the datastore.
423 Does not check for existence of any artifact.
425 Parameters
426 ----------
427 ref : `DatasetRef`
428 Reference to the required dataset.
430 Returns
431 -------
432 exists : `bool`
433 `True` if the dataset is known to the datastore.
434 """
435 raise NotImplementedError()
437 def knows_these(self, refs: Iterable[DatasetRef]) -> dict[DatasetRef, bool]:
438 """Check which of the given datasets are known to this datastore.
440 This is like ``mexists()`` but does not check that the file exists.
442 Parameters
443 ----------
444 refs : iterable of `DatasetRef`
445 The datasets to check.
447 Returns
448 -------
449 exists : `dict`[`DatasetRef`, `bool`]
450 Mapping of dataset to boolean indicating whether the dataset
451 is known to the datastore.
452 """
453 # Non-optimized default calls knows() repeatedly.
454 return {ref: self.knows(ref) for ref in refs}
456 def mexists(
457 self, refs: Iterable[DatasetRef], artifact_existence: dict[ResourcePath, bool] | None = None
458 ) -> dict[DatasetRef, bool]:
459 """Check the existence of multiple datasets at once.
461 Parameters
462 ----------
463 refs : iterable of `DatasetRef`
464 The datasets to be checked.
465 artifact_existence : `dict` [`lsst.resources.ResourcePath`, `bool`]
466 Optional mapping of datastore artifact to existence. Updated by
467 this method with details of all artifacts tested. Can be `None`
468 if the caller is not interested.
470 Returns
471 -------
472 existence : `dict` of [`DatasetRef`, `bool`]
473 Mapping from dataset to boolean indicating existence.
474 """
475 existence: dict[DatasetRef, bool] = {}
476 # Non-optimized default.
477 for ref in refs:
478 existence[ref] = self.exists(ref)
479 return existence
481 @abstractmethod
482 def exists(self, datasetRef: DatasetRef) -> bool:
483 """Check if the dataset exists in the datastore.
485 Parameters
486 ----------
487 datasetRef : `DatasetRef`
488 Reference to the required dataset.
490 Returns
491 -------
492 exists : `bool`
493 `True` if the entity exists in the `Datastore`.
494 """
495 raise NotImplementedError("Must be implemented by subclass")
497 @abstractmethod
498 def get(
499 self,
500 datasetRef: DatasetRef,
501 parameters: Mapping[str, Any] | None = None,
502 storageClass: StorageClass | str | None = None,
503 ) -> Any:
504 """Load an `InMemoryDataset` from the store.
506 Parameters
507 ----------
508 datasetRef : `DatasetRef`
509 Reference to the required Dataset.
510 parameters : `dict`
511 `StorageClass`-specific parameters that specify a slice of the
512 Dataset to be loaded.
513 storageClass : `StorageClass` or `str`, optional
514 The storage class to be used to override the Python type
515 returned by this method. By default the returned type matches
516 the dataset type definition for this dataset. Specifying a
517 read `StorageClass` can force a different type to be returned.
518 This type must be compatible with the original type.
520 Returns
521 -------
522 inMemoryDataset : `object`
523 Requested Dataset or slice thereof as an InMemoryDataset.
524 """
525 raise NotImplementedError("Must be implemented by subclass")
527 @abstractmethod
528 def put(self, inMemoryDataset: Any, datasetRef: DatasetRef) -> None:
529 """Write a `InMemoryDataset` with a given `DatasetRef` to the store.
531 Parameters
532 ----------
533 inMemoryDataset : `object`
534 The Dataset to store.
535 datasetRef : `DatasetRef`
536 Reference to the associated Dataset.
537 """
538 raise NotImplementedError("Must be implemented by subclass")
540 def _overrideTransferMode(self, *datasets: FileDataset, transfer: str | None = None) -> str | None:
541 """Allow ingest transfer mode to be defaulted based on datasets.
543 Parameters
544 ----------
545 datasets : `FileDataset`
546 Each positional argument is a struct containing information about
547 a file to be ingested, including its path (either absolute or
548 relative to the datastore root, if applicable), a complete
549 `DatasetRef` (with ``dataset_id not None``), and optionally a
550 formatter class or its fully-qualified string name. If a formatter
551 is not provided, this method should populate that attribute with
552 the formatter the datastore would use for `put`. Subclasses are
553 also permitted to modify the path attribute (typically to put it
554 in what the datastore considers its standard form).
555 transfer : `str`, optional
556 How (and whether) the dataset should be added to the datastore.
557 See `ingest` for details of transfer modes.
559 Returns
560 -------
561 newTransfer : `str`
562 Transfer mode to use. Will be identical to the supplied transfer
563 mode unless "auto" is used.
564 """
565 if transfer != "auto":
566 return transfer
567 raise RuntimeError(f"{transfer} is not allowed without specialization.")
569 def _prepIngest(self, *datasets: FileDataset, transfer: str | None = None) -> IngestPrepData:
570 """Process datasets to identify which ones can be ingested.
572 Parameters
573 ----------
574 datasets : `FileDataset`
575 Each positional argument is a struct containing information about
576 a file to be ingested, including its path (either absolute or
577 relative to the datastore root, if applicable), a complete
578 `DatasetRef` (with ``dataset_id not None``), and optionally a
579 formatter class or its fully-qualified string name. If a formatter
580 is not provided, this method should populate that attribute with
581 the formatter the datastore would use for `put`. Subclasses are
582 also permitted to modify the path attribute (typically to put it
583 in what the datastore considers its standard form).
584 transfer : `str`, optional
585 How (and whether) the dataset should be added to the datastore.
586 See `ingest` for details of transfer modes.
588 Returns
589 -------
590 data : `IngestPrepData`
591 An instance of a subclass of `IngestPrepData`, used to pass
592 arbitrary data from `_prepIngest` to `_finishIngest`. This should
593 include only the datasets this datastore can actually ingest;
594 others should be silently ignored (`Datastore.ingest` will inspect
595 `IngestPrepData.refs` and raise `DatasetTypeNotSupportedError` if
596 necessary).
598 Raises
599 ------
600 NotImplementedError
601 Raised if the datastore does not support the given transfer mode
602 (including the case where ingest is not supported at all).
603 FileNotFoundError
604 Raised if one of the given files does not exist.
605 FileExistsError
606 Raised if transfer is not `None` but the (internal) location the
607 file would be moved to is already occupied.
609 Notes
610 -----
611 This method (along with `_finishIngest`) should be implemented by
612 subclasses to provide ingest support instead of implementing `ingest`
613 directly.
615 `_prepIngest` should not modify the data repository or given files in
616 any way; all changes should be deferred to `_finishIngest`.
618 When possible, exceptions should be raised in `_prepIngest` instead of
619 `_finishIngest`. `NotImplementedError` exceptions that indicate that
620 the transfer mode is not supported must be raised by `_prepIngest`
621 instead of `_finishIngest`.
622 """
623 raise NotImplementedError(f"Datastore {self} does not support direct file-based ingest.")
625 def _finishIngest(
626 self, prepData: IngestPrepData, *, transfer: str | None = None, record_validation_info: bool = True
627 ) -> None:
628 """Complete an ingest operation.
630 Parameters
631 ----------
632 data : `IngestPrepData`
633 An instance of a subclass of `IngestPrepData`. Guaranteed to be
634 the direct result of a call to `_prepIngest` on this datastore.
635 transfer : `str`, optional
636 How (and whether) the dataset should be added to the datastore.
637 See `ingest` for details of transfer modes.
638 record_validation_info : `bool`, optional
639 If `True`, the default, the datastore can record validation
640 information associated with the file. If `False` the datastore
641 will not attempt to track any information such as checksums
642 or file sizes. This can be useful if such information is tracked
643 in an external system or if the file is to be compressed in place.
644 It is up to the datastore whether this parameter is relevant.
646 Raises
647 ------
648 FileNotFoundError
649 Raised if one of the given files does not exist.
650 FileExistsError
651 Raised if transfer is not `None` but the (internal) location the
652 file would be moved to is already occupied.
654 Notes
655 -----
656 This method (along with `_prepIngest`) should be implemented by
657 subclasses to provide ingest support instead of implementing `ingest`
658 directly.
659 """
660 raise NotImplementedError(f"Datastore {self} does not support direct file-based ingest.")
662 def ingest(
663 self, *datasets: FileDataset, transfer: str | None = None, record_validation_info: bool = True
664 ) -> None:
665 """Ingest one or more files into the datastore.
667 Parameters
668 ----------
669 datasets : `FileDataset`
670 Each positional argument is a struct containing information about
671 a file to be ingested, including its path (either absolute or
672 relative to the datastore root, if applicable), a complete
673 `DatasetRef` (with ``dataset_id not None``), and optionally a
674 formatter class or its fully-qualified string name. If a formatter
675 is not provided, the one the datastore would use for ``put`` on
676 that dataset is assumed.
677 transfer : `str`, optional
678 How (and whether) the dataset should be added to the datastore.
679 If `None` (default), the file must already be in a location
680 appropriate for the datastore (e.g. within its root directory),
681 and will not be modified. Other choices include "move", "copy",
682 "link", "symlink", "relsymlink", and "hardlink". "link" is a
683 special transfer mode that will first try to make a hardlink and
684 if that fails a symlink will be used instead. "relsymlink" creates
685 a relative symlink rather than use an absolute path.
686 Most datastores do not support all transfer modes.
687 "auto" is a special option that will let the
688 data store choose the most natural option for itself.
689 record_validation_info : `bool`, optional
690 If `True`, the default, the datastore can record validation
691 information associated with the file. If `False` the datastore
692 will not attempt to track any information such as checksums
693 or file sizes. This can be useful if such information is tracked
694 in an external system or if the file is to be compressed in place.
695 It is up to the datastore whether this parameter is relevant.
697 Raises
698 ------
699 NotImplementedError
700 Raised if the datastore does not support the given transfer mode
701 (including the case where ingest is not supported at all).
702 DatasetTypeNotSupportedError
703 Raised if one or more files to be ingested have a dataset type that
704 is not supported by the datastore.
705 FileNotFoundError
706 Raised if one of the given files does not exist.
707 FileExistsError
708 Raised if transfer is not `None` but the (internal) location the
709 file would be moved to is already occupied.
711 Notes
712 -----
713 Subclasses should implement `_prepIngest` and `_finishIngest` instead
714 of implementing `ingest` directly. Datastores that hold and
715 delegate to child datastores may want to call those methods as well.
717 Subclasses are encouraged to document their supported transfer modes
718 in their class documentation.
719 """
720 # Allow a datastore to select a default transfer mode
721 transfer = self._overrideTransferMode(*datasets, transfer=transfer)
722 prepData = self._prepIngest(*datasets, transfer=transfer)
723 refs = {ref.id: ref for dataset in datasets for ref in dataset.refs}
724 if refs.keys() != prepData.refs.keys():
725 unsupported = refs.keys() - prepData.refs.keys()
726 # Group unsupported refs by DatasetType for an informative
727 # but still concise error message.
728 byDatasetType = defaultdict(list)
729 for datasetId in unsupported:
730 ref = refs[datasetId]
731 byDatasetType[ref.datasetType].append(ref)
732 raise DatasetTypeNotSupportedError(
733 "DatasetType(s) not supported in ingest: "
734 + ", ".join(f"{k.name} ({len(v)} dataset(s))" for k, v in byDatasetType.items())
735 )
736 self._finishIngest(prepData, transfer=transfer, record_validation_info=record_validation_info)
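# --- Illustrative sketch (not part of daf_butler): a typical ingest call.
# A FileDataset is built from a file path plus the resolved DatasetRef(s)
# describing it (path/refs form assumed here); the path below is a
# placeholder and "copy" is only one of the transfer modes a concrete
# datastore may support.
def _example_ingest(datastore: Datastore, ref: DatasetRef) -> None:
    dataset = FileDataset(path="/tmp/example.fits", refs=[ref])
    datastore.ingest(dataset, transfer="copy")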
738 def transfer_from(
739 self,
740 source_datastore: Datastore,
741 refs: Iterable[DatasetRef],
742 transfer: str = "auto",
743 artifact_existence: dict[ResourcePath, bool] | None = None,
744 ) -> tuple[set[DatasetRef], set[DatasetRef]]:
745 """Transfer dataset artifacts from another datastore to this one.
747 Parameters
748 ----------
749 source_datastore : `Datastore`
750 The datastore from which to transfer artifacts. That datastore
751 must be compatible with this datastore receiving the artifacts.
752 refs : iterable of `DatasetRef`
753 The datasets to transfer from the source datastore.
754 transfer : `str`, optional
755 How (and whether) the dataset should be added to the datastore.
756 Choices include "move", "copy",
757 "link", "symlink", "relsymlink", and "hardlink". "link" is a
758 special transfer mode that will first try to make a hardlink and
759 if that fails a symlink will be used instead. "relsymlink" creates
760 a relative symlink rather than use an absolute path.
761 Most datastores do not support all transfer modes.
762 "auto" (the default) is a special option that will let the
763 data store choose the most natural option for itself.
764 If the source location and transfer location are identical the
765 transfer mode will be ignored.
766 artifact_existence : `dict` [`lsst.resources.ResourcePath`, `bool`]
767 Optional mapping of datastore artifact to existence. Updated by
768 this method with details of all artifacts tested. Can be `None`
769 if the caller is not interested.
771 Returns
772 -------
773 accepted : `set` [`DatasetRef`]
774 The datasets that were transferred.
775 rejected : `set` [`DatasetRef`]
776 The datasets that were rejected due to a constraints violation.
778 Raises
779 ------
780 TypeError
781 Raised if the two datastores are not compatible.
782 """
783 if type(self) is not type(source_datastore):
784 raise TypeError(
785 f"Datastore mismatch between this datastore ({type(self)}) and the "
786 f"source datastore ({type(source_datastore)})."
787 )
789 raise NotImplementedError(f"Datastore {type(self)} must implement a transfer_from method.")
791 def getManyURIs(
792 self,
793 refs: Iterable[DatasetRef],
794 predict: bool = False,
795 allow_missing: bool = False,
796 ) -> dict[DatasetRef, DatasetRefURIs]:
797 """Return URIs associated with many datasets.
799 Parameters
800 ----------
801 refs : iterable of `DatasetRef`
802 References to the required datasets.
803 predict : `bool`, optional
804 If `True`, allow URIs to be returned of datasets that have not
805 been written.
806 allow_missing : `bool`
807 If `False`, and ``predict`` is `False`, will raise if a
808 `DatasetRef` does not exist.
810 Returns
811 -------
812 URIs : `dict` of [`DatasetRef`, `DatasetRefURIs`]
813 A dict of primary and component URIs, indexed by the passed-in
814 refs.
816 Raises
817 ------
818 FileNotFoundError
819 A URI has been requested for a dataset that does not exist and
820 guessing is not allowed.
822 Notes
823 -----
824 In file-based datastores, getManyURIs does not check that the file is
825 really there; it assumes that if the datastore is aware of the file
826 then it actually exists.
827 """
828 uris: dict[DatasetRef, DatasetRefURIs] = {}
829 missing_refs = []
830 for ref in refs:
831 try:
832 uris[ref] = self.getURIs(ref, predict=predict)
833 except FileNotFoundError:
834 missing_refs.append(ref)
835 if missing_refs and not allow_missing:
836 raise FileNotFoundError(
837 "Missing {} refs from datastore out of {} and predict=False.".format(
838 num_missing := len(missing_refs), num_missing + len(uris)
839 )
840 )
841 return uris
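# --- Illustrative sketch (not part of daf_butler): consuming getManyURIs().
# Each value is a DatasetRefURIs, so it can be unpacked into the primary URI
# and the per-component URI mapping.
def _example_get_many_uris(datastore: Datastore, refs: Iterable[DatasetRef]) -> None:
    for ref, (primary, components) in datastore.getManyURIs(refs, allow_missing=True).items():
        _LOG.debug("%s: primary=%s, components=%s", ref, primary, sorted(components))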
843 @abstractmethod
844 def getURIs(self, datasetRef: DatasetRef, predict: bool = False) -> DatasetRefURIs:
845 """Return URIs associated with dataset.
847 Parameters
848 ----------
849 datasetRef : `DatasetRef`
850 Reference to the required dataset.
851 predict : `bool`, optional
852 If the datastore does not know about the dataset, should it
853 return a predicted URI or not?
855 Returns
856 -------
857 uris : `DatasetRefURIs`
858 The URI to the primary artifact associated with this dataset (if
859 the dataset was disassembled within the datastore this may be
860 `None`), and the URIs to any components associated with the dataset
861 artifact. (can be empty if there are no components).
862 """
863 raise NotImplementedError()
865 @abstractmethod
866 def getURI(self, datasetRef: DatasetRef, predict: bool = False) -> ResourcePath:
867 """URI to the Dataset.
869 Parameters
870 ----------
871 datasetRef : `DatasetRef`
872 Reference to the required Dataset.
873 predict : `bool`
874 If `True` attempt to predict the URI for a dataset if it does
875 not exist in datastore.
877 Returns
878 -------
879 uri : `lsst.resources.ResourcePath`
880 URI pointing to the Dataset within the datastore. If the
881 Dataset does not exist in the datastore, the URI may be a guess.
882 If the datastore does not have entities that relate well
883 to the concept of a URI the returned URI will be
884 descriptive. The returned URI is not guaranteed to be obtainable.
886 Raises
887 ------
888 FileNotFoundError
889 A URI has been requested for a dataset that does not exist and
890 guessing is not allowed.
891 """
892 raise NotImplementedError("Must be implemented by subclass")
894 @abstractmethod
895 def retrieveArtifacts(
896 self,
897 refs: Iterable[DatasetRef],
898 destination: ResourcePath,
899 transfer: str = "auto",
900 preserve_path: bool = True,
901 overwrite: bool = False,
902 ) -> list[ResourcePath]:
903 """Retrieve the artifacts associated with the supplied refs.
905 Parameters
906 ----------
907 refs : iterable of `DatasetRef`
908 The datasets for which artifacts are to be retrieved.
909 A single ref can result in multiple artifacts. The refs must
910 be resolved.
911 destination : `lsst.resources.ResourcePath`
912 Location to write the artifacts.
913 transfer : `str`, optional
914 Method to use to transfer the artifacts. Must be one of the options
915 supported by `lsst.resources.ResourcePath.transfer_from()`.
916 "move" is not allowed.
917 preserve_path : `bool`, optional
918 If `True` the full path of the artifact within the datastore
919 is preserved. If `False` the final file component of the path
920 is used.
921 overwrite : `bool`, optional
922 If `True` allow transfers to overwrite existing files at the
923 destination.
925 Returns
926 -------
927 targets : `list` of `lsst.resources.ResourcePath`
928 URIs of file artifacts in destination location. Order is not
929 preserved.
931 Notes
932 -----
933 For non-file datastores the artifacts written to the destination
934 may not match the representation inside the datastore. For example
935 a hierarchical data structure in a NoSQL database may well be stored
936 as a JSON file.
937 """
938 raise NotImplementedError()
940 @abstractmethod
941 def remove(self, datasetRef: DatasetRef) -> None:
942 """Indicate to the Datastore that a Dataset can be removed.
944 Parameters
945 ----------
946 datasetRef : `DatasetRef`
947 Reference to the required Dataset.
949 Raises
950 ------
951 FileNotFoundError
952 When Dataset does not exist.
954 Notes
955 -----
956 Some Datastores may implement this method as a silent no-op to
957 disable Dataset deletion through standard interfaces.
958 """
959 raise NotImplementedError("Must be implemented by subclass")
961 @abstractmethod
962 def forget(self, refs: Iterable[DatasetRef]) -> None:
963 """Indicate to the Datastore that it should remove all records of the
964 given datasets, without actually deleting them.
966 Parameters
967 ----------
968 refs : `~collections.abc.Iterable` [ `DatasetRef` ]
969 References to the datasets being forgotten.
971 Notes
972 -----
973 Asking a datastore to forget a `DatasetRef` it does not hold should be
974 a silent no-op, not an error.
975 """
976 raise NotImplementedError("Must be implemented by subclass")
978 @abstractmethod
979 def trash(self, ref: DatasetRef | Iterable[DatasetRef], ignore_errors: bool = True) -> None:
980 """Indicate to the Datastore that a Dataset can be moved to the trash.
982 Parameters
983 ----------
984 ref : `DatasetRef` or iterable thereof
985 Reference(s) to the required Dataset.
986 ignore_errors : `bool`, optional
987 Determine whether errors should be ignored. When multiple
988 refs are being trashed there will be no per-ref check.
990 Raises
991 ------
992 FileNotFoundError
993 When Dataset does not exist and errors are not ignored. Only
994 checked if a single ref is supplied (and not in a list).
996 Notes
997 -----
998 Some Datastores may implement this method as a silent no-op to
999 disable Dataset deletion through standard interfaces.
1000 """
1001 raise NotImplementedError("Must be implemented by subclass")
1003 @abstractmethod
1004 def emptyTrash(self, ignore_errors: bool = True) -> None:
1005 """Remove all datasets from the trash.
1007 Parameters
1008 ----------
1009 ignore_errors : `bool`, optional
1010 Determine whether errors should be ignored.
1012 Notes
1013 -----
1014 Some Datastores may implement this method as a silent no-op to
1015 disable Dataset deletion through standard interfaces.
1016 """
1017 raise NotImplementedError("Must be implemented by subclass")
1019 @abstractmethod
1020 def transfer(self, inputDatastore: Datastore, datasetRef: DatasetRef) -> None:
1021 """Transfer a dataset from another datastore to this datastore.
1023 Parameters
1024 ----------
1025 inputDatastore : `Datastore`
1026 The external `Datastore` from which to retrieve the Dataset.
1027 datasetRef : `DatasetRef`
1028 Reference to the required Dataset.
1029 """
1030 raise NotImplementedError("Must be implemented by subclass")
1032 def export(
1033 self,
1034 refs: Iterable[DatasetRef],
1035 *,
1036 directory: ResourcePathExpression | None = None,
1037 transfer: str | None = "auto",
1038 ) -> Iterable[FileDataset]:
1039 """Export datasets for transfer to another data repository.
1041 Parameters
1042 ----------
1043 refs : iterable of `DatasetRef`
1044 Dataset references to be exported.
1045 directory : `str`, optional
1046 Path to a directory that should contain files corresponding to
1047 output datasets. Ignored if ``transfer`` is explicitly `None`.
1048 transfer : `str`, optional
1049 Mode that should be used to move datasets out of the repository.
1050 Valid options are the same as those of the ``transfer`` argument
1051 to ``ingest``, and datastores may similarly signal that a transfer
1052 mode is not supported by raising `NotImplementedError`. If "auto"
1053 is given and no ``directory`` is specified, `None` will be
1054 implied.
1056 Returns
1057 -------
1058 datasets : iterable of `FileDataset`
1059 Structs containing information about the exported datasets, in the
1060 same order as ``refs``.
1062 Raises
1063 ------
1064 NotImplementedError
1065 Raised if the given transfer mode is not supported.
1066 """
1067 raise NotImplementedError(f"Transfer mode {transfer} not supported.")
1069 @abstractmethod
1070 def validateConfiguration(
1071 self, entities: Iterable[DatasetRef | DatasetType | StorageClass], logFailures: bool = False
1072 ) -> None:
1073 """Validate some of the configuration for this datastore.
1075 Parameters
1076 ----------
1077 entities : iterable of `DatasetRef`, `DatasetType`, or `StorageClass`
1078 Entities to test against this configuration. Can be differing
1079 types.
1080 logFailures : `bool`, optional
1081 If `True`, output a log message for every validation error
1082 detected.
1084 Raises
1085 ------
1086 DatastoreValidationError
1087 Raised if there is a validation problem with a configuration.
1089 Notes
1090 -----
1091 Which parts of the configuration are validated is at the discretion
1092 of each Datastore implementation.
1093 """
1094 raise NotImplementedError("Must be implemented by subclass")
1096 @abstractmethod
1097 def validateKey(self, lookupKey: LookupKey, entity: DatasetRef | DatasetType | StorageClass) -> None:
1098 """Validate a specific look up key with supplied entity.
1100 Parameters
1101 ----------
1102 lookupKey : `LookupKey`
1103 Key to use to retrieve information from the datastore
1104 configuration.
1105 entity : `DatasetRef`, `DatasetType`, or `StorageClass`
1106 Entity to compare with configuration retrieved using the
1107 specified lookup key.
1109 Raises
1110 ------
1111 DatastoreValidationError
1112 Raised if there is a problem with the combination of entity
1113 and lookup key.
1115 Notes
1116 -----
1117 Bypasses the normal selection priorities by allowing a key that
1118 would normally not be selected to be validated.
1119 """
1120 raise NotImplementedError("Must be implemented by subclass")
1122 @abstractmethod
1123 def getLookupKeys(self) -> set[LookupKey]:
1124 """Return all the lookup keys relevant to this datastore.
1126 Returns
1127 -------
1128 keys : `set` of `LookupKey`
1129 The keys stored internally for looking up information based
1130 on `DatasetType` name or `StorageClass`.
1131 """
1132 raise NotImplementedError("Must be implemented by subclass")
1134 def needs_expanded_data_ids(
1135 self,
1136 transfer: str | None,
1137 entity: DatasetRef | DatasetType | StorageClass | None = None,
1138 ) -> bool:
1139 """Test whether this datastore needs expanded data IDs to ingest.
1141 Parameters
1142 ----------
1143 transfer : `str` or `None`
1144 Transfer mode for ingest.
1145 entity : `DatasetRef`, `DatasetType`, or `StorageClass`, optional
1146 Object representing what will be ingested. If not provided (or not
1147 specific enough), `True` may be returned even if expanded data
1148 IDs aren't necessary.
1150 Returns
1151 -------
1152 needed : `bool`
1153 If `True`, expanded data IDs may be needed. `False` only if
1154 expansion definitely isn't necessary.
1155 """
1156 return True
1158 @abstractmethod
1159 def import_records(
1160 self,
1161 data: Mapping[str, DatastoreRecordData],
1162 ) -> None:
1163 """Import datastore location and record data from an in-memory data
1164 structure.
1166 Parameters
1167 ----------
1168 data : `~collections.abc.Mapping` [ `str`, `DatastoreRecordData` ]
1169 Datastore records indexed by datastore name. May contain data for
1170 other `Datastore` instances (generally because they are chained to
1171 this one), which should be ignored.
1173 Notes
1174 -----
1175 Implementations should generally not check that any external resources
1176 (e.g. files) referred to by these records actually exist, for
1177 performance reasons; we expect higher-level code to guarantee that they
1178 do.
1180 Implementations are responsible for calling
1181 `DatastoreRegistryBridge.insert` on all datasets in ``data.locations``
1182 where the key is in `names`, as well as loading any opaque table data.
1184 Implementations may assume that datasets are either fully present or
1185 not at all (single-component exports are not permitted).
1186 """
1187 raise NotImplementedError()
1189 @abstractmethod
1190 def export_records(
1191 self,
1192 refs: Iterable[DatasetIdRef],
1193 ) -> Mapping[str, DatastoreRecordData]:
1194 """Export datastore records and locations to an in-memory data
1195 structure.
1197 Parameters
1198 ----------
1199 refs : `~collections.abc.Iterable` [ `DatasetIdRef` ]
1200 Datasets to save. This may include datasets not known to this
1201 datastore, which should be ignored. May not include component
1202 datasets.
1204 Returns
1205 -------
1206 data : `~collections.abc.Mapping` [ `str`, `DatastoreRecordData` ]
1207 Exported datastore records indexed by datastore name.
1208 """
1209 raise NotImplementedError()
1211 def set_retrieve_dataset_type_method(self, method: Callable[[str], DatasetType | None] | None) -> None:
1212 """Specify a method that can be used by datastore to retrieve
1213 registry-defined dataset type.
1215 Parameters
1216 ----------
1217 method : `~collections.abc.Callable` | `None`
1218 Method that takes the name of a dataset type and returns the
1219 corresponding `DatasetType` instance as defined in Registry. If the
1220 dataset type name is not known to the registry, `None` is returned.
1222 Notes
1223 -----
1224 This method is only needed for a Datastore supporting a "trusted" mode
1225 when it does not have access to datastore records and needs to
1226 guess the dataset location based on its stored dataset type.
1227 """
1228 pass
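# --- Illustrative sketch (not part of daf_butler): constructing a concrete
# datastore from configuration, as Datastore.fromConfig() does internally.
# DatastoreConfig requires a "cls" key naming an importable Datastore
# subclass; the class named in the comment below is only an example of such
# a value.
def _example_from_config(config: Config, bridgeManager: DatastoreRegistryBridgeManager) -> Datastore:
    # config["datastore", "cls"] might be, e.g.,
    # "lsst.daf.butler.datastores.fileDatastore.FileDatastore".
    return Datastore.fromConfig(config, bridgeManager=bridgeManager)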
1231class NullDatastore(Datastore):
1232 """A datastore that implements the `Datastore` API but always fails
1233 when it receives any request.
1234 """
1236 @classmethod
1237 def setConfigRoot(cls, root: str, config: Config, full: Config, overwrite: bool = True) -> None:
1238 # Nothing to do. This is not a real Datastore.
1239 pass
1241 def __init__(
1242 self,
1243 config: Config | ResourcePathExpression | None,
1244 bridgeManager: DatastoreRegistryBridgeManager | None,
1245 butlerRoot: ResourcePathExpression | None = None,
1246 ):
1247 # Name ourselves with the timestamp the datastore
1248 # was created.
1249 self.name = f"{type(self).__name__}@{time.time()}"
1250 _LOG.debug("Creating datastore %s", self.name)
1252 return
1254 def knows(self, ref: DatasetRef) -> bool:
1255 return False
1257 def exists(self, datasetRef: DatasetRef) -> bool:
1258 return False
1260 def get(
1261 self,
1262 datasetRef: DatasetRef,
1263 parameters: Mapping[str, Any] | None = None,
1264 storageClass: StorageClass | str | None = None,
1265 ) -> Any:
1266 raise FileNotFoundError("This is a no-op datastore that can not access a real datastore")
1268 def put(self, inMemoryDataset: Any, datasetRef: DatasetRef) -> None:
1269 raise NotImplementedError("This is a no-op datastore that can not access a real datastore")
1271 def ingest(
1272 self, *datasets: FileDataset, transfer: str | None = None, record_validation_info: bool = True
1273 ) -> None:
1274 raise NotImplementedError("This is a no-op datastore that can not access a real datastore")
1276 def transfer_from(
1277 self,
1278 source_datastore: Datastore,
1279 refs: Iterable[DatasetRef],
1280 transfer: str = "auto",
1281 artifact_existence: dict[ResourcePath, bool] | None = None,
1282 ) -> tuple[set[DatasetRef], set[DatasetRef]]:
1283 raise NotImplementedError("This is a no-op datastore that can not access a real datastore")
1285 def getURIs(self, datasetRef: DatasetRef, predict: bool = False) -> DatasetRefURIs:
1286 raise FileNotFoundError("This is a no-op datastore that can not access a real datastore")
1288 def getURI(self, datasetRef: DatasetRef, predict: bool = False) -> ResourcePath:
1289 raise FileNotFoundError("This is a no-op datastore that can not access a real datastore")
1291 def retrieveArtifacts(
1292 self,
1293 refs: Iterable[DatasetRef],
1294 destination: ResourcePath,
1295 transfer: str = "auto",
1296 preserve_path: bool = True,
1297 overwrite: bool = False,
1298 ) -> list[ResourcePath]:
1299 raise NotImplementedError("This is a no-op datastore that can not access a real datastore")
1301 def remove(self, datasetRef: DatasetRef) -> None:
1302 raise NotImplementedError("This is a no-op datastore that can not access a real datastore")
1304 def forget(self, refs: Iterable[DatasetRef]) -> None:
1305 raise NotImplementedError("This is a no-op datastore that can not access a real datastore")
1307 def trash(self, ref: DatasetRef | Iterable[DatasetRef], ignore_errors: bool = True) -> None:
1308 raise NotImplementedError("This is a no-op datastore that can not access a real datastore")
1310 def emptyTrash(self, ignore_errors: bool = True) -> None:
1311 raise NotImplementedError("This is a no-op datastore that can not access a real datastore")
1313 def transfer(self, inputDatastore: Datastore, datasetRef: DatasetRef) -> None:
1314 raise NotImplementedError("This is a no-op datastore that can not access a real datastore")
1316 def export(
1317 self,
1318 refs: Iterable[DatasetRef],
1319 *,
1320 directory: ResourcePathExpression | None = None,
1321 transfer: str | None = "auto",
1322 ) -> Iterable[FileDataset]:
1323 raise NotImplementedError("This is a no-op datastore that can not access a real datastore")
1325 def validateConfiguration(
1326 self, entities: Iterable[DatasetRef | DatasetType | StorageClass], logFailures: bool = False
1327 ) -> None:
1328 # No configuration so always validates.
1329 pass
1331 def validateKey(self, lookupKey: LookupKey, entity: DatasetRef | DatasetType | StorageClass) -> None:
1332 pass
1334 def getLookupKeys(self) -> set[LookupKey]:
1335 raise NotImplementedError("This is a no-op datastore that can not access a real datastore")
1337 def import_records(
1338 self,
1339 data: Mapping[str, DatastoreRecordData],
1340 ) -> None:
1341 raise NotImplementedError("This is a no-op datastore that can not access a real datastore")
1343 def export_records(
1344 self,
1345 refs: Iterable[DatasetIdRef],
1346 ) -> Mapping[str, DatastoreRecordData]:
1347 raise NotImplementedError("This is a no-op datastore that can not access a real datastore")
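# --- Illustrative sketch (not part of daf_butler): NullDatastore needs no
# configuration or bridge manager, so it can stand in wherever a Datastore
# instance is required but never actually used for I/O.
def _example_null_datastore() -> Datastore:
    return NullDatastore(config=None, bridgeManager=None)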