Coverage for python/lsst/daf/butler/datastores/posixDatastore.py : 83%

Hot-keys on this page:
  r, m, x, p : toggle line displays
  j, k       : next / previous highlighted chunk
  0 (zero)   : top of page
  1 (one)    : first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24"""POSIX datastore."""
26__all__ = ("PosixDatastore", )
28import logging
29import os
30from typing import (
31 TYPE_CHECKING,
32 Any,
33 ClassVar,
34 Optional,
35 Union
36)
38from .fileLikeDatastore import FileLikeDatastore
39from lsst.daf.butler.core.utils import safeMakeDir
40from lsst.daf.butler import StoredFileInfo, DatasetRef
42if TYPE_CHECKING: 42 ↛ 43line 42 didn't jump to line 43, because the condition on line 42 was never true
43 from .fileLikeDatastore import DatastoreFileGetInformation
44 from lsst.daf.butler import DatastoreConfig
45 from lsst.daf.butler.registry.interfaces import DatastoreRegistryBridgeManager
47log = logging.getLogger(__name__)
class PosixDatastore(FileLikeDatastore):
    """Basic POSIX filesystem backed Datastore.

    Parameters
    ----------
    config : `DatastoreConfig` or `str`
        Configuration. A string should refer to the name of the config file.
    bridgeManager : `DatastoreRegistryBridgeManager`
        Object that manages the interface between `Registry` and datastores.
    butlerRoot : `str`, optional
        New datastore root to use to override the configuration value.

    Raises
    ------
    ValueError
        If root location does not exist and ``create`` is `False` in the
        configuration.

    Notes
    -----
    PosixDatastore supports all transfer modes for file-based ingest:
    `"move"`, `"copy"`, `"symlink"`, `"hardlink"`, `"relsymlink"`
    and `None` (no transfer).

    For PosixDatastore, the `"auto"` transfer mode will operate in-place (like
    ``transfer=None``) if the file is already within the datastore root, and
    fall back to `"link"` otherwise.

    See `Datastore.ingest` for more information on transfer modes.
    """

    defaultConfigFile: ClassVar[Optional[str]] = "datastores/posixDatastore.yaml"
    """Path to configuration defaults. Accessed within the ``configs`` resource
    or relative to a search path. Can be None if no defaults specified.
    """

    def __init__(self, config: Union[DatastoreConfig, str],
                 bridgeManager: DatastoreRegistryBridgeManager,
                 butlerRoot: Optional[str] = None):
        # Note: ``butlerRoot`` was previously annotated ``str = None``;
        # PEP 484 requires an explicit Optional for a None default.
        super().__init__(config, bridgeManager, butlerRoot)

        # Check that root is a valid URI for this datastore; a scheme-less
        # URI is treated as a local file path.
        if self.root.scheme and self.root.scheme != "file":
            raise ValueError(f"Root location must only be a file URI not {self.root}")

        if not self.root.exists():
            if "create" not in self.config or not self.config["create"]:
                raise ValueError(f"No valid root and not allowed to create one at: {self.root}")
            self.root.mkdir()

    def _read_artifact_into_memory(self, getInfo: DatastoreFileGetInformation,
                                   ref: DatasetRef, isComponent: bool = False) -> Any:
        """Read an artifact from disk into memory using its formatter.

        Parameters
        ----------
        getInfo : `DatastoreFileGetInformation`
            Collected information about the artifact, including its location,
            formatter and recorded file metadata.
        ref : `DatasetRef`
            Reference to the dataset being retrieved.
        isComponent : `bool`, optional
            `True` if a component of a composite is being retrieved.

        Returns
        -------
        inMemoryDataset : `object`
            The retrieved dataset after post-processing for the requested
            read storage class.

        Raises
        ------
        FileNotFoundError
            Raised if no file exists at the expected location.
        RuntimeError
            Raised if the on-disk size does not match the recorded size.
        ValueError
            Raised if the formatter fails to read the artifact.
        """
        location = getInfo.location

        # Too expensive to recalculate the checksum on fetch
        # but we can check size and existence.
        if not os.path.exists(location.path):
            raise FileNotFoundError(f"Dataset with Id {ref.id} does not seem to exist at"
                                    f" expected location of {location.path}")
        size = location.uri.size()
        storedFileInfo = getInfo.info
        if size != storedFileInfo.file_size:
            raise RuntimeError(f"Integrity failure in Datastore. Size of file {location.path}"
                               f" ({size}) does not"
                               f" match recorded size of {storedFileInfo.file_size}")

        formatter = getInfo.formatter
        try:
            log.debug("Reading %s from location %s with formatter %s",
                      f"component {getInfo.component}" if isComponent else "",
                      location.uri, type(formatter).__name__)
            result = formatter.read(component=getInfo.component if isComponent else None)
        except Exception as e:
            # Wrap any formatter failure with enough context to identify
            # the offending dataset, keeping the original as the cause.
            raise ValueError(f"Failure from formatter '{formatter.name()}' for dataset {ref.id}"
                             f" ({ref.datasetType.name} from {location.path}): {e}") from e

        return self._post_process_get(result, getInfo.readStorageClass, getInfo.assemblerParams,
                                      isComponent=isComponent)

    def _write_in_memory_to_artifact(self, inMemoryDataset: Any, ref: DatasetRef) -> StoredFileInfo:
        # Inherit docstring

        location, formatter = self._prepare_for_put(inMemoryDataset, ref)

        storageDir = os.path.dirname(location.path)
        if not os.path.isdir(storageDir):
            # Never try to remove this after creating it since there might
            # be a butler ingest process running concurrently that will
            # already think this directory exists.
            safeMakeDir(storageDir)

        # Write the file
        predictedFullPath = os.path.join(self.root.ospath, formatter.predictPath())

        if os.path.exists(predictedFullPath):
            # Assume that by this point if registry thinks the file should
            # not exist then the file should not exist and therefore we can
            # overwrite it. This can happen if a put was interrupted by
            # an external interrupt. The only time this could be problematic is
            # if the file template is incomplete and multiple dataset refs
            # result in identical filenames.
            log.warning("Object %s exists in datastore for ref %s", location.uri, ref)

        def _removeFileExists(path: str) -> None:
            """Remove a file and do not complain if it is not there.

            This is important since a formatter might fail before the file
            is written and we should not confuse people by writing spurious
            error messages to the log.
            """
            try:
                os.remove(path)
            except FileNotFoundError:
                pass

        if self._transaction is None:
            raise RuntimeError("Attempting to write dataset without transaction enabled")

        # Defer re-raising a formatter failure until after the transaction
        # context has exited, so the undo action (file removal) is still
        # registered and will run on rollback.
        formatter_exception = None
        with self._transaction.undoWith("write", _removeFileExists, predictedFullPath):
            try:
                path = formatter.write(inMemoryDataset)
                log.debug("Wrote file to %s", path)
            except Exception as e:
                formatter_exception = e

        if formatter_exception:
            raise formatter_exception

        # Sanity check: the formatter must have written where we predicted,
        # otherwise the registered undo action points at the wrong file.
        assert predictedFullPath == os.path.join(self.root.ospath, path)

        return self._extractIngestInfo(path, ref, formatter=formatter)