Coverage for python/lsst/daf/butler/datastores/posixDatastore.py: 87%

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

"""POSIX datastore."""

__all__ = ("PosixDatastore", )

import hashlib
import logging
import os
from typing import (
    TYPE_CHECKING,
    Any,
    ClassVar,
    Optional,
    Type,
    Union
)

from .fileLikeDatastore import FileLikeDatastore
from lsst.daf.butler.core.utils import safeMakeDir
from lsst.daf.butler import ButlerURI, FileDataset, StoredFileInfo, Formatter, DatasetRef

if TYPE_CHECKING:
    from .fileLikeDatastore import DatastoreFileGetInformation
    from lsst.daf.butler import DatastoreConfig, Location
    from lsst.daf.butler.registry.interfaces import DatastoreRegistryBridgeManager

log = logging.getLogger(__name__)


class PosixDatastore(FileLikeDatastore):
    """Basic POSIX filesystem backed Datastore.

    Parameters
    ----------
    config : `DatastoreConfig` or `str`
        Configuration. A string should refer to the name of the config file.
    bridgeManager : `DatastoreRegistryBridgeManager`
        Object that manages the interface between `Registry` and datastores.
    butlerRoot : `str`, optional
        New datastore root to use to override the configuration value.

    Raises
    ------
    ValueError
        If root location does not exist and ``create`` is `False` in the
        configuration.

    Notes
    -----
    PosixDatastore supports all transfer modes for file-based ingest:
    `"move"`, `"copy"`, `"symlink"`, `"hardlink"`, `"relsymlink"`
    and `None` (no transfer).
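
    Examples
    --------
    An illustrative sketch of file-based ingest with an explicit transfer
    mode, assuming an existing `Butler` ``butler`` backed by this datastore
    and a resolved `DatasetRef` ``ref`` (both names are hypothetical)::

        butler.ingest(FileDataset(path="data/raw.fits", refs=[ref]),
                      transfer="symlink")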
75 """
77 defaultConfigFile: ClassVar[Optional[str]] = "datastores/posixDatastore.yaml"
78 """Path to configuration defaults. Accessed within the ``config`` resource
79 or relative to a search path. Can be None if no defaults specified.
80 """

    def __init__(self, config: Union[DatastoreConfig, str],
                 bridgeManager: DatastoreRegistryBridgeManager, butlerRoot: Optional[str] = None):
        super().__init__(config, bridgeManager, butlerRoot)

        # Check that root is a valid URI for this datastore
        root = ButlerURI(self.root, forceDirectory=True)
        if root.scheme and root.scheme != "file":
            raise ValueError(f"Root location must only be a file URI not {self.root}")

        self.root = root.ospath
        if not os.path.isdir(self.root):
            if "create" not in self.config or not self.config["create"]:
                raise ValueError(f"No valid root at: {self.root}")
            safeMakeDir(self.root)

    def _artifact_exists(self, location: Location) -> bool:
        """Check that an artifact exists in this datastore at the specified
        location.

        Parameters
        ----------
        location : `Location`
            Expected location of the artifact associated with this datastore.

        Returns
        -------
        exists : `bool`
            `True` if the location can be found, `False` otherwise.
        """
        return os.path.exists(location.path)

    def _delete_artifact(self, location: Location) -> None:
        """Delete the artifact from the datastore.

        Parameters
        ----------
        location : `Location`
            Location of the artifact associated with this datastore.
        """
        os.remove(location.path)

    def _read_artifact_into_memory(self, getInfo: DatastoreFileGetInformation,
                                   ref: DatasetRef, isComponent: bool = False) -> Any:
        location = getInfo.location

        # Too expensive to recalculate the checksum on fetch
        # but we can check size and existence
        if not os.path.exists(location.path):
            raise FileNotFoundError("Dataset with Id {} does not seem to exist at"
                                    " expected location of {}".format(ref.id, location.path))
        stat = os.stat(location.path)
        size = stat.st_size
        storedFileInfo = getInfo.info
        if size != storedFileInfo.file_size:
            raise RuntimeError("Integrity failure in Datastore. Size of file {} ({}) does not"
                               " match recorded size of {}".format(location.path, size,
                                                                   storedFileInfo.file_size))

        formatter = getInfo.formatter
        try:
            result = formatter.read(component=getInfo.component if isComponent else None)
        except Exception as e:
            raise ValueError(f"Failure from formatter '{formatter.name()}' for dataset {ref.id}"
                             f" ({ref.datasetType.name} from {location.path}): {e}") from e

        return self._post_process_get(result, getInfo.readStorageClass, getInfo.assemblerParams,
                                      isComponent=isComponent)

    def _write_in_memory_to_artifact(self, inMemoryDataset: Any, ref: DatasetRef) -> StoredFileInfo:
        # Inherit docstring

        location, formatter = self._prepare_for_put(inMemoryDataset, ref)

        storageDir = os.path.dirname(location.path)
        if not os.path.isdir(storageDir):
            # Never try to remove this after creating it since there might
            # be a butler ingest process running concurrently that will
            # already think this directory exists.
            safeMakeDir(storageDir)

        # Write the file
        predictedFullPath = os.path.join(self.root, formatter.predictPath())

        if os.path.exists(predictedFullPath):
            raise FileExistsError(f"Cannot write file for ref {ref} as "
                                  f"output file {predictedFullPath} already exists")

        def _removeFileExists(path: str) -> None:
            """Remove a file and do not complain if it is not there.

            This is important since a formatter might fail before the file
            is written and we should not confuse people by writing spurious
            error messages to the log.
            """
            try:
                os.remove(path)
            except FileNotFoundError:
                pass

        if self._transaction is None:
            raise RuntimeError("Attempting to write dataset without transaction enabled")

        formatter_exception = None
        with self._transaction.undoWith("write", _removeFileExists, predictedFullPath):
            try:
                path = formatter.write(inMemoryDataset)
                log.debug("Wrote file to %s", path)
            except Exception as e:
                formatter_exception = e
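
        # The formatter error is deliberately re-raised outside the undoWith
        # block: the undo action stays registered with the transaction and
        # can remove any partially written file on rollback.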
        if formatter_exception:
            raise formatter_exception

        assert predictedFullPath == os.path.join(self.root, path)

        return self._extractIngestInfo(path, ref, formatter=formatter)

    def _overrideTransferMode(self, *datasets: FileDataset, transfer: Optional[str] = None) -> Optional[str]:
        # Docstring inherited from base class
        if transfer != "auto":
            return transfer

        # See if the paths are within the datastore or not
        inside = [self._pathInStore(d.path) is not None for d in datasets]

        if all(inside):
            transfer = None
        elif not any(inside):
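            # "link" is understood to attempt a hard link first, falling
            # back to a symbolic link if that fails.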
            transfer = "link"
        else:
            raise ValueError("Some datasets are inside the datastore and some are outside."
                             " Please use an explicit transfer mode and not 'auto'.")

        return transfer

    def _pathInStore(self, path: str) -> Optional[str]:
        """Return path relative to datastore root.

        Parameters
        ----------
        path : `str`
            Path to dataset. Can be an absolute path.

        Returns
        -------
        inStore : `str`
            Path relative to datastore root. Returns `None` if the file is
            outside the root.
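
        Examples
        --------
        With a datastore rooted at ``/repo`` (an illustrative path)::

            self._pathInStore("/repo/a/b.fits")   # -> "a/b.fits"
            self._pathInStore("/other/b.fits")    # -> None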
231 """
232 pathUri = ButlerURI(path, forceAbsolute=False)
233 rootUri = ButlerURI(self.root, forceDirectory=True, forceAbsolute=True)
234 return pathUri.relative_to(rootUri)

    def _standardizeIngestPath(self, path: str, *, transfer: Optional[str] = None) -> str:
        # Docstring inherited from FileLikeDatastore._standardizeIngestPath.
        fullPath = os.path.normpath(os.path.join(self.root, path))
        if not os.path.exists(fullPath):
            raise FileNotFoundError(f"File at '{fullPath}' does not exist; note that paths to ingest "
                                    "are assumed to be relative to self.root unless they are absolute.")
        if transfer is None:
            # Cannot reuse the path variable because of typing
            pathx = self._pathInStore(path)
            if pathx is None:
                raise RuntimeError(f"'{path}' is not inside repository root '{self.root}'.")
            path = pathx
        return path

    def _extractIngestInfo(self, path: Union[str, ButlerURI], ref: DatasetRef, *,
                           formatter: Union[Formatter, Type[Formatter]],
                           transfer: Optional[str] = None) -> StoredFileInfo:
        # Docstring inherited from FileLikeDatastore._extractIngestInfo.
        if self._transaction is None:
            raise RuntimeError("Ingest called without transaction enabled")

        # Calculate the full path to the source
        srcUri = ButlerURI(path, root=self.root, forceAbsolute=True)
        if transfer is None:
            # File should exist already
            rootUri = ButlerURI(self.root, forceDirectory=True)
            pathInStore = srcUri.relative_to(rootUri)
            if pathInStore is None:
                raise RuntimeError(f"Unexpectedly learned that {srcUri} is not within datastore {rootUri}")
            if not srcUri.exists():
                raise RuntimeError(f"Unexpectedly discovered that {srcUri} does not exist inside datastore"
                                   f" {rootUri}")
            path = pathInStore
            fullPath = srcUri.ospath
        else:
            # Work out the name we want this ingested file to have
            # inside the datastore
            location = self._calculate_ingested_datastore_name(srcUri, ref, formatter)
            path = location.pathInStore
            fullPath = location.path
            targetUri = ButlerURI(location.uri)
            targetUri.transfer_from(srcUri, transfer=transfer, transaction=self._transaction)

        checksum = self.computeChecksum(fullPath) if self.useChecksum else None
        stat = os.stat(fullPath)
        size = stat.st_size
        return StoredFileInfo(formatter=formatter, path=path, storageClass=ref.datasetType.storageClass,
                              component=ref.datasetType.component(),
                              file_size=size, checksum=checksum)

    @staticmethod
    def computeChecksum(filename: str, algorithm: str = "blake2b", block_size: int = 8192) -> str:
        """Compute the checksum of the supplied file.

        Parameters
        ----------
        filename : `str`
            Name of file to calculate checksum from.
        algorithm : `str`, optional
            Name of algorithm to use. Must be one of the algorithms supported
            by `hashlib`.
        block_size : `int`, optional
            Number of bytes to read from file at one time.

        Returns
        -------
        hexdigest : `str`
            Hex digest of the file.
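
        Examples
        --------
        Checksum a file with the default ``blake2b`` algorithm
        (``"/tmp/data.fits"`` is an illustrative path)::

            checksum = PosixDatastore.computeChecksum("/tmp/data.fits")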
304 """
        if algorithm not in hashlib.algorithms_guaranteed:
            raise NameError("The specified algorithm '{}' is not supported by hashlib".format(algorithm))

        hasher = hashlib.new(algorithm)

        with open(filename, "rb") as f:
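            # Read fixed-size chunks until iter() hits the b"" sentinel
            # (end of file) so large files are never held fully in memory.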
            for chunk in iter(lambda: f.read(block_size), b""):
                hasher.update(chunk)

        return hasher.hexdigest()