Coverage for python/lsst/daf/butler/datastores/posixDatastore.py : 89%

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

"""POSIX datastore."""

__all__ = ("PosixDatastore", )

import hashlib
import logging
import os
import shutil
from typing import (
    TYPE_CHECKING,
    Any,
    ClassVar,
    Iterable,
    Optional,
    Type,
    Union
)

from .fileLikeDatastore import FileLikeDatastore
from lsst.daf.butler.core.safeFileIo import safeMakeDir
from lsst.daf.butler import ButlerURI, FileDataset, StoredFileInfo, Formatter, DatasetRef

if TYPE_CHECKING:
    from .fileLikeDatastore import DatastoreFileGetInformation
    from lsst.daf.butler import DatastoreConfig, Location
    from lsst.daf.butler.registry.interfaces import DatastoreRegistryBridgeManager

log = logging.getLogger(__name__)


class PosixDatastore(FileLikeDatastore):
55 """Basic POSIX filesystem backed Datastore.
57 Parameters
58 ----------
59 config : `DatastoreConfig` or `str`
60 Configuration. A string should refer to the name of the config file.
61 bridgeManager : `DatastoreRegistryBridgeManager`
62 Object that manages the interface between `Registry` and datastores.
63 butlerRoot : `str`, optional
64 New datastore root to use to override the configuration value.
66 Raises
67 ------
68 ValueError
69 If root location does not exist and ``create`` is `False` in the
70 configuration.
72 Notes
73 -----
74 PosixDatastore supports all transfer modes for file-based ingest:
75 `"move"`, `"copy"`, `"symlink"`, `"hardlink"`, `"relsymlink"`
76 and `None` (no transfer).
77 """

    defaultConfigFile: ClassVar[Optional[str]] = "datastores/posixDatastore.yaml"
    """Path to configuration defaults. Relative to $DAF_BUTLER_DIR/config or
    absolute path. Can be None if no defaults specified.
    """

    def __init__(self, config: Union[DatastoreConfig, str],
                 bridgeManager: DatastoreRegistryBridgeManager, butlerRoot: Optional[str] = None):
        super().__init__(config, bridgeManager, butlerRoot)

        # Check that root is a valid URI for this datastore
        root = ButlerURI(self.root)
        if root.scheme and root.scheme != "file":
            raise ValueError(f"Root location must only be a file URI not {self.root}")

        self.root = root.path
        if not os.path.isdir(self.root):
            if "create" not in self.config or not self.config["create"]:
                raise ValueError(f"No valid root at: {self.root}")
            safeMakeDir(self.root)

    def _artifact_exists(self, location: Location) -> bool:
        """Check that an artifact exists in this datastore at the specified
        location.

        Parameters
        ----------
        location : `Location`
            Expected location of the artifact associated with this datastore.

        Returns
        -------
        exists : `bool`
            `True` if the location can be found, `False` otherwise.
        """
        return os.path.exists(location.path)

    def _delete_artifact(self, location: Location) -> None:
        """Delete the artifact from the datastore.

        Parameters
        ----------
        location : `Location`
            Location of the artifact associated with this datastore.
        """
        os.remove(location.path)

    def _read_artifact_into_memory(self, getInfo: DatastoreFileGetInformation,
                                   ref: DatasetRef, isComponent: bool = False) -> Any:
        location = getInfo.location

        # Too expensive to recalculate the checksum on fetch
        # but we can check size and existence
        if not os.path.exists(location.path):
            raise FileNotFoundError("Dataset with Id {} does not seem to exist at"
                                    " expected location of {}".format(ref.id, location.path))
        stat = os.stat(location.path)
        size = stat.st_size
        storedFileInfo = getInfo.info
        if size != storedFileInfo.file_size:
            raise RuntimeError("Integrity failure in Datastore. Size of file {} ({}) does not"
                               " match recorded size of {}".format(location.path, size,
                                                                   storedFileInfo.file_size))

        formatter = getInfo.formatter
        try:
            result = formatter.read(component=getInfo.component if isComponent else None)
        except Exception as e:
            raise ValueError(f"Failure from formatter '{formatter.name()}' for dataset {ref.id}"
                             f" ({ref.datasetType.name} from {location.path}): {e}") from e

        return self._post_process_get(result, getInfo.readStorageClass, getInfo.assemblerParams,
                                      isComponent=isComponent)

    def _write_in_memory_to_artifact(self, inMemoryDataset: Any, ref: DatasetRef) -> StoredFileInfo:
        # Inherit docstring

        location, formatter = self._prepare_for_put(inMemoryDataset, ref)

        storageDir = os.path.dirname(location.path)
        if not os.path.isdir(storageDir):
            # Never try to remove this after creating it since there might
            # be a butler ingest process running concurrently that will
            # already think this directory exists.
            safeMakeDir(storageDir)

        # Write the file
        predictedFullPath = os.path.join(self.root, formatter.predictPath())

        if os.path.exists(predictedFullPath):
            raise FileExistsError(f"Cannot write file for ref {ref} as "
                                  f"output file {predictedFullPath} already exists")

        def _removeFileExists(path: str) -> None:
            """Remove a file and do not complain if it is not there.

            This is important since a formatter might fail before the file
            is written and we should not confuse people by writing spurious
            error messages to the log.
            """
            try:
                os.remove(path)
            except FileNotFoundError:
                pass

        if self._transaction is None:
            raise RuntimeError("Attempting to write dataset without transaction enabled")

        formatter_exception = None
        with self._transaction.undoWith("write", _removeFileExists, predictedFullPath):
            try:
                path = formatter.write(inMemoryDataset)
                log.debug("Wrote file to %s", path)
            except Exception as e:
                formatter_exception = e

        if formatter_exception:
            raise formatter_exception

        assert predictedFullPath == os.path.join(self.root, path)

        return self._extractIngestInfo(path, ref, formatter=formatter)

    def _overrideTransferMode(self, *datasets: FileDataset, transfer: Optional[str] = None) -> Optional[str]:
        # Docstring inherited from base class
        if transfer != "auto":
            return transfer

        # See if the paths are within the datastore or not
        inside = [self._pathInStore(d.path) is not None for d in datasets]

        if all(inside):
            transfer = None
        elif not any(inside):
            transfer = "link"
        else:
            raise ValueError("Some datasets are inside the datastore and some are outside."
                             " Please use an explicit transfer mode and not 'auto'.")

        return transfer

    def _pathInStore(self, path: str) -> Optional[str]:
        """Return the path relative to the datastore root.

        Parameters
        ----------
        path : `str`
            Path to dataset. Can be an absolute path.

        Returns
        -------
        inStore : `str`
            Path relative to datastore root. Returns `None` if the file is
            outside the root.
233 """
234 if os.path.isabs(path):
235 absRoot = os.path.abspath(self.root)
236 if os.path.commonpath([absRoot, path]) != absRoot: 236 ↛ 238line 236 didn't jump to line 238, because the condition on line 236 was never false
237 return None
238 return os.path.relpath(path, absRoot)
239 elif path.startswith(os.path.pardir):
240 return None
241 return path

    def _standardizeIngestPath(self, path: str, *, transfer: Optional[str] = None) -> str:
        # Docstring inherited from FileLikeDatastore._standardizeIngestPath.
        fullPath = os.path.normpath(os.path.join(self.root, path))
        if not os.path.exists(fullPath):
            raise FileNotFoundError(f"File at '{fullPath}' does not exist; note that paths to ingest "
                                    f"are assumed to be relative to self.root unless they are absolute.")
        if transfer is None:
            # Can not reuse path var because of typing
            pathx = self._pathInStore(path)
            if pathx is None:
                raise RuntimeError(f"'{path}' is not inside repository root '{self.root}'.")
            path = pathx
        return path

    def _extractIngestInfo(self, path: str, ref: DatasetRef, *,
                           formatter: Union[Formatter, Type[Formatter]],
                           transfer: Optional[str] = None) -> StoredFileInfo:
        # Docstring inherited from FileLikeDatastore._extractIngestInfo.
        if self._transaction is None:
            raise RuntimeError("Ingest called without transaction enabled")

        fullPath = os.path.normpath(os.path.join(self.root, path))
        if transfer is not None:
            template = self.templates.getTemplate(ref)
            location = self.locationFactory.fromPath(template.format(ref))
            newPath = formatter.predictPathFromLocation(location)
            newFullPath = os.path.join(self.root, newPath)
            if os.path.exists(newFullPath):
                raise FileExistsError(f"File '{newFullPath}' already exists.")
            storageDir = os.path.dirname(newFullPath)
            if not os.path.isdir(storageDir):
                # Do not attempt to reverse directory creation
                # because of race conditions with other processes running
                # ingest in parallel.
                safeMakeDir(storageDir)
            if transfer == "move":
                with self._transaction.undoWith("move", shutil.move, newFullPath, fullPath):
                    shutil.move(fullPath, newFullPath)
            elif transfer == "copy":
                with self._transaction.undoWith("copy", os.remove, newFullPath):
                    shutil.copy(fullPath, newFullPath)
            elif transfer == "link":
                with self._transaction.undoWith("link", os.unlink, newFullPath):
                    realPath = os.path.realpath(fullPath)
                    # Try a hard link and if that fails use a symlink
                    try:
                        os.link(realPath, newFullPath)
                    except OSError:
                        # Read through existing symlinks
                        os.symlink(realPath, newFullPath)
            elif transfer == "hardlink":
                with self._transaction.undoWith("hardlink", os.unlink, newFullPath):
                    os.link(os.path.realpath(fullPath), newFullPath)
            elif transfer == "symlink":
                with self._transaction.undoWith("symlink", os.unlink, newFullPath):
                    # Read through existing symlinks
                    os.symlink(os.path.realpath(fullPath), newFullPath)
            elif transfer == "relsymlink":
                # This is a standard symlink but using a relative path
                fullPath = os.path.realpath(fullPath)

                # Need the directory name to use as the relative root;
                # a full file path confuses relpath into an extra ../
                newFullPathRoot, _ = os.path.split(newFullPath)
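                # Worked example (purely illustrative, hypothetical paths):
                # with fullPath = "/data/in/a.fits" and
                # newFullPath = "/repo/x/a.fits", newFullPathRoot is
                # "/repo/x" and relPath becomes "../../data/in/a.fits",
                # so the link resolves correctly from its own directory.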
                relPath = os.path.relpath(fullPath, newFullPathRoot)
                with self._transaction.undoWith("relsymlink", os.unlink, newFullPath):
                    os.symlink(relPath, newFullPath)
            else:
                raise NotImplementedError("Transfer type '{}' not supported.".format(transfer))
            path = newPath
            fullPath = newFullPath
        checksum = self.computeChecksum(fullPath) if self.useChecksum else None
        stat = os.stat(fullPath)
        size = stat.st_size
        return StoredFileInfo(formatter=formatter, path=path, storageClass=ref.datasetType.storageClass,
                              component=ref.datasetType.component(),
                              file_size=size, checksum=checksum)

    @staticmethod
    def computeChecksum(filename: str, algorithm: str = "blake2b", block_size: int = 8192) -> str:
        """Compute the checksum of the supplied file.

        Parameters
        ----------
        filename : `str`
            Name of file to calculate checksum from.
        algorithm : `str`, optional
            Name of algorithm to use. Must be one of the algorithms supported
            by :py:mod:`hashlib`.
        block_size : `int`, optional
            Number of bytes to read from file at one time.

        Returns
        -------
        hexdigest : `str`
            Hex digest of the file.
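
        Examples
        --------
        A small illustrative sketch; ``example.dat`` is a hypothetical file
        created only for this example:

        >>> with open("example.dat", "wb") as f:
        ...     _ = f.write(b"some bytes")
        >>> digest = PosixDatastore.computeChecksum("example.dat")
        >>> len(digest)  # default blake2b digest is 64 bytes -> 128 hex chars
        128
        >>> os.remove("example.dat")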
339 """
340 if algorithm not in hashlib.algorithms_guaranteed: 340 ↛ 341line 340 didn't jump to line 341, because the condition on line 340 was never true
341 raise NameError("The specified algorithm '{}' is not supported by hashlib".format(algorithm))
343 hasher = hashlib.new(algorithm)
345 with open(filename, "rb") as f:
346 for chunk in iter(lambda: f.read(block_size), b""):
347 hasher.update(chunk)
349 return hasher.hexdigest()

    def export(self, refs: Iterable[DatasetRef], *,
               directory: Optional[str] = None, transfer: Optional[str] = None) -> Iterable[FileDataset]:
        # Docstring inherited from Datastore.export.
        for ref in refs:
            fileLocations = self._get_dataset_locations_info(ref)
            if not fileLocations:
                raise FileNotFoundError(f"Could not retrieve dataset {ref}.")
            # For now we can not export disassembled datasets
            if len(fileLocations) > 1:
                raise NotImplementedError(f"Can not export disassembled datasets such as {ref}")
            location, storedFileInfo = fileLocations[0]
            if transfer is None:
                # TODO: do we also need to return the readStorageClass somehow?
                yield FileDataset(refs=[ref], path=location.pathInStore, formatter=storedFileInfo.formatter)
            else:
                # TODO: add support for other transfer modes. If we support
                # moving, this method should become transactional.
                raise NotImplementedError(f"Transfer mode '{transfer}' not yet supported.")