Coverage for python/lsst/daf/butler/datastores/posixDatastore.py: 88%

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

"""POSIX datastore."""

__all__ = ("PosixDatastore", )

import hashlib
import logging
import os
import shutil
from typing import (
    TYPE_CHECKING,
    Any,
    ClassVar,
    Iterable,
    Optional,
    Type,
    Union
)

from .fileLikeDatastore import FileLikeDatastore
from lsst.daf.butler.core.utils import safeMakeDir
from lsst.daf.butler import ButlerURI, FileDataset, StoredFileInfo, Formatter, DatasetRef

if TYPE_CHECKING:
    from .fileLikeDatastore import DatastoreFileGetInformation
    from lsst.daf.butler import DatastoreConfig, Location
    from lsst.daf.butler.registry.interfaces import DatastoreRegistryBridgeManager

log = logging.getLogger(__name__)


class PosixDatastore(FileLikeDatastore):
    """Basic POSIX filesystem backed Datastore.

    Parameters
    ----------
    config : `DatastoreConfig` or `str`
        Configuration. A string should refer to the name of the config file.
    bridgeManager : `DatastoreRegistryBridgeManager`
        Object that manages the interface between `Registry` and datastores.
    butlerRoot : `str`, optional
        New datastore root to use to override the configuration value.

    Raises
    ------
    ValueError
        If root location does not exist and ``create`` is `False` in the
        configuration.

    Notes
    -----
    PosixDatastore supports all transfer modes for file-based ingest:
    `"move"`, `"copy"`, `"symlink"`, `"hardlink"`, `"relsymlink"`
    and `None` (no transfer).
    """
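
    # A minimal usage sketch (hypothetical names): ingesting an external file
    # by symlink via the inherited ``ingest`` API, assuming ``datastore`` is a
    # configured PosixDatastore and ``ref`` is a resolved DatasetRef:
    #
    #     from lsst.daf.butler import FileDataset
    #     datastore.ingest(FileDataset(path="/data/raw.fits", refs=[ref]),
    #                      transfer="symlink")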

    defaultConfigFile: ClassVar[Optional[str]] = "datastores/posixDatastore.yaml"
    """Path to configuration defaults. Accessed within the ``config`` resource
    or relative to a search path. Can be `None` if no defaults specified.
    """

    def __init__(self, config: Union[DatastoreConfig, str],
                 bridgeManager: DatastoreRegistryBridgeManager, butlerRoot: Optional[str] = None):
        super().__init__(config, bridgeManager, butlerRoot)

        # Check that root is a valid URI for this datastore
        root = ButlerURI(self.root)
        if root.scheme and root.scheme != "file":
            raise ValueError(f"Root location must only be a file URI not {self.root}")

        self.root = root.path
        if not os.path.isdir(self.root):
            if "create" not in self.config or not self.config["create"]:
                raise ValueError(f"No valid root at: {self.root}")
            safeMakeDir(self.root)

    def _artifact_exists(self, location: Location) -> bool:
        """Check that an artifact exists in this datastore at the specified
        location.

        Parameters
        ----------
        location : `Location`
            Expected location of the artifact associated with this datastore.

        Returns
        -------
        exists : `bool`
            `True` if the location can be found, `False` otherwise.
        """
        return os.path.exists(location.path)

    def _delete_artifact(self, location: Location) -> None:
        """Delete the artifact from the datastore.

        Parameters
        ----------
        location : `Location`
            Location of the artifact associated with this datastore.
        """
        os.remove(location.path)

    def _read_artifact_into_memory(self, getInfo: DatastoreFileGetInformation,
                                   ref: DatasetRef, isComponent: bool = False) -> Any:
        location = getInfo.location

        # Too expensive to recalculate the checksum on fetch,
        # but we can check size and existence
        if not os.path.exists(location.path):
            raise FileNotFoundError("Dataset with Id {} does not seem to exist at"
                                    " expected location of {}".format(ref.id, location.path))
        stat = os.stat(location.path)
        size = stat.st_size
        storedFileInfo = getInfo.info
        if size != storedFileInfo.file_size:
            raise RuntimeError("Integrity failure in Datastore. Size of file {} ({}) does not"
                               " match recorded size of {}".format(location.path, size,
                                                                   storedFileInfo.file_size))

        formatter = getInfo.formatter
        try:
            result = formatter.read(component=getInfo.component if isComponent else None)
        except Exception as e:
            raise ValueError(f"Failure from formatter '{formatter.name()}' for dataset {ref.id}"
                             f" ({ref.datasetType.name} from {location.path}): {e}") from e

        return self._post_process_get(result, getInfo.readStorageClass, getInfo.assemblerParams,
                                      isComponent=isComponent)

    def _write_in_memory_to_artifact(self, inMemoryDataset: Any, ref: DatasetRef) -> StoredFileInfo:
        # Inherit docstring

        location, formatter = self._prepare_for_put(inMemoryDataset, ref)

        storageDir = os.path.dirname(location.path)
        if not os.path.isdir(storageDir):
            # Never try to remove this after creating it since there might
            # be a butler ingest process running concurrently that will
            # already think this directory exists.
            safeMakeDir(storageDir)

        # Write the file
        predictedFullPath = os.path.join(self.root, formatter.predictPath())

        if os.path.exists(predictedFullPath):
            raise FileExistsError(f"Cannot write file for ref {ref} as "
                                  f"output file {predictedFullPath} already exists")

        def _removeFileExists(path: str) -> None:
            """Remove a file and do not complain if it is not there.

            This is important since a formatter might fail before the file
            is written and we should not confuse people by writing spurious
            error messages to the log.
            """
            try:
                os.remove(path)
            except FileNotFoundError:
                pass

        if self._transaction is None:
            raise RuntimeError("Attempting to write dataset without transaction enabled")

        formatter_exception = None
        with self._transaction.undoWith("write", _removeFileExists, predictedFullPath):
            try:
                path = formatter.write(inMemoryDataset)
                log.debug("Wrote file to %s", path)
            except Exception as e:
                formatter_exception = e

        if formatter_exception:
            raise formatter_exception

        assert predictedFullPath == os.path.join(self.root, path)

        return self._extractIngestInfo(path, ref, formatter=formatter)

    def _overrideTransferMode(self, *datasets: FileDataset, transfer: Optional[str] = None) -> Optional[str]:
        # Docstring inherited from base class
        if transfer != "auto":
            return transfer

        # See if the paths are within the datastore or not
        inside = [self._pathInStore(d.path) is not None for d in datasets]

        if all(inside):
            transfer = None
        elif not any(inside):
            transfer = "link"
        else:
            raise ValueError("Some datasets are inside the datastore and some are outside."
                             " Please use an explicit transfer mode and not 'auto'.")

        return transfer
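
    # A quick sketch of the "auto" resolution above, assuming hypothetical
    # paths and a datastore root of "/repo":
    #
    #     all dataset paths under /repo    -> transfer = None (use in place)
    #     all dataset paths outside /repo  -> transfer = "link"
    #     a mixture of inside and outside  -> ValueError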

    def _pathInStore(self, path: str) -> Optional[str]:
        """Return the path relative to the datastore root.

        Parameters
        ----------
        path : `str`
            Path to dataset. Can be an absolute path.

        Returns
        -------
        inStore : `str`
            Path relative to datastore root. Returns `None` if the file is
            outside the root.
        """
        if os.path.isabs(path):
            absRoot = os.path.abspath(self.root)
            if os.path.commonpath([absRoot, path]) != absRoot:
                return None
            return os.path.relpath(path, absRoot)
        elif path.startswith(os.path.pardir):
            return None
        return path
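
    # For example, with a datastore root of "/repo" (hypothetical paths):
    #
    #     self._pathInStore("/repo/a/b.fits")  -> "a/b.fits"
    #     self._pathInStore("/other/b.fits")   -> None
    #     self._pathInStore("../b.fits")       -> None
    #     self._pathInStore("a/b.fits")        -> "a/b.fits"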

    def _standardizeIngestPath(self, path: str, *, transfer: Optional[str] = None) -> str:
        # Docstring inherited from FileLikeDatastore._standardizeIngestPath.
        fullPath = os.path.normpath(os.path.join(self.root, path))
        if not os.path.exists(fullPath):
            raise FileNotFoundError(f"File at '{fullPath}' does not exist; note that paths to ingest "
                                    f"are assumed to be relative to self.root unless they are absolute.")
        if transfer is None:
            # Cannot reuse the path variable because of typing
            pathx = self._pathInStore(path)
            if pathx is None:
                raise RuntimeError(f"'{path}' is not inside repository root '{self.root}'.")
            path = pathx
        return path

    def _extractIngestInfo(self, path: str, ref: DatasetRef, *,
                           formatter: Union[Formatter, Type[Formatter]],
                           transfer: Optional[str] = None) -> StoredFileInfo:
        # Docstring inherited from FileLikeDatastore._extractIngestInfo.
        if self._transaction is None:
            raise RuntimeError("Ingest called without transaction enabled")

        fullPath = os.path.normpath(os.path.join(self.root, path))
        if transfer is not None:
            # Work out the name we want this ingested file to have
            # inside the datastore
            location = self._calculate_ingested_datastore_name(ButlerURI(fullPath), ref, formatter)

            newPath = location.pathInStore
            newFullPath = location.path
            if os.path.exists(newFullPath):
                raise FileExistsError(f"File '{newFullPath}' already exists.")
            storageDir = os.path.dirname(newFullPath)
            if not os.path.isdir(storageDir):
                # Do not attempt to reverse directory creation
                # because of race conditions with other processes running
                # ingest in parallel.
                safeMakeDir(storageDir)
            if transfer == "move":
                with self._transaction.undoWith("move", shutil.move, newFullPath, fullPath):
                    shutil.move(fullPath, newFullPath)
            elif transfer == "copy":
                with self._transaction.undoWith("copy", os.remove, newFullPath):
                    shutil.copy(fullPath, newFullPath)
            elif transfer == "link":
                with self._transaction.undoWith("link", os.unlink, newFullPath):
                    realPath = os.path.realpath(fullPath)
                    # Try a hard link and if that fails use a symlink
                    try:
                        os.link(realPath, newFullPath)
                    except OSError:
                        # Read through existing symlinks
                        os.symlink(realPath, newFullPath)
            elif transfer == "hardlink":
                with self._transaction.undoWith("hardlink", os.unlink, newFullPath):
                    os.link(os.path.realpath(fullPath), newFullPath)
            elif transfer == "symlink":
                with self._transaction.undoWith("symlink", os.unlink, newFullPath):
                    # Read through existing symlinks
                    os.symlink(os.path.realpath(fullPath), newFullPath)
            elif transfer == "relsymlink":
                # This is a standard symlink but using a relative path
                fullPath = os.path.realpath(fullPath)

                # Need the directory name to give to relative root;
                # a full file path confuses it into an extra ../
                newFullPathRoot, _ = os.path.split(newFullPath)
                relPath = os.path.relpath(fullPath, newFullPathRoot)
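                # For example (hypothetical paths): with
                # fullPath = "/data/file.fits" and newFullPath =
                # "/repo/a/b/file.fits", relPath is "../../../data/file.fits".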
                with self._transaction.undoWith("relsymlink", os.unlink, newFullPath):
                    os.symlink(relPath, newFullPath)
            else:
                raise NotImplementedError("Transfer type '{}' not supported.".format(transfer))
            path = newPath
            fullPath = newFullPath
        checksum = self.computeChecksum(fullPath) if self.useChecksum else None
        stat = os.stat(fullPath)
        size = stat.st_size
        return StoredFileInfo(formatter=formatter, path=path, storageClass=ref.datasetType.storageClass,
                              component=ref.datasetType.component(),
                              file_size=size, checksum=checksum)

    @staticmethod
    def computeChecksum(filename: str, algorithm: str = "blake2b", block_size: int = 8192) -> str:
        """Compute the checksum of the supplied file.

        Parameters
        ----------
        filename : `str`
            Name of file to calculate checksum from.
        algorithm : `str`, optional
            Name of algorithm to use. Must be one of the algorithms supported
            by :py:mod:`hashlib`.
        block_size : `int`, optional
            Number of bytes to read from file at one time.

        Returns
        -------
        hexdigest : `str`
            Hex digest of the file.
        """
        if algorithm not in hashlib.algorithms_guaranteed:
            raise NameError("The specified algorithm '{}' is not supported by hashlib".format(algorithm))

        hasher = hashlib.new(algorithm)

        with open(filename, "rb") as f:
            for chunk in iter(lambda: f.read(block_size), b""):
                hasher.update(chunk)

        return hasher.hexdigest()
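
    # A minimal usage sketch (hypothetical file name); computeChecksum is a
    # staticmethod, so no datastore instance is required:
    #
    #     digest = PosixDatastore.computeChecksum("/tmp/example.dat",
    #                                             algorithm="sha256")
    #     print(digest)  # hex digest of the file contents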

    def export(self, refs: Iterable[DatasetRef], *,
               directory: Optional[str] = None, transfer: Optional[str] = None) -> Iterable[FileDataset]:
        # Docstring inherited from Datastore.export.
        for ref in refs:
            fileLocations = self._get_dataset_locations_info(ref)
            if not fileLocations:
                raise FileNotFoundError(f"Could not retrieve dataset {ref}.")
            # For now we cannot export disassembled datasets
            if len(fileLocations) > 1:
                raise NotImplementedError(f"Cannot export disassembled datasets such as {ref}")
            location, storedFileInfo = fileLocations[0]
            if transfer is None:
                # TODO: do we also need to return the readStorageClass somehow?
                yield FileDataset(refs=[ref], path=location.pathInStore, formatter=storedFileInfo.formatter)
            else:
                # TODO: add support for other transfer modes. If we support
                # moving, this method should become transactional.
                raise NotImplementedError(f"Transfer mode '{transfer}' not yet supported.")
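
    # A short usage sketch (hypothetical names): exporting datasets in place,
    # assuming ``datastore`` is a PosixDatastore and ``refs`` holds DatasetRefs
    # already known to it:
    #
    #     for fd in datastore.export(refs, transfer=None):
    #         print(fd.path, fd.formatter)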