Coverage for python/lsst/daf/butler/datastores/posixDatastore.py: 87%

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

"""POSIX datastore."""

__all__ = ("PosixDatastore", )

import hashlib
import logging
import os
from typing import (
    TYPE_CHECKING,
    Any,
    ClassVar,
    Optional,
    Type,
    Union
)

from .fileLikeDatastore import FileLikeDatastore
from lsst.daf.butler.core.utils import safeMakeDir
from lsst.daf.butler import ButlerURI, FileDataset, StoredFileInfo, Formatter, DatasetRef

if TYPE_CHECKING:
    from .fileLikeDatastore import DatastoreFileGetInformation
    from lsst.daf.butler import DatastoreConfig, Location
    from lsst.daf.butler.registry.interfaces import DatastoreRegistryBridgeManager

log = logging.getLogger(__name__)


class PosixDatastore(FileLikeDatastore):
    """Basic POSIX filesystem backed Datastore.

    Parameters
    ----------
    config : `DatastoreConfig` or `str`
        Configuration. A string should refer to the name of the config file.
    bridgeManager : `DatastoreRegistryBridgeManager`
        Object that manages the interface between `Registry` and datastores.
    butlerRoot : `str`, optional
        New datastore root to use to override the configuration value.

    Raises
    ------
    ValueError
        If the root location does not exist and ``create`` is `False` in the
        configuration.

    Notes
    -----
    PosixDatastore supports all transfer modes for file-based ingest:
    `"move"`, `"copy"`, `"symlink"`, `"hardlink"`, `"relsymlink"`
    and `None` (no transfer).
    """

    defaultConfigFile: ClassVar[Optional[str]] = "datastores/posixDatastore.yaml"
    """Path to configuration defaults. Accessed within the ``config`` resource
    or relative to a search path. Can be `None` if no defaults are specified.
    """

    def __init__(self, config: Union[DatastoreConfig, str],
                 bridgeManager: DatastoreRegistryBridgeManager, butlerRoot: Optional[str] = None):
        super().__init__(config, bridgeManager, butlerRoot)

        # Check that root is a valid URI for this datastore
        root = ButlerURI(self.root, forceDirectory=True)
        if root.scheme and root.scheme != "file":
            raise ValueError(f"Root location must only be a file URI not {self.root}")

        self.root = root.ospath
        if not os.path.isdir(self.root):
            if "create" not in self.config or not self.config["create"]:
                raise ValueError(f"No valid root at: {self.root}")
            safeMakeDir(self.root)

    def _artifact_exists(self, location: Location) -> bool:
        """Check that an artifact exists in this datastore at the specified
        location.

        Parameters
        ----------
        location : `Location`
            Expected location of the artifact associated with this datastore.

        Returns
        -------
        exists : `bool`
            `True` if the location can be found, `False` otherwise.
        """
        return os.path.exists(location.path)

    def _delete_artifact(self, location: Location) -> None:
        """Delete the artifact from the datastore.

        Parameters
        ----------
        location : `Location`
            Location of the artifact associated with this datastore.
        """
        os.remove(location.path)

    def _read_artifact_into_memory(self, getInfo: DatastoreFileGetInformation,
                                   ref: DatasetRef, isComponent: bool = False) -> Any:
        location = getInfo.location

        # Too expensive to recalculate the checksum on fetch,
        # but we can check size and existence
        if not os.path.exists(location.path):
            raise FileNotFoundError("Dataset with Id {} does not seem to exist at"
                                    " expected location of {}".format(ref.id, location.path))
        stat = os.stat(location.path)
        size = stat.st_size
        storedFileInfo = getInfo.info
        if size != storedFileInfo.file_size:
            raise RuntimeError("Integrity failure in Datastore. Size of file {} ({}) does not"
                               " match recorded size of {}".format(location.path, size,
                                                                   storedFileInfo.file_size))
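
        # Delegate the actual deserialization to the formatter; a component
        # read passes the component name through so only that piece is loaded.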
        formatter = getInfo.formatter
        try:
            log.debug("Reading %s from location %s with formatter %s",
                      f"component {getInfo.component}" if isComponent else "",
                      location.uri, type(formatter).__name__)
            result = formatter.read(component=getInfo.component if isComponent else None)
        except Exception as e:
            raise ValueError(f"Failure from formatter '{formatter.name()}' for dataset {ref.id}"
                             f" ({ref.datasetType.name} from {location.path}): {e}") from e

        return self._post_process_get(result, getInfo.readStorageClass, getInfo.assemblerParams,
                                      isComponent=isComponent)

    def _write_in_memory_to_artifact(self, inMemoryDataset: Any, ref: DatasetRef) -> StoredFileInfo:
        # Inherit docstring

        location, formatter = self._prepare_for_put(inMemoryDataset, ref)

        storageDir = os.path.dirname(location.path)
        if not os.path.isdir(storageDir):
            # Never try to remove this after creating it since there might
            # be a butler ingest process running concurrently that will
            # already think this directory exists.
            safeMakeDir(storageDir)

        # Write the file
        predictedFullPath = os.path.join(self.root, formatter.predictPath())

        if os.path.exists(predictedFullPath):
            raise FileExistsError(f"Cannot write file for ref {ref} as "
                                  f"output file {predictedFullPath} already exists")

        def _removeFileExists(path: str) -> None:
            """Remove a file and do not complain if it is not there.

            This is important since a formatter might fail before the file
            is written and we should not confuse people by writing spurious
            error messages to the log.
            """
            try:
                os.remove(path)
            except FileNotFoundError:
                pass

        if self._transaction is None:
            raise RuntimeError("Attempting to write dataset without transaction enabled")
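
        # Capture any formatter failure and re-raise it once the transaction
        # context manager has exited; the registered undo action removes any
        # partially written file if the transaction is later rolled back.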
        formatter_exception = None
        with self._transaction.undoWith("write", _removeFileExists, predictedFullPath):
            try:
                path = formatter.write(inMemoryDataset)
                log.debug("Wrote file to %s", path)
            except Exception as e:
                formatter_exception = e

        if formatter_exception:
            raise formatter_exception

        assert predictedFullPath == os.path.join(self.root, path)

        return self._extractIngestInfo(path, ref, formatter=formatter)

    def _overrideTransferMode(self, *datasets: FileDataset, transfer: Optional[str] = None) -> Optional[str]:
        # Docstring inherited from base class
        if transfer != "auto":
            return transfer

        # See if the paths are within the datastore or not
        inside = [self._pathInStore(d.path) is not None for d in datasets]
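
        # If everything is already inside the datastore no transfer is
        # needed; if everything is outside, link the files in; a mixture
        # is ambiguous, so refuse to guess.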
        if all(inside):
            transfer = None
        elif not any(inside):
            transfer = "link"
        else:
            raise ValueError("Some datasets are inside the datastore and some are outside."
                             " Please use an explicit transfer mode and not 'auto'.")

        return transfer

    def _pathInStore(self, path: str) -> Optional[str]:
        """Return the path relative to the datastore root.

        Parameters
        ----------
        path : `str`
            Path to the dataset. Can be an absolute path.

        Returns
        -------
        inStore : `str`
            Path relative to the datastore root. Returns `None` if the file
            is outside the root.
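
        Examples
        --------
        A sketch of the expected behaviour, assuming a datastore root of
        ``/repo`` (the paths here are purely illustrative)::

            datastore._pathInStore("/repo/a/b.fits")     # -> "a/b.fits"
            datastore._pathInStore("/elsewhere/b.fits")  # -> None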
234 """
235 pathUri = ButlerURI(path, forceAbsolute=False)
236 rootUri = ButlerURI(self.root, forceDirectory=True, forceAbsolute=True)
237 return pathUri.relative_to(rootUri)

    def _standardizeIngestPath(self, path: str, *, transfer: Optional[str] = None) -> str:
        # Docstring inherited from FileLikeDatastore._standardizeIngestPath.
        fullPath = os.path.normpath(os.path.join(self.root, path))
        if not os.path.exists(fullPath):
            raise FileNotFoundError(f"File at '{fullPath}' does not exist; note that paths to ingest "
                                    f"are assumed to be relative to self.root unless they are absolute.")
        if transfer is None:
            # Cannot reuse the path variable because of typing
            pathx = self._pathInStore(path)
            if pathx is None:
                raise RuntimeError(f"'{path}' is not inside repository root '{self.root}'.")
            path = pathx
        return path

    def _extractIngestInfo(self, path: Union[str, ButlerURI], ref: DatasetRef, *,
                           formatter: Union[Formatter, Type[Formatter]],
                           transfer: Optional[str] = None) -> StoredFileInfo:
        # Docstring inherited from FileLikeDatastore._extractIngestInfo.
        if self._transaction is None:
            raise RuntimeError("Ingest called without transaction enabled")

        # Calculate the full path to the source
        srcUri = ButlerURI(path, root=self.root, forceAbsolute=True)
        if transfer is None:
            # File should exist already
            rootUri = ButlerURI(self.root, forceDirectory=True)
            pathInStore = srcUri.relative_to(rootUri)
            if pathInStore is None:
                raise RuntimeError(f"Unexpectedly learned that {srcUri} is not within datastore {rootUri}")
            if not srcUri.exists():
                raise RuntimeError(f"Unexpectedly discovered that {srcUri} does not exist inside datastore"
                                   f" {rootUri}")
            path = pathInStore
            fullPath = srcUri.ospath
        else:
            # Work out the name we want this ingested file to have
            # inside the datastore
            location = self._calculate_ingested_datastore_name(srcUri, ref, formatter)
            path = location.pathInStore
            fullPath = location.path
            targetUri = ButlerURI(location.uri)
            targetUri.transfer_from(srcUri, transfer=transfer, transaction=self._transaction)
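
        # Record the size (and optionally a checksum) so later reads can
        # verify the artifact's integrity (see _read_artifact_into_memory).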
        checksum = self.computeChecksum(fullPath) if self.useChecksum else None
        stat = os.stat(fullPath)
        size = stat.st_size
        return StoredFileInfo(formatter=formatter, path=path, storageClass=ref.datasetType.storageClass,
                              component=ref.datasetType.component(),
                              file_size=size, checksum=checksum)

    @staticmethod
    def computeChecksum(filename: str, algorithm: str = "blake2b", block_size: int = 8192) -> str:
        """Compute the checksum of the supplied file.

        Parameters
        ----------
        filename : `str`
            Name of file to calculate checksum from.
        algorithm : `str`, optional
            Name of algorithm to use. Must be one of the algorithms supported
            by :py:mod:`hashlib`.
        block_size : `int`, optional
            Number of bytes to read from file at one time.

        Returns
        -------
        hexdigest : `str`
            Hex digest of the file.
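
        Examples
        --------
        An illustrative call (the file name here is hypothetical)::

            digest = PosixDatastore.computeChecksum("/tmp/example.dat")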
307 """
308 if algorithm not in hashlib.algorithms_guaranteed: 308 ↛ 309line 308 didn't jump to line 309, because the condition on line 308 was never true
309 raise NameError("The specified algorithm '{}' is not supported by hashlib".format(algorithm))
311 hasher = hashlib.new(algorithm)
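
        # Read the file in fixed-size chunks so arbitrarily large files can
        # be checksummed without loading them entirely into memory.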
        with open(filename, "rb") as f:
            for chunk in iter(lambda: f.read(block_size), b""):
                hasher.update(chunk)

        return hasher.hexdigest()