Coverage for python/lsst/resources/file.py: 93%
180 statements
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-15 00:04 +0000
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-15 00:04 +0000
1# This file is part of lsst-resources.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# Use of this source code is governed by a 3-clause BSD-style
10# license that can be found in the LICENSE file.
12from __future__ import annotations
14import contextlib
15import copy
16import logging
17import os
18import os.path
19import posixpath
20import re
21import shutil
22import urllib.parse
24__all__ = ("FileResourcePath",)
26from typing import IO, TYPE_CHECKING, Iterator, List, Optional, Tuple, Union
28from ._resourcePath import ResourcePath
29from .utils import NoTransaction, os2posix, posix2os
if TYPE_CHECKING:
    # Only needed for annotations; importing it lazily avoids a runtime
    # dependency on the protocol class.
    from .utils import TransactionProtocol


# Module-level logger used by all methods in this file.
log = logging.getLogger(__name__)
class FileResourcePath(ResourcePath):
    """Path for explicit ``file`` URI scheme."""

    # Transfer modes accepted by `transfer_from`.
    transferModes = ("copy", "link", "symlink", "hardlink", "relsymlink", "auto", "move")
    # Mode substituted for "auto" when the source is not a temporary file.
    transferDefault: str = "link"

    # By definition refers to a local file
    isLocal = True

    @property
    def ospath(self) -> str:
        """Path component of the URI localized to current OS.

        Will unquote URI path since a formal URI must include the quoting.
        """
        return urllib.parse.unquote(posix2os(self._uri.path))

    def exists(self) -> bool:
        """Indicate that the file exists.

        Returns
        -------
        exists : `bool`
            Result of `os.path.exists` on the local path.
        """
        # Uses os.path.exists so if there is a soft link that points
        # to a file that no longer exists this will return False
        return os.path.exists(self.ospath)

    def size(self) -> int:
        """Return the size of the file in bytes.

        Returns
        -------
        sz : `int`
            Size reported by `os.stat`, or 0 if the path is a directory.
        """
        if os.path.isdir(self.ospath):
            return 0
        return os.stat(self.ospath).st_size

    def remove(self) -> None:
        """Remove the resource using `os.remove`."""
        os.remove(self.ospath)

    def _as_local(self) -> Tuple[str, bool]:
        """Return the local path of the file.

        This is an internal helper for ``as_local()``.

        Returns
        -------
        path : `str`
            The local path to this file.
        temporary : `bool`
            Always returns the temporary nature of the input file resource.
        """
        return self.ospath, self.isTemporary

    def read(self, size: int = -1) -> bytes:
        """Read bytes from the file.

        Parameters
        ----------
        size : `int`, optional
            Maximum number of bytes to read. The default of -1 reads the
            entire content of the file.

        Returns
        -------
        data : `bytes`
            The bytes read from the start of the file.
        """
        with open(self.ospath, "rb") as fh:
            return fh.read(size)

    def write(self, data: bytes, overwrite: bool = True) -> None:
        """Write the supplied data to the file.

        Parameters
        ----------
        data : `bytes`
            Bytes to write.
        overwrite : `bool`, optional
            If `True` the file is opened in ``wb`` mode and any existing
            content is replaced. If `False` the file is opened in exclusive
            creation mode (``xb``), which fails if the file already exists.
        """
        # Create the parent directory if it is not already there.
        parent_dir = os.path.dirname(self.ospath)
        if not os.path.exists(parent_dir):
            os.makedirs(parent_dir, exist_ok=True)
        mode = "wb" if overwrite else "xb"
        with open(self.ospath, mode) as f:
            f.write(data)

    def mkdir(self) -> None:
        """Make the directory associated with this URI.

        An attempt will be made to create the directory even if the URI
        looks like a file.

        Raises
        ------
        NotADirectoryError:
            Raised if a non-directory already exists.
        """
        try:
            os.makedirs(self.ospath, exist_ok=True)
        except FileExistsError:
            raise NotADirectoryError(f"{self.ospath} exists but is not a directory.") from None

    def isdir(self) -> bool:
        """Return whether this URI is a directory.

        Returns
        -------
        isdir : `bool`
            `True` if this URI is a directory or looks like a directory,
            else `False`.
        """
        return self.dirLike or os.path.isdir(self.ospath)

    def transfer_from(
        self,
        src: ResourcePath,
        transfer: str,
        overwrite: bool = False,
        transaction: Optional[TransactionProtocol] = None,
    ) -> None:
        """Transfer the source resource to this local file.

        Parameters
        ----------
        src : `ResourcePath`
            Source URI.
        transfer : `str`
            Mode to use for transferring the resource. Supports the following
            options: copy, link, symlink, hardlink, relsymlink, auto, move.
            ``auto`` resolves to ``transferDefault`` unless the local view of
            the source is a temporary file, in which case ``copy`` is used.
        overwrite : `bool`, optional
            Allow an existing file to be overwritten. Defaults to `False`.
        transaction : `~lsst.resources.utils.TransactionProtocol`, optional
            If a transaction is provided, undo actions will be registered.

        Raises
        ------
        ValueError
            Raised if the transfer mode is not one of ``transferModes``.
        FileNotFoundError
            Raised if the local view of the source does not exist.
        RuntimeError
            Raised if a link-based mode is requested for a source that had
            to be localized to a temporary file.
        FileExistsError
            Raised if the destination exists and ``overwrite`` is `False`.
        """
        # Fail early to prevent delays if remote resources are requested
        if transfer not in self.transferModes:
            raise ValueError(f"Transfer mode '{transfer}' not supported by URI scheme {self.scheme}")

        # Existence checks can take time so only try if the log message
        # will be issued.
        if log.isEnabledFor(logging.DEBUG):
            log.debug(
                "Transferring %s [exists: %s] -> %s [exists: %s] (transfer=%s)",
                src,
                src.exists(),
                self,
                self.exists(),
                transfer,
            )

        # We do not have to special case FileResourcePath here because
        # as_local handles that.
        with src.as_local() as local_uri:
            is_temporary = local_uri.isTemporary
            local_src = local_uri.ospath

            # Short circuit if the URIs are identical immediately.
            if self == local_uri:
                log.debug(
                    "Target and destination URIs are identical: %s, returning immediately."
                    " No further action required.",
                    self,
                )
                return

            # Default transfer mode depends on whether we have a temporary
            # file or not.
            if transfer == "auto":
                transfer = self.transferDefault if not is_temporary else "copy"

            if not os.path.exists(local_src):
                if is_temporary:
                    if src == local_uri:
                        msg = f"Local temporary file {src} has gone missing."
                    else:
                        # This will not happen in normal scenarios.
                        msg = f"Local file {local_uri} downloaded from {src} has gone missing"
                else:
                    msg = f"Source URI {src} does not exist"
                raise FileNotFoundError(msg)

            # Follow soft links
            local_src = os.path.realpath(os.path.normpath(local_src))

            # Creating a symlink to a local copy of a remote resource
            # should never work. Creating a hardlink will work but should
            # not be allowed since it is highly unlikely that this is ever
            # an intended option and depends on the local target being
            # on the same file system as was used for the temporary file
            # download.
            # If a symlink is being requested for a local temporary file
            # that is likely undesirable but should not be refused.
            if is_temporary and src != local_uri and "link" in transfer:
                raise RuntimeError(
                    f"Can not use local file system transfer mode {transfer} for remote resource ({src})"
                )
            elif is_temporary and src == local_uri and "symlink" in transfer:
                log.debug(
                    "Using a symlink for a temporary resource may lead to unexpected downstream failures."
                )

            # For temporary files we can own them if we created it.
            # Remember what the caller asked for since the effective mode
            # may be upgraded from "copy" to "move" below.
            requested_transfer = transfer
            if src != local_uri and is_temporary and transfer == "copy":
                transfer = "move"

            # The output location should not exist unless overwrite=True.
            # Rather than use `exists()`, use os.stat since we might need
            # the full answer later.
            dest_stat: Optional[os.stat_result]
            try:
                # Do not read through links of the file itself.
                dest_stat = os.lstat(self.ospath)
            except FileNotFoundError:
                dest_stat = None

            # It is possible that the source URI and target URI refer
            # to the same file. This can happen for a number of reasons
            # (such as soft links in the path, or they really are the same).
            # In that case log a message and return as if the transfer
            # completed (it technically did). A temporary file download
            # can't be the same so the test can be skipped.
            if dest_stat and not is_temporary:
                # Be consistent and use lstat here (even though realpath
                # has been called). It does not harm.
                local_src_stat = os.lstat(local_src)
                if dest_stat.st_ino == local_src_stat.st_ino and dest_stat.st_dev == local_src_stat.st_dev:
                    log.debug(
                        "Destination URI %s is the same file as source URI %s, returning immediately."
                        " No further action required.",
                        self,
                        local_uri,
                    )
                    return

            if not overwrite and dest_stat:
                raise FileExistsError(
                    f"Destination path '{self}' already exists. Transfer from {src} cannot be completed."
                )

            # Make the path absolute (but don't follow links since that
            # would possibly cause us to end up in the wrong place if the
            # file existed already as a soft link)
            newFullPath = os.path.abspath(self.ospath)
            outputDir = os.path.dirname(newFullPath)
            if not os.path.isdir(outputDir):
                # Must create the directory -- this can not be rolled back
                # since another transfer running concurrently may
                # be relying on this existing.
                os.makedirs(outputDir, exist_ok=True)

            if transaction is None:
                # Use a no-op transaction to reduce code duplication
                transaction = NoTransaction()

            # For links the OS doesn't let us overwrite so if something does
            # exist we have to remove it before we do the actual "transfer"
            # below
            if "link" in transfer and overwrite and dest_stat:
                try:
                    self.remove()
                except Exception as exc:
                    # If this fails we ignore it since it's a problem
                    # that will manifest immediately below with a more
                    # relevant error message. Record it for debugging
                    # rather than discarding it completely.
                    log.debug("Ignoring failure to remove existing %s before linking: %s", self, exc)

            if transfer == "move":
                # If a rename works we try that since that is guaranteed to
                # be atomic. If that fails we copy and rename. We do this
                # in case other processes are trying to move to the same
                # file and we want the "winner" to not be corrupted.
                try:
                    with transaction.undoWith(f"move from {local_src}", os.rename, newFullPath, local_src):
                        os.rename(local_src, newFullPath)
                except OSError:
                    with self.temporary_uri(prefix=self.parent(), suffix=self.getExtension()) as temp_copy:
                        shutil.copy(local_src, temp_copy.ospath)
                        with transaction.undoWith(
                            f"move from {local_src}",
                            shutil.move,
                            newFullPath,
                            local_src,
                            copy_function=shutil.copy,
                        ):
                            os.rename(temp_copy.ospath, newFullPath)
                    # The copy is in place so the original can now go.
                    os.remove(local_src)
            elif transfer == "copy":
                # We want atomic copies so first copy to a temp location in
                # the same output directory. This at least guarantees that
                # if multiple processes are writing to the same file
                # simultaneously the file we end up with will not be corrupt.
                with self.temporary_uri(prefix=self.parent(), suffix=self.getExtension()) as temp_copy:
                    shutil.copy(local_src, temp_copy.ospath)
                    with transaction.undoWith(f"copy from {local_src}", os.remove, newFullPath):
                        # os.rename works even if the file exists.
                        # It's possible that another process has copied a file
                        # in whilst this one was copying. If overwrite
                        # protection is needed then another stat() call should
                        # happen here.
                        os.rename(temp_copy.ospath, newFullPath)
            elif transfer == "link":
                # Try hard link and if that fails use a symlink
                with transaction.undoWith(f"link to {local_src}", os.remove, newFullPath):
                    try:
                        os.link(local_src, newFullPath)
                    except OSError:
                        # Read through existing symlinks
                        os.symlink(local_src, newFullPath)
            elif transfer == "hardlink":
                with transaction.undoWith(f"hardlink to {local_src}", os.remove, newFullPath):
                    os.link(local_src, newFullPath)
            elif transfer == "symlink":
                # Read through existing symlinks
                with transaction.undoWith(f"symlink to {local_src}", os.remove, newFullPath):
                    os.symlink(local_src, newFullPath)
            elif transfer == "relsymlink":
                # This is a standard symlink but using a relative path
                # Need the directory name to give to relative root
                # A full file path confuses it into an extra ../
                newFullPathRoot = os.path.dirname(newFullPath)
                relPath = os.path.relpath(local_src, newFullPathRoot)
                with transaction.undoWith(f"relsymlink to {local_src}", os.remove, newFullPath):
                    os.symlink(relPath, newFullPath)
            else:
                raise NotImplementedError(f"Transfer type '{transfer}' not supported.")

        # This was an explicit move requested from a remote resource
        # try to remove that remote resource. We check is_temporary because
        # the local file would have been moved by shutil.move already.
        if requested_transfer == "move" and is_temporary and src != local_uri:
            # Transactions do not work here
            src.remove()

    def walk(
        self, file_filter: Optional[Union[str, re.Pattern]] = None
    ) -> Iterator[Union[List, Tuple[ResourcePath, List[str], List[str]]]]:
        """Walk the directory tree returning matching files and directories.

        Parameters
        ----------
        file_filter : `str` or `re.Pattern`, optional
            Regex to filter out files from the list before it is returned.
            Only file names for which the regex search succeeds are kept.

        Yields
        ------
        dirpath : `ResourcePath`
            Current directory being examined.
        dirnames : `list` of `str`
            Names of subdirectories within dirpath.
        filenames : `list` of `str`
            Names of all the files within dirpath.

        Raises
        ------
        ValueError
            Raised if this URI is not a directory.
        """
        if not self.isdir():
            raise ValueError("Can not walk a non-directory URI")

        if isinstance(file_filter, str):
            file_filter = re.compile(file_filter)

        # Symbolic links to directories are followed.
        for root, dirs, files in os.walk(self.ospath, followlinks=True):
            # Filter by the regex
            if file_filter is not None:
                files = [f for f in files if file_filter.search(f)]
            yield type(self)(root, forceAbsolute=False, forceDirectory=True), dirs, files

    @classmethod
    def _fixupPathUri(
        cls,
        parsed: urllib.parse.ParseResult,
        root: Optional[Union[str, ResourcePath]] = None,
        forceAbsolute: bool = False,
        forceDirectory: bool = False,
    ) -> Tuple[urllib.parse.ParseResult, bool]:
        """Fix up relative paths in URI instances.

        Parameters
        ----------
        parsed : `~urllib.parse.ParseResult`
            The result from parsing a URI using `urllib.parse`.
        root : `str` or `ResourcePath`, optional
            Path to use as root when converting relative to absolute.
            If `None`, it will be the current working directory. This
            is a local file system path, or a file URI. It is only used if
            a file-scheme is used incorrectly with a relative path.
        forceAbsolute : `bool`, ignored
            Has no effect for this subclass. ``file`` URIs are always
            absolute.
        forceDirectory : `bool`, optional
            If `True` forces the URI to end with a separator, otherwise given
            URI is interpreted as is.

        Returns
        -------
        modified : `~urllib.parse.ParseResult`
            Update result if a URI is being handled.
        dirLike : `bool`
            `True` if given parsed URI has a trailing separator or
            forceDirectory is True. Otherwise `False`.

        Raises
        ------
        RuntimeError
            Raised if ``root`` is a `ResourcePath` with a scheme other
            than ``file``.

        Notes
        -----
        Relative paths are explicitly not supported by RFC8089 but `urllib`
        does accept URIs of the form ``file:relative/path.ext``. They need
        to be turned into absolute paths before they can be used. This is
        always done regardless of the ``forceAbsolute`` parameter.
        """
        # assume we are not dealing with a directory like URI
        dirLike = False

        # file URI implies POSIX path separators so split as POSIX,
        # then join as os, and convert to abspath. Do not handle
        # home directories since "file" scheme is explicitly documented
        # to not do tilde expansion.
        sep = posixpath.sep

        # For local file system we can explicitly check to see if this
        # really is a directory. The URI might point to a location that
        # does not exists yet but all that matters is if it is a directory
        # then we make sure use that fact. No need to do the check if
        # we are already being told.
        # NOTE(review): this checks the raw URI path whereas ``ospath``
        # unquotes it first -- confirm quoted paths are handled as intended.
        if not forceDirectory and posixpath.isdir(parsed.path):
            forceDirectory = True

        # For an absolute path all we need to do is check if we need
        # to force the directory separator
        # NOTE(review): a trailing separator on its own does not set
        # dirLike in this branch -- confirm that is intended.
        if posixpath.isabs(parsed.path):
            if forceDirectory:
                if not parsed.path.endswith(sep):
                    parsed = parsed._replace(path=parsed.path + sep)
                dirLike = True
            return copy.copy(parsed), dirLike

        # Relative path so must fix it to be compliant with the standard

        if root is None:
            root = os.path.abspath(os.path.curdir)
        elif isinstance(root, ResourcePath):
            if root.scheme and root.scheme != "file":
                raise RuntimeError(f"The override root must be a file URI not {root.scheme}")
            root = os.path.abspath(root.ospath)

        new_path = posixpath.normpath(posixpath.join(os2posix(root), parsed.path))

        # normpath strips trailing "/" so put it back if necessary.
        # A normalized path that is just the root already ends with the
        # separator, so only append when it is missing, but always
        # acknowledge that the URI is directory-like.
        if forceDirectory or parsed.path.endswith(sep):
            if not new_path.endswith(sep):
                new_path += sep
            dirLike = True

        # ParseResult is a NamedTuple so _replace is standard API
        parsed = parsed._replace(path=new_path)

        if parsed.params or parsed.query:
            log.warning("Additional items unexpectedly encountered in file URI: %s", parsed.geturl())

        return parsed, dirLike

    @contextlib.contextmanager
    def open(
        self,
        mode: str = "r",
        *,
        encoding: Optional[str] = None,
        prefer_file_temporary: bool = False,
    ) -> Iterator[IO]:
        # Docstring inherited.
        # ``prefer_file_temporary`` is not used by this implementation.
        with open(self.ospath, mode=mode, encoding=encoding) as buffer:
            yield buffer