Coverage for python/lsst/resources/file.py: 95%
196 statements
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-17 10:49 +0000
1# This file is part of lsst-resources.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# Use of this source code is governed by a 3-clause BSD-style
10# license that can be found in the LICENSE file.
12from __future__ import annotations
14__all__ = ("FileResourcePath",)
16import contextlib
17import copy
18import logging
19import os
20import os.path
21import posixpath
22import re
23import shutil
24import urllib.parse
25from collections.abc import Iterator
26from typing import IO, TYPE_CHECKING
28from ._resourceHandles._fileResourceHandle import FileResourceHandle
29from ._resourcePath import ResourcePath
30from .utils import NoTransaction, ensure_directory_is_writeable, os2posix, posix2os
32if TYPE_CHECKING:
33 from .utils import TransactionProtocol
36log = logging.getLogger(__name__)
class FileResourcePath(ResourcePath):
    """Path for explicit ``file`` URI scheme."""

    # Transfer modes accepted by `transfer_from`.
    transferModes = ("copy", "link", "symlink", "hardlink", "relsymlink", "auto", "move")
    # Mode that "auto" resolves to when the source is not a temporary file.
    transferDefault: str = "link"

    # By definition refers to a local file
    isLocal = True

    @property
    def ospath(self) -> str:
        """Path component of the URI localized to current OS.

        Will unquote URI path since a formal URI must include the quoting.
        """
        return urllib.parse.unquote(posix2os(self._uri.path))

    def exists(self) -> bool:
        """Indicate that the file exists."""
        # Uses os.path.exists so if there is a soft link that points
        # to a file that no longer exists this will return False
        return os.path.exists(self.ospath)

    def size(self) -> int:
        """Return the size of the file in bytes.

        Returns
        -------
        sz : `int`
            The size reported by `os.stat`, or 0 if the path is a directory.
        """
        if os.path.isdir(self.ospath):
            return 0
        return os.stat(self.ospath).st_size

    def remove(self) -> None:
        """Remove the resource."""
        os.remove(self.ospath)

    def _as_local(self) -> tuple[str, bool]:
        """Return the local path of the file.

        This is an internal helper for ``as_local()``.

        Returns
        -------
        path : `str`
            The local path to this file.
        temporary : `bool`
            Always returns the temporary nature of the input file resource.
        """
        return self.ospath, self.isTemporary

    def read(self, size: int = -1) -> bytes:
        """Read the contents of the file as bytes.

        Parameters
        ----------
        size : `int`, optional
            Number of bytes to read, passed unchanged to the ``read`` method
            of the opened file. The default of -1 reads the entire file.

        Returns
        -------
        data : `bytes`
            The bytes read from the file.
        """
        with open(self.ospath, "rb") as fh:
            return fh.read(size)

    def write(self, data: bytes, overwrite: bool = True) -> None:
        """Write the supplied bytes to the file.

        Parent directories are created if they do not already exist.

        Parameters
        ----------
        data : `bytes`
            The bytes to write.
        overwrite : `bool`, optional
            If `True` (the default) the file is opened in ``wb`` mode and
            any existing content is replaced. If `False` the file is opened
            in exclusive-creation (``xb``) mode, so `FileExistsError` is
            raised by `open` if the file is already present.
        """
        parent_dir = os.path.dirname(self.ospath)
        if not os.path.exists(parent_dir):
            _create_directories(parent_dir)
        mode = "wb" if overwrite else "xb"
        with open(self.ospath, mode) as fh:
            fh.write(data)

    def mkdir(self) -> None:
        """Make the directory associated with this URI.

        An attempt will be made to create the directory even if the URI
        looks like a file.

        Raises
        ------
        NotADirectoryError
            Raised if a non-directory already exists.
        """
        try:
            _create_directories(self.ospath)
        except FileExistsError:
            raise NotADirectoryError(f"{self.ospath} exists but is not a directory.") from None

    def isdir(self) -> bool:
        """Return whether this URI is a directory.

        Returns
        -------
        isdir : `bool`
            `True` if this URI is a directory or looks like a directory,
            else `False`.
        """
        return self.dirLike or os.path.isdir(self.ospath)

    def transfer_from(
        self,
        src: ResourcePath,
        transfer: str,
        overwrite: bool = False,
        transaction: TransactionProtocol | None = None,
    ) -> None:
        """Transfer the current resource to a local file.

        Parameters
        ----------
        src : `ResourcePath`
            Source URI.
        transfer : `str`
            Mode to use for transferring the resource. Must be one of the
            entries in ``transferModes``: copy, link, symlink, hardlink,
            relsymlink, auto, move. ``auto`` resolves to ``transferDefault``
            for a non-temporary source and to ``copy`` for a temporary one.
            ``link`` tries a hard link and falls back to a symlink.
        overwrite : `bool`, optional
            Allow an existing file to be overwritten. Defaults to `False`.
        transaction : `~lsst.resources.utils.TransactionProtocol`, optional
            If a transaction is provided, undo actions will be registered.

        Raises
        ------
        ValueError
            Raised if ``transfer`` is not one of ``transferModes``.
        FileNotFoundError
            Raised if the local form of the source does not exist.
        RuntimeError
            Raised if a link-style transfer is requested when the local
            source is a temporary file that differs from ``src``.
        FileExistsError
            Raised if the destination exists and ``overwrite`` is `False`.
        NotImplementedError
            Raised if the resolved transfer mode has no implementation.
        """
        # Fail early to prevent delays if remote resources are requested
        if transfer not in self.transferModes:
            raise ValueError(f"Transfer mode '{transfer}' not supported by URI scheme {self.scheme}")

        # Existence checks can take time so only try if the log message
        # will be issued.
        if log.isEnabledFor(logging.DEBUG):
            log.debug(
                "Transferring %s [exists: %s] -> %s [exists: %s] (transfer=%s)",
                src,
                src.exists(),
                self,
                self.exists(),
                transfer,
            )

        # We do not have to special case FileResourcePath here because
        # as_local handles that.
        with src.as_local() as local_uri:
            is_temporary = local_uri.isTemporary
            local_src = local_uri.ospath

            # Short circuit if the URIs are identical immediately.
            if self == local_uri:
                log.debug(
                    "Target and destination URIs are identical: %s, returning immediately."
                    " No further action required.",
                    self,
                )
                return

            # Default transfer mode depends on whether we have a temporary
            # file or not.
            if transfer == "auto":
                transfer = self.transferDefault if not is_temporary else "copy"

            if not os.path.exists(local_src):
                if is_temporary:
                    if src == local_uri:
                        msg = f"Local temporary file {src} has gone missing."
                    else:
                        # This will not happen in normal scenarios.
                        msg = f"Local file {local_uri} downloaded from {src} has gone missing"
                else:
                    msg = f"Source URI {src} does not exist"
                raise FileNotFoundError(msg)

            # Follow soft links
            local_src = os.path.realpath(os.path.normpath(local_src))

            # Creating a symlink to a local copy of a remote resource
            # should never work. Creating a hardlink will work but should
            # not be allowed since it is highly unlikely that this is ever
            # an intended option and depends on the local target being
            # on the same file system as was used for the temporary file
            # download.
            # If a symlink is being requested for a local temporary file
            # that is likely undesirable but should not be refused.
            if is_temporary and src != local_uri and "link" in transfer:
                raise RuntimeError(
                    f"Can not use local file system transfer mode {transfer} for remote resource ({src})"
                )
            elif is_temporary and src == local_uri and "symlink" in transfer:
                log.debug(
                    "Using a symlink for a temporary resource may lead to unexpected downstream failures."
                )

            # For temporary files we can own them if we created it.
            # Remember what the caller asked for since the mode may be
            # switched from "copy" to "move" here.
            requested_transfer = transfer
            if src != local_uri and is_temporary and transfer == "copy":
                transfer = "move"

            # The output location should not exist unless overwrite=True.
            # Rather than use `exists()`, use os.stat since we might need
            # the full answer later.
            dest_stat: os.stat_result | None
            try:
                # Do not read through links of the file itself.
                dest_stat = os.lstat(self.ospath)
            except FileNotFoundError:
                dest_stat = None

            # It is possible that the source URI and target URI refer
            # to the same file. This can happen for a number of reasons
            # (such as soft links in the path, or they really are the same).
            # In that case log a message and return as if the transfer
            # completed (it technically did). A temporary file download
            # can't be the same so the test can be skipped.
            if dest_stat is not None and not is_temporary:
                # Be consistent and use lstat here (even though realpath
                # has been called). It does not harm.
                local_src_stat = os.lstat(local_src)
                if dest_stat.st_ino == local_src_stat.st_ino and dest_stat.st_dev == local_src_stat.st_dev:
                    log.debug(
                        "Destination URI %s is the same file as source URI %s, returning immediately."
                        " No further action required.",
                        self,
                        local_uri,
                    )
                    return

            if dest_stat is not None and not overwrite:
                raise FileExistsError(
                    f"Destination path '{self}' already exists. Transfer from {src} cannot be completed."
                )

            # Make the path absolute (but don't follow links since that
            # would possibly cause us to end up in the wrong place if the
            # file existed already as a soft link)
            newFullPath = os.path.abspath(self.ospath)
            outputDir = os.path.dirname(newFullPath)
            if not os.path.isdir(outputDir):
                # Must create the directory -- this can not be rolled back
                # since another transfer running concurrently may
                # be relying on this existing.
                _create_directories(outputDir)

            if transaction is None:
                # Use a no-op transaction to reduce code duplication
                transaction = NoTransaction()

            # For links the OS doesn't let us overwrite so if something does
            # exist we have to remove it before we do the actual "transfer"
            # below
            if "link" in transfer and overwrite and dest_stat is not None:
                with contextlib.suppress(Exception):
                    # If this fails we ignore it since it's a problem
                    # that will manifest immediately below with a more relevant
                    # error message
                    self.remove()

            if transfer == "move":
                # If a rename works we try that since that is guaranteed to
                # be atomic. If that fails we copy and rename. We do this
                # in case other processes are trying to move to the same
                # file and we want the "winner" to not be corrupted.
                try:
                    with transaction.undoWith(f"move from {local_src}", os.rename, newFullPath, local_src):
                        os.rename(local_src, newFullPath)
                except OSError:
                    with self.temporary_uri(prefix=self.parent(), suffix=self.getExtension()) as temp_copy:
                        shutil.copy(local_src, temp_copy.ospath)
                        with transaction.undoWith(
                            f"move from {local_src}",
                            shutil.move,
                            newFullPath,
                            local_src,
                            copy_function=shutil.copy,
                        ):
                            os.rename(temp_copy.ospath, newFullPath)
                    os.remove(local_src)
            elif transfer == "copy":
                # We want atomic copies so first copy to a temp location in
                # the same output directory. This at least guarantees that
                # if multiple processes are writing to the same file
                # simultaneously the file we end up with will not be corrupt.
                with self.temporary_uri(prefix=self.parent(), suffix=self.getExtension()) as temp_copy:
                    shutil.copy(local_src, temp_copy.ospath)
                    with transaction.undoWith(f"copy from {local_src}", os.remove, newFullPath):
                        # os.rename works even if the file exists.
                        # It's possible that another process has copied a file
                        # in whilst this one was copying. If overwrite
                        # protection is needed then another stat() call should
                        # happen here.
                        os.rename(temp_copy.ospath, newFullPath)
            elif transfer == "link":
                # Try hard link and if that fails use a symlink
                with transaction.undoWith(f"link to {local_src}", os.remove, newFullPath):
                    try:
                        os.link(local_src, newFullPath)
                    except OSError:
                        # Read through existing symlinks
                        os.symlink(local_src, newFullPath)
            elif transfer == "hardlink":
                with transaction.undoWith(f"hardlink to {local_src}", os.remove, newFullPath):
                    os.link(local_src, newFullPath)
            elif transfer == "symlink":
                # Read through existing symlinks
                with transaction.undoWith(f"symlink to {local_src}", os.remove, newFullPath):
                    os.symlink(local_src, newFullPath)
            elif transfer == "relsymlink":
                # This is a standard symlink but using a relative path
                # Need the directory name to give to relative root
                # A full file path confuses it into an extra ../
                newFullPathRoot = os.path.dirname(newFullPath)
                relPath = os.path.relpath(local_src, newFullPathRoot)
                with transaction.undoWith(f"relsymlink to {local_src}", os.remove, newFullPath):
                    os.symlink(relPath, newFullPath)
            else:
                raise NotImplementedError(f"Transfer type '{transfer}' not supported.")

        # This was an explicit move requested from a remote resource
        # try to remove that remote resource. We check is_temporary because
        # the local file would have been moved by shutil.move already.
        if requested_transfer == "move" and is_temporary and src != local_uri:
            # Transactions do not work here
            src.remove()

    def walk(
        self, file_filter: str | re.Pattern | None = None
    ) -> Iterator[list | tuple[ResourcePath, list[str], list[str]]]:
        """Walk the directory tree returning matching files and directories.

        Symbolic links to directories are followed.

        Parameters
        ----------
        file_filter : `str` or `re.Pattern`, optional
            Regex used to select files. Only file names for which the
            regex ``search`` succeeds are returned. A string is compiled
            to a pattern first. If `None` all files are returned.

        Yields
        ------
        dirpath : `ResourcePath`
            Current directory being examined.
        dirnames : `list` of `str`
            Names of subdirectories within dirpath.
        filenames : `list` of `str`
            Names of all the files within dirpath.

        Raises
        ------
        ValueError
            Raised if this URI is not a directory.
        """
        if not self.isdir():
            raise ValueError("Can not walk a non-directory URI")

        if isinstance(file_filter, str):
            file_filter = re.compile(file_filter)

        for root, dirs, files in os.walk(self.ospath, followlinks=True):
            # Filter by the regex
            if file_filter is not None:
                files = [f for f in files if file_filter.search(f)]
            yield type(self)(root, forceAbsolute=False, forceDirectory=True), dirs, files

    @classmethod
    def _fixupPathUri(
        cls,
        parsed: urllib.parse.ParseResult,
        root: ResourcePath | None = None,
        forceAbsolute: bool = False,
        forceDirectory: bool = False,
    ) -> tuple[urllib.parse.ParseResult, bool]:
        """Fix up relative paths in URI instances.

        Parameters
        ----------
        parsed : `~urllib.parse.ParseResult`
            The result from parsing a URI using `urllib.parse`.
        root : `ResourcePath`, optional
            Path to use as root when converting relative to absolute.
            If `None`, it will be the current working directory. It is only
            used if a file-scheme is used incorrectly with a relative path.
        forceAbsolute : `bool`, ignored
            Has no effect for this subclass. ``file`` URIs are always
            absolute.
        forceDirectory : `bool`, optional
            If `True` forces the URI to end with a separator, otherwise given
            URI is interpreted as is.

        Returns
        -------
        modified : `~urllib.parse.ParseResult`
            Update result if a URI is being handled.
        dirLike : `bool`
            `True` if given parsed URI has a trailing separator or
            forceDirectory is True. Otherwise `False`.

        Raises
        ------
        RuntimeError
            Raised if ``root`` is needed and has a scheme other than
            ``file``.

        Notes
        -----
        Relative paths are explicitly not supported by RFC8089 but `urllib`
        does accept URIs of the form ``file:relative/path.ext``. They need
        to be turned into absolute paths before they can be used. This is
        always done regardless of the ``forceAbsolute`` parameter.
        """
        # assume we are not dealing with a directory like URI
        dirLike = False

        # file URI implies POSIX path separators so split as POSIX,
        # then join as os, and convert to abspath. Do not handle
        # home directories since "file" scheme is explicitly documented
        # to not do tilde expansion.
        sep = posixpath.sep

        # For local file system we can explicitly check to see if this
        # really is a directory. The URI might point to a location that
        # does not exists yet but all that matters is if it is a directory
        # then we make sure use that fact. No need to do the check if
        # we are already being told.
        if not forceDirectory and posixpath.isdir(parsed.path):
            forceDirectory = True

        # For an absolute path all we need to do is check if we need
        # to force the directory separator
        if posixpath.isabs(parsed.path):
            if forceDirectory:
                if not parsed.path.endswith(sep):
                    parsed = parsed._replace(path=parsed.path + sep)
                dirLike = True
            return copy.copy(parsed), dirLike

        # Relative path so must fix it to be compliant with the standard

        # Replacement values for the URI
        replacements: dict[str, str] = {}

        if root is None:
            root_str = os.path.abspath(os.path.curdir)
        else:
            if root.scheme and root.scheme != "file":
                raise RuntimeError(f"The override root must be a file URI not {root.scheme}")
            root_str = os.path.abspath(root.ospath)

        replacements["path"] = posixpath.normpath(posixpath.join(os2posix(root_str), parsed.path))

        # normpath strips trailing "/" so put it back if necessary
        # Acknowledge that trailing separator exists.
        if forceDirectory or (parsed.path.endswith(sep) and not replacements["path"].endswith(sep)):
            replacements["path"] += sep
            dirLike = True

        # ParseResult is a NamedTuple so _replace is standard API
        parsed = parsed._replace(**replacements)

        if parsed.params or parsed.query:
            log.warning("Additional items unexpectedly encountered in file URI: %s", parsed.geturl())

        return parsed, dirLike

    @contextlib.contextmanager
    def _openImpl(
        self,
        mode: str = "r",
        *,
        encoding: str | None = None,
    ) -> Iterator[IO]:
        """Open the local file through a `FileResourceHandle`.

        Parameters
        ----------
        mode : `str`, optional
            Mode forwarded to `FileResourceHandle`. Defaults to ``"r"``.
        encoding : `str` or `None`, optional
            Encoding forwarded to `FileResourceHandle`.

        Yields
        ------
        buffer : `typing.IO`
            The handle produced by entering the `FileResourceHandle`
            context.
        """
        with FileResourceHandle(mode=mode, log=log, filename=self.ospath, encoding=encoding) as buffer:
            yield buffer  # type: ignore
def _create_directories(name: str | bytes) -> None:
    """Create a directory, along with any missing parent directories.

    Parameters
    ----------
    name : `str` or `bytes`
        Path to the directory to be created.

    Notes
    -----
    This mirrors the standard library function `os.makedirs`, called with
    a mode of ``0o777`` and ``exist_ok=True``, with one difference: after
    each directory is made, ``ensure_directory_is_writeable`` is called on
    it so that a restrictive process umask does not leave the new
    directory unusable by its owner.
    """
    parent, leaf = os.path.split(name)
    if not leaf:
        # The path ended in a separator; split again for the real leaf.
        parent, leaf = os.path.split(parent)

    if parent and leaf and not os.path.exists(parent):
        # Another thread may create the parent between the check and the
        # recursive call; that race is harmless.
        with contextlib.suppress(FileExistsError):
            _create_directories(parent)
        current_dir: str | bytes = bytes(os.curdir, "ASCII") if isinstance(leaf, bytes) else os.curdir
        if leaf == current_dir:
            # xxx/newdir/. exists if xxx/newdir exists
            return

    try:
        os.mkdir(name, 0o777)
        # This call is the only departure from the standard library
        # implementation.
        ensure_directory_is_writeable(name)
    except OSError:
        # Cannot rely on checking for EEXIST, since the operating system
        # could give priority to other errors like EACCES or EROFS.
        # An existing directory is always acceptable here.
        if not os.path.isdir(name):
            raise
529 raise