Coverage for python/lsst/resources/file.py: 95%
198 statements
« prev ^ index » next coverage.py v7.4.1, created at 2024-02-01 11:14 +0000
« prev ^ index » next coverage.py v7.4.1, created at 2024-02-01 11:14 +0000
1# This file is part of lsst-resources.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# Use of this source code is governed by a 3-clause BSD-style
10# license that can be found in the LICENSE file.
12from __future__ import annotations
14__all__ = ("FileResourcePath",)
16import contextlib
17import copy
18import logging
19import os
20import os.path
21import posixpath
22import re
23import shutil
24import urllib.parse
25from collections.abc import Iterator
26from typing import IO, TYPE_CHECKING
28from ._resourceHandles._fileResourceHandle import FileResourceHandle
29from ._resourcePath import ResourcePath
30from .utils import NoTransaction, ensure_directory_is_writeable, os2posix, posix2os
32if TYPE_CHECKING:
33 from .utils import TransactionProtocol
36log = logging.getLogger(__name__)
class FileResourcePath(ResourcePath):
    """Path for explicit ``file`` URI scheme."""

    transferModes = ("copy", "link", "symlink", "hardlink", "relsymlink", "auto", "move")
    transferDefault: str = "link"

    # By definition refers to a local file
    isLocal = True

    @property
    def ospath(self) -> str:
        """Path component of the URI localized to current OS.

        Will unquote URI path since a formal URI must include the quoting.
        """
        return urllib.parse.unquote(posix2os(self._uri.path))

    def exists(self) -> bool:
        """Indicate that the file exists."""
        # Uses os.path.exists so if there is a soft link that points
        # to a file that no longer exists this will return False
        return os.path.exists(self.ospath)

    def size(self) -> int:
        """Return the size of the file in bytes.

        Returns
        -------
        sz : `int`
            Size in bytes as reported by `os.stat`. A directory is always
            reported as having a size of 0.
        """
        if os.path.isdir(self.ospath):
            return 0
        return os.stat(self.ospath).st_size

    def remove(self) -> None:
        """Remove the resource."""
        os.remove(self.ospath)

    def _as_local(self) -> tuple[str, bool]:
        """Return the local path of the file.

        This is an internal helper for ``as_local()``.

        Returns
        -------
        path : `str`
            The local path to this file.
        temporary : `bool`
            The value of ``isTemporary`` for this resource. No copy is made
            so the temporary nature of the input is passed through.
        """
        return self.ospath, self.isTemporary

    def read(self, size: int = -1) -> bytes:
        """Read the contents of the file as bytes.

        Parameters
        ----------
        size : `int`, optional
            Number of bytes to read. The value is passed directly to the
            ``read`` method of the opened binary file, so the default of -1
            reads to the end of the file.

        Returns
        -------
        data : `bytes`
            The bytes read from the file.
        """
        with open(self.ospath, "rb") as fh:
            return fh.read(size)

    def write(self, data: bytes, overwrite: bool = True) -> None:
        """Write the supplied bytes to the file.

        Parameters
        ----------
        data : `bytes`
            The bytes to write.
        overwrite : `bool`, optional
            If `True` the file is opened with mode ``wb`` and any existing
            content is replaced. If `False` the file is opened with mode
            ``xb`` so that opening fails if the file already exists.

        Notes
        -----
        The parent directory is created if it does not already exist.
        """
        # Named to avoid shadowing the ``dir`` builtin.
        parent_dir = os.path.dirname(self.ospath)
        if not os.path.exists(parent_dir):
            _create_directories(parent_dir)
        mode = "wb" if overwrite else "xb"
        with open(self.ospath, mode) as f:
            f.write(data)

    def mkdir(self) -> None:
        """Make the directory associated with this URI.

        An attempt will be made to create the directory even if the URI
        looks like a file.

        Raises
        ------
        NotADirectoryError
            Raised if a non-directory already exists.
        """
        try:
            _create_directories(self.ospath)
        except FileExistsError:
            raise NotADirectoryError(f"{self.ospath} exists but is not a directory.") from None

    def isdir(self) -> bool:
        """Return whether this URI is a directory.

        Returns
        -------
        isdir : `bool`
            `True` if this URI is a directory or looks like a directory,
            else `False`.
        """
        if self.dirLike is None:
            # Cache state for next time.
            self.dirLike = os.path.isdir(self.ospath)
        return self.dirLike

    def transfer_from(
        self,
        src: ResourcePath,
        transfer: str,
        overwrite: bool = False,
        transaction: TransactionProtocol | None = None,
    ) -> None:
        """Transfer the current resource to a local file.

        Parameters
        ----------
        src : `ResourcePath`
            Source URI.
        transfer : `str`
            Mode to use for transferring the resource. Supports the following
            options: copy, move, link, symlink, hardlink, relsymlink, auto.
            ``link`` tries a hard link and falls back to a symlink.
            ``auto`` uses ``transferDefault`` unless the local view of the
            source is a temporary file, in which case ``copy`` is used.
        overwrite : `bool`, optional
            Allow an existing file to be overwritten. Defaults to `False`.
        transaction : `~lsst.resources.utils.TransactionProtocol`, optional
            If a transaction is provided, undo actions will be registered.

        Raises
        ------
        ValueError
            Raised if the transfer mode is not in ``transferModes``.
        FileNotFoundError
            Raised if the local view of the source does not exist.
        RuntimeError
            Raised if a link-based transfer mode is requested for a source
            that had to be downloaded to a temporary local file.
        FileExistsError
            Raised if the destination exists and ``overwrite`` is `False`.
        NotImplementedError
            Raised if the transfer mode passed validation but is not handled
            by this implementation.
        """
        # Fail early to prevent delays if remote resources are requested
        if transfer not in self.transferModes:
            raise ValueError(f"Transfer mode '{transfer}' not supported by URI scheme {self.scheme}")

        # Existence checks can take time so only try if the log message
        # will be issued.
        if log.isEnabledFor(logging.DEBUG):
            log.debug(
                "Transferring %s [exists: %s] -> %s [exists: %s] (transfer=%s)",
                src,
                src.exists(),
                self,
                self.exists(),
                transfer,
            )

        # We do not have to special case FileResourcePath here because
        # as_local handles that.
        with src.as_local() as local_uri:
            is_temporary = local_uri.isTemporary
            local_src = local_uri.ospath

            # Short circuit if the URIs are identical immediately.
            if self == local_uri:
                log.debug(
                    "Target and destination URIs are identical: %s, returning immediately."
                    " No further action required.",
                    self,
                )
                return

            # Default transfer mode depends on whether we have a temporary
            # file or not.
            if transfer == "auto":
                transfer = self.transferDefault if not is_temporary else "copy"

            if not os.path.exists(local_src):
                if is_temporary:
                    if src == local_uri:
                        msg = f"Local temporary file {src} has gone missing."
                    else:
                        # This will not happen in normal scenarios.
                        msg = f"Local file {local_uri} downloaded from {src} has gone missing"
                else:
                    msg = f"Source URI {src} does not exist"
                raise FileNotFoundError(msg)

            # Follow soft links
            local_src = os.path.realpath(os.path.normpath(local_src))

            # Creating a symlink to a local copy of a remote resource
            # should never work. Creating a hardlink will work but should
            # not be allowed since it is highly unlikely that this is ever
            # an intended option and depends on the local target being
            # on the same file system as was used for the temporary file
            # download.
            # If a symlink is being requested for a local temporary file
            # that is likely undesirable but should not be refused.
            if is_temporary and src != local_uri and "link" in transfer:
                raise RuntimeError(
                    f"Can not use local file system transfer mode {transfer} for remote resource ({src})"
                )
            elif is_temporary and src == local_uri and "symlink" in transfer:
                log.debug(
                    "Using a symlink for a temporary resource may lead to unexpected downstream failures."
                )

            # For temporary files we can own them if we created it.
            # Remember what the caller asked for since that decides whether
            # the original source is removed at the end.
            requested_transfer = transfer
            if src != local_uri and is_temporary and transfer == "copy":
                transfer = "move"

            # The output location should not exist unless overwrite=True.
            # Rather than use `exists()`, use os.stat since we might need
            # the full answer later.
            dest_stat: os.stat_result | None
            try:
                # Do not read through links of the file itself.
                dest_stat = os.lstat(self.ospath)
            except FileNotFoundError:
                dest_stat = None

            # It is possible that the source URI and target URI refer
            # to the same file. This can happen for a number of reasons
            # (such as soft links in the path, or they really are the same).
            # In that case log a message and return as if the transfer
            # completed (it technically did). A temporary file download
            # can't be the same so the test can be skipped.
            if dest_stat and not is_temporary:
                # Be consistent and use lstat here (even though realpath
                # has been called). It does not harm.
                local_src_stat = os.lstat(local_src)
                if dest_stat.st_ino == local_src_stat.st_ino and dest_stat.st_dev == local_src_stat.st_dev:
                    log.debug(
                        "Destination URI %s is the same file as source URI %s, returning immediately."
                        " No further action required.",
                        self,
                        local_uri,
                    )
                    return

            if not overwrite and dest_stat:
                raise FileExistsError(
                    f"Destination path '{self}' already exists. Transfer from {src} cannot be completed."
                )

            # Make the path absolute (but don't follow links since that
            # would possibly cause us to end up in the wrong place if the
            # file existed already as a soft link)
            newFullPath = os.path.abspath(self.ospath)
            outputDir = os.path.dirname(newFullPath)
            if not os.path.isdir(outputDir):
                # Must create the directory -- this can not be rolled back
                # since another transfer running concurrently may
                # be relying on this existing.
                _create_directories(outputDir)

            if transaction is None:
                # Use a no-op transaction to reduce code duplication
                transaction = NoTransaction()

            # For links the OS doesn't let us overwrite so if something does
            # exist we have to remove it before we do the actual "transfer"
            # below
            if "link" in transfer and overwrite and dest_stat:
                with contextlib.suppress(Exception):
                    # If this fails we ignore it since it's a problem
                    # that will manifest immediately below with a more relevant
                    # error message
                    self.remove()

            if transfer == "move":
                # If a rename works we try that since that is guaranteed to
                # be atomic. If that fails we copy and rename. We do this
                # in case other processes are trying to move to the same
                # file and we want the "winner" to not be corrupted.
                try:
                    with transaction.undoWith(f"move from {local_src}", os.rename, newFullPath, local_src):
                        os.rename(local_src, newFullPath)
                except OSError:
                    with self.temporary_uri(prefix=self.parent(), suffix=self.getExtension()) as temp_copy:
                        shutil.copy(local_src, temp_copy.ospath)
                        with transaction.undoWith(
                            f"move from {local_src}",
                            shutil.move,
                            newFullPath,
                            local_src,
                            copy_function=shutil.copy,
                        ):
                            # os.replace has the same semantics as os.rename
                            # on POSIX but also overwrites an existing
                            # destination on other platforms.
                            os.replace(temp_copy.ospath, newFullPath)
                    os.remove(local_src)
            elif transfer == "copy":
                # We want atomic copies so first copy to a temp location in
                # the same output directory. This at least guarantees that
                # if multiple processes are writing to the same file
                # simultaneously the file we end up with will not be corrupt.
                with self.temporary_uri(prefix=self.parent(), suffix=self.getExtension()) as temp_copy:
                    shutil.copy(local_src, temp_copy.ospath)
                    with transaction.undoWith(f"copy from {local_src}", os.remove, newFullPath):
                        # os.replace works even if the file exists, on all
                        # platforms.
                        # It's possible that another process has copied a file
                        # in whilst this one was copying. If overwrite
                        # protection is needed then another stat() call should
                        # happen here.
                        os.replace(temp_copy.ospath, newFullPath)
            elif transfer == "link":
                # Try hard link and if that fails use a symlink
                with transaction.undoWith(f"link to {local_src}", os.remove, newFullPath):
                    try:
                        os.link(local_src, newFullPath)
                    except OSError:
                        # Read through existing symlinks
                        os.symlink(local_src, newFullPath)
            elif transfer == "hardlink":
                with transaction.undoWith(f"hardlink to {local_src}", os.remove, newFullPath):
                    os.link(local_src, newFullPath)
            elif transfer == "symlink":
                # Read through existing symlinks
                with transaction.undoWith(f"symlink to {local_src}", os.remove, newFullPath):
                    os.symlink(local_src, newFullPath)
            elif transfer == "relsymlink":
                # This is a standard symlink but using a relative path
                # Need the directory name to give to relative root
                # A full file path confuses it into an extra ../
                newFullPathRoot = os.path.dirname(newFullPath)
                relPath = os.path.relpath(local_src, newFullPathRoot)
                with transaction.undoWith(f"relsymlink to {local_src}", os.remove, newFullPath):
                    os.symlink(relPath, newFullPath)
            else:
                raise NotImplementedError(f"Transfer type '{transfer}' not supported.")

        # This was an explicit move requested from a remote resource
        # try to remove that remote resource. We check is_temporary because
        # the local file would have been moved by shutil.move already.
        if requested_transfer == "move" and is_temporary and src != local_uri:
            # Transactions do not work here
            src.remove()

    def walk(
        self, file_filter: str | re.Pattern | None = None
    ) -> Iterator[list | tuple[ResourcePath, list[str], list[str]]]:
        """Walk the directory tree returning matching files and directories.

        Parameters
        ----------
        file_filter : `str` or `re.Pattern`, optional
            Regex used to select files. Only file names for which a search
            with this regex succeeds are included in the returned file list.
            A `str` is compiled to a pattern before use. Directory names are
            not filtered.

        Yields
        ------
        dirpath : `ResourcePath`
            Current directory being examined.
        dirnames : `list` of `str`
            Names of subdirectories within dirpath.
        filenames : `list` of `str`
            Names of all the files within dirpath.

        Raises
        ------
        ValueError
            Raised if this URI is not a directory.
        """
        if not self.isdir():
            raise ValueError("Can not walk a non-directory URI")

        if isinstance(file_filter, str):
            file_filter = re.compile(file_filter)

        for root, dirs, files in os.walk(self.ospath, followlinks=True):
            # Filter by the regex
            if file_filter is not None:
                files = [f for f in files if file_filter.search(f)]
            yield type(self)(root, forceAbsolute=False, forceDirectory=True), dirs, files

    @classmethod
    def _fixupPathUri(
        cls,
        parsed: urllib.parse.ParseResult,
        root: ResourcePath | None = None,
        forceAbsolute: bool = False,
        forceDirectory: bool | None = None,
    ) -> tuple[urllib.parse.ParseResult, bool | None]:
        """Fix up relative paths in URI instances.

        Parameters
        ----------
        parsed : `~urllib.parse.ParseResult`
            The result from parsing a URI using `urllib.parse`.
        root : `ResourcePath`, optional
            Path to use as root when converting relative to absolute.
            If `None`, it will be the current working directory. It is only
            used if a file-scheme is used incorrectly with a relative path.
        forceAbsolute : `bool`, ignored
            Has no effect for this subclass. ``file`` URIs are always
            absolute.
        forceDirectory : `bool`, optional
            If `True` forces the URI to end with a separator, otherwise given
            URI is interpreted as is.

        Returns
        -------
        modified : `~urllib.parse.ParseResult`
            Update result if a URI is being handled.
        dirLike : `bool` or `None`
            `True` if given parsed URI has a trailing separator or
            ``forceDirectory`` is `True`. Otherwise can return the given
            value of ``forceDirectory``.

        Raises
        ------
        ValueError
            Raised if ``forceDirectory`` is `False` but the URI path ends
            with a separator.
        RuntimeError
            Raised if ``root`` has a scheme that is not ``file``.

        Notes
        -----
        Relative paths are explicitly not supported by RFC8089 but `urllib`
        does accept URIs of the form ``file:relative/path.ext``. They need
        to be turned into absolute paths before they can be used. This is
        always done regardless of the ``forceAbsolute`` parameter.
        """
        # Start from whatever the caller declared; this may be None.
        dirLike = forceDirectory

        # file URI implies POSIX path separators so split as POSIX,
        # then join as os, and convert to abspath. Do not handle
        # home directories since "file" scheme is explicitly documented
        # to not do tilde expansion.
        sep = posixpath.sep

        # Consistency check.
        if forceDirectory is False and parsed.path.endswith(sep):
            raise ValueError(
                f"URI {parsed.geturl()} ends with {sep} but "
                "forceDirectory parameter declares it to be a file."
            )

        # For an absolute path all we need to do is check if we need
        # to force the directory separator
        if posixpath.isabs(parsed.path):
            if forceDirectory:
                if not parsed.path.endswith(sep):
                    parsed = parsed._replace(path=parsed.path + sep)
                dirLike = True
            return copy.copy(parsed), dirLike

        # Relative path so must fix it to be compliant with the standard

        # Replacement values for the URI
        replacements = {}

        if root is None:
            root_str = os.path.abspath(os.path.curdir)
        else:
            if root.scheme and root.scheme != "file":
                raise RuntimeError(f"The override root must be a file URI not {root.scheme}")
            root_str = os.path.abspath(root.ospath)

        replacements["path"] = posixpath.normpath(posixpath.join(os2posix(root_str), parsed.path))

        # normpath strips trailing "/" so put it back if necessary
        # Acknowledge that trailing separator exists.
        if forceDirectory or (parsed.path.endswith(sep) and not replacements["path"].endswith(sep)):
            replacements["path"] += sep
            dirLike = True

        # ParseResult is a NamedTuple so _replace is standard API
        parsed = parsed._replace(**replacements)

        if parsed.params or parsed.query:
            log.warning("Additional items unexpectedly encountered in file URI: %s", parsed.geturl())

        return parsed, dirLike

    @contextlib.contextmanager
    def _openImpl(
        self,
        mode: str = "r",
        *,
        encoding: str | None = None,
    ) -> Iterator[IO]:
        """Open the local file and yield a handle to it.

        Parameters
        ----------
        mode : `str`, optional
            Mode passed through to `FileResourceHandle`.
        encoding : `str` or `None`, optional
            Encoding passed through to `FileResourceHandle`.

        Yields
        ------
        buffer : `FileResourceHandle`
            Handle for the file at ``ospath``, usable only while the
            context is active.
        """
        with FileResourceHandle(mode=mode, log=log, filename=self.ospath, encoding=encoding) as buffer:
            yield buffer  # type: ignore
486def _create_directories(name: str | bytes) -> None:
487 """Create a directory and all of its parent directories that don't yet
488 exist.
490 Parameters
491 ----------
492 name : `str` or `bytes`
493 Path to the directory to be created
495 Notes
496 -----
497 The code in this function is duplicated from the Python standard library
498 function os.makedirs with one change: if the user has set a process umask
499 that prevents us from creating/accessing files in the newly created
500 directories, the permissions of the directories are altered to allow
501 owner-write and owner-traverse so that they can be used.
502 """
503 # These are optional parameters in the original function, but they can be
504 # constant here.
505 mode = 0o777
506 exist_ok = True
508 head, tail = os.path.split(name)
509 if not tail:
510 head, tail = os.path.split(head)
511 if head and tail and not os.path.exists(head):
512 try:
513 _create_directories(head)
514 except FileExistsError:
515 # Defeats race condition when another thread created the path
516 pass
517 cdir: str | bytes = os.curdir
518 if isinstance(tail, bytes): 518 ↛ 519line 518 didn't jump to line 519, because the condition on line 518 was never true
519 cdir = bytes(os.curdir, "ASCII")
520 if tail == cdir: # xxx/newdir/. exists if xxx/newdir exists 520 ↛ 521line 520 didn't jump to line 521, because the condition on line 520 was never true
521 return
522 try:
523 os.mkdir(name, mode)
524 # This is the portion that is modified relative to the standard library
525 # version of the function.
526 ensure_directory_is_writeable(name)
527 # end modified portion
528 except OSError:
529 # Cannot rely on checking for EEXIST, since the operating system
530 # could give priority to other errors like EACCES or EROFS
531 if not exist_ok or not os.path.isdir(name):
532 raise