Coverage for python/lsst/resources/file.py: 96%
174 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-13 09:44 +0000
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-13 09:44 +0000
1# This file is part of lsst-resources.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# Use of this source code is governed by a 3-clause BSD-style
10# license that can be found in the LICENSE file.
12from __future__ import annotations
14__all__ = ("FileResourcePath",)
16import contextlib
17import copy
18import logging
19import os
20import os.path
21import posixpath
22import re
23import shutil
24import urllib.parse
25from collections.abc import Iterator
26from typing import IO, TYPE_CHECKING
28from ._resourceHandles._fileResourceHandle import FileResourceHandle
29from ._resourcePath import ResourcePath
30from .utils import NoTransaction, os2posix, posix2os
32if TYPE_CHECKING:
33 from .utils import TransactionProtocol
36log = logging.getLogger(__name__)
class FileResourcePath(ResourcePath):
    """Resource path specialised for URIs using the ``file`` scheme."""

    # A ``file`` URI always refers to the local file system.
    isLocal = True

    # Transfer modes that ``transfer_from`` accepts for this scheme.
    transferModes = ("copy", "link", "symlink", "hardlink", "relsymlink", "auto", "move")

    # Mode substituted for "auto" when the source is not a temporary file.
    transferDefault: str = "link"
48 @property
49 def ospath(self) -> str:
50 """Path component of the URI localized to current OS.
52 Will unquote URI path since a formal URI must include the quoting.
53 """
54 return urllib.parse.unquote(posix2os(self._uri.path))
56 def exists(self) -> bool:
57 """Indicate that the file exists."""
58 # Uses os.path.exists so if there is a soft link that points
59 # to a file that no longer exists this will return False
60 return os.path.exists(self.ospath)
62 def size(self) -> int:
63 """Return the size of the file in bytes."""
64 if not os.path.isdir(self.ospath):
65 stat = os.stat(self.ospath)
66 sz = stat.st_size
67 else:
68 sz = 0
69 return sz
71 def remove(self) -> None:
72 """Remove the resource."""
73 os.remove(self.ospath)
75 def _as_local(self) -> tuple[str, bool]:
76 """Return the local path of the file.
78 This is an internal helper for ``as_local()``.
80 Returns
81 -------
82 path : `str`
83 The local path to this file.
84 temporary : `bool`
85 Always returns the temporary nature of the input file resource.
86 """
87 return self.ospath, self.isTemporary
89 def read(self, size: int = -1) -> bytes:
90 """Return the entire content of the file as bytes."""
91 with open(self.ospath, "rb") as fh:
92 return fh.read(size)
94 def write(self, data: bytes, overwrite: bool = True) -> None:
95 """Write the supplied data to the file."""
96 dir = os.path.dirname(self.ospath)
97 if not os.path.exists(dir):
98 os.makedirs(dir, exist_ok=True)
99 mode = "wb" if overwrite else "xb"
100 with open(self.ospath, mode) as f:
101 f.write(data)
103 def mkdir(self) -> None:
104 """Make the directory associated with this URI.
106 An attempt will be made to create the directory even if the URI
107 looks like a file.
109 Raises
110 ------
111 NotADirectoryError:
112 Raised if a non-directory already exists.
113 """
114 try:
115 os.makedirs(self.ospath, exist_ok=True)
116 except FileExistsError:
117 raise NotADirectoryError(f"{self.ospath} exists but is not a directory.") from None
119 def isdir(self) -> bool:
120 """Return whether this URI is a directory.
122 Returns
123 -------
124 isdir : `bool`
125 `True` if this URI is a directory or looks like a directory,
126 else `False`.
127 """
128 return self.dirLike or os.path.isdir(self.ospath)
130 def transfer_from(
131 self,
132 src: ResourcePath,
133 transfer: str,
134 overwrite: bool = False,
135 transaction: TransactionProtocol | None = None,
136 ) -> None:
137 """Transfer the current resource to a local file.
139 Parameters
140 ----------
141 src : `ResourcePath`
142 Source URI.
143 transfer : `str`
144 Mode to use for transferring the resource. Supports the following
145 options: copy, link, symlink, hardlink, relsymlink.
146 overwrite : `bool`, optional
147 Allow an existing file to be overwritten. Defaults to `False`.
148 transaction : `~lsst.resources.utils.TransactionProtocol`, optional
149 If a transaction is provided, undo actions will be registered.
150 """
151 # Fail early to prevent delays if remote resources are requested
152 if transfer not in self.transferModes:
153 raise ValueError(f"Transfer mode '{transfer}' not supported by URI scheme {self.scheme}")
155 # Existence checks can take time so only try if the log message
156 # will be issued.
157 if log.isEnabledFor(logging.DEBUG): 157 ↛ 169line 157 didn't jump to line 169, because the condition on line 157 was never false
158 log.debug(
159 "Transferring %s [exists: %s] -> %s [exists: %s] (transfer=%s)",
160 src,
161 src.exists(),
162 self,
163 self.exists(),
164 transfer,
165 )
167 # We do not have to special case FileResourcePath here because
168 # as_local handles that.
169 with src.as_local() as local_uri:
170 is_temporary = local_uri.isTemporary
171 local_src = local_uri.ospath
173 # Short circuit if the URIs are identical immediately.
174 if self == local_uri:
175 log.debug(
176 "Target and destination URIs are identical: %s, returning immediately."
177 " No further action required.",
178 self,
179 )
180 return
182 # Default transfer mode depends on whether we have a temporary
183 # file or not.
184 if transfer == "auto":
185 transfer = self.transferDefault if not is_temporary else "copy"
187 if not os.path.exists(local_src):
188 if is_temporary:
189 if src == local_uri: 189 ↛ 193line 189 didn't jump to line 193, because the condition on line 189 was never false
190 msg = f"Local temporary file {src} has gone missing."
191 else:
192 # This will not happen in normal scenarios.
193 msg = f"Local file {local_uri} downloaded from {src} has gone missing"
194 else:
195 msg = f"Source URI {src} does not exist"
196 raise FileNotFoundError(msg)
198 # Follow soft links
199 local_src = os.path.realpath(os.path.normpath(local_src))
201 # Creating a symlink to a local copy of a remote resource
202 # should never work. Creating a hardlink will work but should
203 # not be allowed since it is highly unlikely that this is ever
204 # an intended option and depends on the local target being
205 # on the same file system as was used for the temporary file
206 # download.
207 # If a symlink is being requested for a local temporary file
208 # that is likely undesirable but should not be refused.
209 if is_temporary and src != local_uri and "link" in transfer:
210 raise RuntimeError(
211 f"Can not use local file system transfer mode {transfer} for remote resource ({src})"
212 )
213 elif is_temporary and src == local_uri and "symlink" in transfer:
214 log.debug(
215 "Using a symlink for a temporary resource may lead to unexpected downstream failures."
216 )
218 # For temporary files we can own them if we created it.
219 requested_transfer = transfer
220 if src != local_uri and is_temporary and transfer == "copy":
221 transfer = "move"
223 # The output location should not exist unless overwrite=True.
224 # Rather than use `exists()`, use os.stat since we might need
225 # the full answer later.
226 dest_stat: os.stat_result | None
227 try:
228 # Do not read through links of the file itself.
229 dest_stat = os.lstat(self.ospath)
230 except FileNotFoundError:
231 dest_stat = None
233 # It is possible that the source URI and target URI refer
234 # to the same file. This can happen for a number of reasons
235 # (such as soft links in the path, or they really are the same).
236 # In that case log a message and return as if the transfer
237 # completed (it technically did). A temporary file download
238 # can't be the same so the test can be skipped.
239 if dest_stat and not is_temporary:
240 # Be consistent and use lstat here (even though realpath
241 # has been called). It does not harm.
242 local_src_stat = os.lstat(local_src)
243 if dest_stat.st_ino == local_src_stat.st_ino and dest_stat.st_dev == local_src_stat.st_dev:
244 log.debug(
245 "Destination URI %s is the same file as source URI %s, returning immediately."
246 " No further action required.",
247 self,
248 local_uri,
249 )
250 return
252 if not overwrite and dest_stat:
253 raise FileExistsError(
254 f"Destination path '{self}' already exists. Transfer from {src} cannot be completed."
255 )
257 # Make the path absolute (but don't follow links since that
258 # would possibly cause us to end up in the wrong place if the
259 # file existed already as a soft link)
260 newFullPath = os.path.abspath(self.ospath)
261 outputDir = os.path.dirname(newFullPath)
262 if not os.path.isdir(outputDir):
263 # Must create the directory -- this can not be rolled back
264 # since another transfer running concurrently may
265 # be relying on this existing.
266 os.makedirs(outputDir, exist_ok=True)
268 if transaction is None: 268 ↛ 275line 268 didn't jump to line 275, because the condition on line 268 was never false
269 # Use a no-op transaction to reduce code duplication
270 transaction = NoTransaction()
272 # For links the OS doesn't let us overwrite so if something does
273 # exist we have to remove it before we do the actual "transfer"
274 # below
275 if "link" in transfer and overwrite and dest_stat:
276 with contextlib.suppress(Exception):
277 # If this fails we ignore it since it's a problem
278 # that will manifest immediately below with a more relevant
279 # error message
280 self.remove()
282 if transfer == "move":
283 # If a rename works we try that since that is guaranteed to
284 # be atomic. If that fails we copy and rename. We do this
285 # in case other processes are trying to move to the same
286 # file and we want the "winner" to not be corrupted.
287 try:
288 with transaction.undoWith(f"move from {local_src}", os.rename, newFullPath, local_src):
289 os.rename(local_src, newFullPath)
290 except OSError:
291 with self.temporary_uri(prefix=self.parent(), suffix=self.getExtension()) as temp_copy:
292 shutil.copy(local_src, temp_copy.ospath)
293 with transaction.undoWith(
294 f"move from {local_src}",
295 shutil.move,
296 newFullPath,
297 local_src,
298 copy_function=shutil.copy,
299 ):
300 os.rename(temp_copy.ospath, newFullPath)
301 os.remove(local_src)
302 elif transfer == "copy":
303 # We want atomic copies so first copy to a temp location in
304 # the same output directory. This at least guarantees that
305 # if multiple processes are writing to the same file
306 # simultaneously the file we end up with will not be corrupt.
307 with self.temporary_uri(prefix=self.parent(), suffix=self.getExtension()) as temp_copy:
308 shutil.copy(local_src, temp_copy.ospath)
309 with transaction.undoWith(f"copy from {local_src}", os.remove, newFullPath):
310 # os.rename works even if the file exists.
311 # It's possible that another process has copied a file
312 # in whilst this one was copying. If overwrite
313 # protection is needed then another stat() call should
314 # happen here.
315 os.rename(temp_copy.ospath, newFullPath)
316 elif transfer == "link":
317 # Try hard link and if that fails use a symlink
318 with transaction.undoWith(f"link to {local_src}", os.remove, newFullPath):
319 try:
320 os.link(local_src, newFullPath)
321 except OSError:
322 # Read through existing symlinks
323 os.symlink(local_src, newFullPath)
324 elif transfer == "hardlink":
325 with transaction.undoWith(f"hardlink to {local_src}", os.remove, newFullPath):
326 os.link(local_src, newFullPath)
327 elif transfer == "symlink":
328 # Read through existing symlinks
329 with transaction.undoWith(f"symlink to {local_src}", os.remove, newFullPath):
330 os.symlink(local_src, newFullPath)
331 elif transfer == "relsymlink":
332 # This is a standard symlink but using a relative path
333 # Need the directory name to give to relative root
334 # A full file path confuses it into an extra ../
335 newFullPathRoot = os.path.dirname(newFullPath)
336 relPath = os.path.relpath(local_src, newFullPathRoot)
337 with transaction.undoWith(f"relsymlink to {local_src}", os.remove, newFullPath):
338 os.symlink(relPath, newFullPath)
339 else:
340 raise NotImplementedError(f"Transfer type '{transfer}' not supported.")
342 # This was an explicit move requested from a remote resource
343 # try to remove that remote resource. We check is_temporary because
344 # the local file would have been moved by shutil.move already.
345 if requested_transfer == "move" and is_temporary and src != local_uri:
346 # Transactions do not work here
347 src.remove()
349 def walk(
350 self, file_filter: str | re.Pattern | None = None
351 ) -> Iterator[list | tuple[ResourcePath, list[str], list[str]]]:
352 """Walk the directory tree returning matching files and directories.
354 Parameters
355 ----------
356 file_filter : `str` or `re.Pattern`, optional
357 Regex to filter out files from the list before it is returned.
359 Yields
360 ------
361 dirpath : `ResourcePath`
362 Current directory being examined.
363 dirnames : `list` of `str`
364 Names of subdirectories within dirpath.
365 filenames : `list` of `str`
366 Names of all the files within dirpath.
367 """
368 if not self.isdir():
369 raise ValueError("Can not walk a non-directory URI")
371 if isinstance(file_filter, str): 371 ↛ 372line 371 didn't jump to line 372, because the condition on line 371 was never true
372 file_filter = re.compile(file_filter)
374 for root, dirs, files in os.walk(self.ospath, followlinks=True):
375 # Filter by the regex
376 if file_filter is not None:
377 files = [f for f in files if file_filter.search(f)]
378 yield type(self)(root, forceAbsolute=False, forceDirectory=True), dirs, files
380 @classmethod
381 def _fixupPathUri(
382 cls,
383 parsed: urllib.parse.ParseResult,
384 root: ResourcePath | None = None,
385 forceAbsolute: bool = False,
386 forceDirectory: bool = False,
387 ) -> tuple[urllib.parse.ParseResult, bool]:
388 """Fix up relative paths in URI instances.
390 Parameters
391 ----------
392 parsed : `~urllib.parse.ParseResult`
393 The result from parsing a URI using `urllib.parse`.
394 root : `ResourcePath`, optional
395 Path to use as root when converting relative to absolute.
396 If `None`, it will be the current working directory. It is only
397 used if a file-scheme is used incorrectly with a relative path.
398 forceAbsolute : `bool`, ignored
399 Has no effect for this subclass. ``file`` URIs are always
400 absolute.
401 forceDirectory : `bool`, optional
402 If `True` forces the URI to end with a separator, otherwise given
403 URI is interpreted as is.
405 Returns
406 -------
407 modified : `~urllib.parse.ParseResult`
408 Update result if a URI is being handled.
409 dirLike : `bool`
410 `True` if given parsed URI has a trailing separator or
411 forceDirectory is True. Otherwise `False`.
413 Notes
414 -----
415 Relative paths are explicitly not supported by RFC8089 but `urllib`
416 does accept URIs of the form ``file:relative/path.ext``. They need
417 to be turned into absolute paths before they can be used. This is
418 always done regardless of the ``forceAbsolute`` parameter.
419 """
420 # assume we are not dealing with a directory like URI
421 dirLike = False
423 # file URI implies POSIX path separators so split as POSIX,
424 # then join as os, and convert to abspath. Do not handle
425 # home directories since "file" scheme is explicitly documented
426 # to not do tilde expansion.
427 sep = posixpath.sep
429 # For local file system we can explicitly check to see if this
430 # really is a directory. The URI might point to a location that
431 # does not exists yet but all that matters is if it is a directory
432 # then we make sure use that fact. No need to do the check if
433 # we are already being told.
434 if not forceDirectory and posixpath.isdir(parsed.path):
435 forceDirectory = True
437 # For an absolute path all we need to do is check if we need
438 # to force the directory separator
439 if posixpath.isabs(parsed.path):
440 if forceDirectory:
441 if not parsed.path.endswith(sep):
442 parsed = parsed._replace(path=parsed.path + sep)
443 dirLike = True
444 return copy.copy(parsed), dirLike
446 # Relative path so must fix it to be compliant with the standard
448 # Replacement values for the URI
449 replacements = {}
451 if root is None:
452 root_str = os.path.abspath(os.path.curdir)
453 else:
454 if root.scheme and root.scheme != "file": 454 ↛ 455line 454 didn't jump to line 455, because the condition on line 454 was never true
455 raise RuntimeError(f"The override root must be a file URI not {root.scheme}")
456 root_str = os.path.abspath(root.ospath)
458 replacements["path"] = posixpath.normpath(posixpath.join(os2posix(root_str), parsed.path))
460 # normpath strips trailing "/" so put it back if necessary
461 # Acknowledge that trailing separator exists.
462 if forceDirectory or (parsed.path.endswith(sep) and not replacements["path"].endswith(sep)):
463 replacements["path"] += sep
464 dirLike = True
466 # ParseResult is a NamedTuple so _replace is standard API
467 parsed = parsed._replace(**replacements)
469 if parsed.params or parsed.query: 469 ↛ 470line 469 didn't jump to line 470, because the condition on line 469 was never true
470 log.warning("Additional items unexpectedly encountered in file URI: %s", parsed.geturl())
472 return parsed, dirLike
474 @contextlib.contextmanager
475 def _openImpl(
476 self,
477 mode: str = "r",
478 *,
479 encoding: str | None = None,
480 ) -> Iterator[IO]:
481 with FileResourceHandle(mode=mode, log=log, filename=self.ospath, encoding=encoding) as buffer:
482 yield buffer # type: ignore