Coverage for python/lsst/resources/file.py: 80%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of lsst-resources.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# Use of this source code is governed by a 3-clause BSD-style
10# license that can be found in the LICENSE file.
12from __future__ import annotations
14import contextlib
15import copy
16import logging
17import os
18import os.path
19import posixpath
20import re
21import shutil
22import urllib.parse
24__all__ = ("FileResourcePath",)
26from typing import IO, TYPE_CHECKING, Iterator, List, Optional, Tuple, Union
28from ._resourcePath import ResourcePath
29from .utils import NoTransaction, os2posix, posix2os
31if TYPE_CHECKING: 31 ↛ 32line 31 didn't jump to line 32, because the condition on line 31 was never true
32 from .utils import TransactionProtocol
35log = logging.getLogger(__name__)
class FileResourcePath(ResourcePath):
    """Path for explicit ``file`` URI scheme."""

    # Every transfer mode accepted by `transfer_from`.
    transferModes = ("copy", "link", "symlink", "hardlink", "relsymlink", "auto", "move")
    # Mode that "auto" resolves to when the source is not a temporary file.
    transferDefault: str = "link"

    # By definition refers to a local file
    isLocal = True

    @property
    def ospath(self) -> str:
        """Path component of the URI localized to current OS.

        Will unquote URI path since a formal URI must include the quoting.
        """
        return urllib.parse.unquote(posix2os(self._uri.path))

    def exists(self) -> bool:
        """Indicate that the file exists."""
        # Uses os.path.exists so if there is a soft link that points
        # to a file that no longer exists this will return False
        return os.path.exists(self.ospath)

    def size(self) -> int:
        """Return the size of the file in bytes.

        Returns
        -------
        sz : `int`
            Size reported by `os.stat`, or 0 if the path is a directory.
        """
        if os.path.isdir(self.ospath):
            return 0
        return os.stat(self.ospath).st_size

    def remove(self) -> None:
        """Remove the resource."""
        os.remove(self.ospath)

    def _as_local(self) -> Tuple[str, bool]:
        """Return the local path of the file.

        This is an internal helper for ``as_local()``.

        Returns
        -------
        path : `str`
            The local path to this file.
        temporary : `bool`
            Always returns `False` (this is not a temporary file).
        """
        return self.ospath, False

    def read(self, size: int = -1) -> bytes:
        """Read bytes from the start of the file.

        Parameters
        ----------
        size : `int`, optional
            Maximum number of bytes to read. The default of -1 reads the
            entire file.

        Returns
        -------
        data : `bytes`
            The bytes read.
        """
        with open(self.ospath, "rb") as fh:
            return fh.read(size)

    def write(self, data: bytes, overwrite: bool = True) -> None:
        """Write the supplied data to the file.

        Parameters
        ----------
        data : `bytes`
            The bytes to write.
        overwrite : `bool`, optional
            If `True` (the default) the file is opened with mode ``wb``.
            Otherwise mode ``xb`` (exclusive creation) is used, which fails
            if the file already exists.
        """
        parent_dir = os.path.dirname(self.ospath)
        if not os.path.exists(parent_dir):
            os.makedirs(parent_dir, exist_ok=True)
        mode = "wb" if overwrite else "xb"
        with open(self.ospath, mode) as f:
            f.write(data)

    def mkdir(self) -> None:
        """Make the directory associated with this URI.

        Raises
        ------
        FileExistsError
            Raised if the path exists but is not a directory.
        """
        if not os.path.exists(self.ospath):
            os.makedirs(self.ospath, exist_ok=True)
        elif not os.path.isdir(self.ospath):
            raise FileExistsError(f"URI {self} exists but is not a directory!")

    def isdir(self) -> bool:
        """Return whether this URI is a directory.

        Returns
        -------
        isdir : `bool`
            `True` if this URI is a directory or looks like a directory,
            else `False`.
        """
        return self.dirLike or os.path.isdir(self.ospath)

    def transfer_from(
        self,
        src: ResourcePath,
        transfer: str,
        overwrite: bool = False,
        transaction: Optional[TransactionProtocol] = None,
    ) -> None:
        """Transfer the resource at ``src`` to this local file.

        Parameters
        ----------
        src : `ResourcePath`
            Source URI.
        transfer : `str`
            Mode to use for transferring the resource. Supports the following
            options: copy, link, symlink, hardlink, relsymlink, auto, move.
            ``auto`` resolves to ``transferDefault`` when the source is
            not a temporary file and to ``copy`` otherwise. ``link`` tries
            a hard link first and falls back to a symbolic link.
        overwrite : `bool`, optional
            Allow an existing file to be overwritten. Defaults to `False`.
        transaction : `~lsst.resources.utils.TransactionProtocol`, optional
            If a transaction is provided, undo actions will be registered.

        Raises
        ------
        ValueError
            Raised if ``transfer`` is not one of ``transferModes``.
        FileNotFoundError
            Raised if the local version of the source does not exist.
        RuntimeError
            Raised if a link-based mode is requested but the source is only
            available as a temporary file.
        FileExistsError
            Raised if the destination exists and ``overwrite`` is `False`.
        NotImplementedError
            Raised if the resolved transfer mode has no implementation.

        Notes
        -----
        If the source and destination turn out to be the same file the
        method returns without doing anything.
        """
        # Fail early to prevent delays if remote resources are requested
        if transfer not in self.transferModes:
            raise ValueError(f"Transfer mode '{transfer}' not supported by URI scheme {self.scheme}")

        # Existence checks can take time so only try if the log message
        # will be issued.
        if log.isEnabledFor(logging.DEBUG):
            log.debug(
                "Transferring %s [exists: %s] -> %s [exists: %s] (transfer=%s)",
                src,
                src.exists(),
                self,
                self.exists(),
                transfer,
            )

        # We do not have to special case FileResourcePath here because
        # as_local handles that.
        with src.as_local() as local_uri:
            is_temporary = local_uri.isTemporary
            local_src = local_uri.ospath

            # Short circuit if the URIs are identical immediately.
            if self == local_uri:
                log.debug(
                    "Target and destination URIs are identical: %s, returning immediately."
                    " No further action required.",
                    self,
                )
                return

            # Default transfer mode depends on whether we have a temporary
            # file or not.
            if transfer == "auto":
                transfer = self.transferDefault if not is_temporary else "copy"

            if not os.path.exists(local_src):
                if is_temporary:
                    msg = f"Local file {local_uri} downloaded from {src} has gone missing"
                else:
                    msg = f"Source URI {src} does not exist"
                raise FileNotFoundError(msg)

            # Follow soft links
            local_src = os.path.realpath(os.path.normpath(local_src))

            # All the modes involving linking use "link" somewhere
            if "link" in transfer and is_temporary:
                raise RuntimeError(
                    f"Can not use local file system transfer mode {transfer} for remote resource ({src})"
                )

            # For temporary files we can own them, so a copy can be done
            # as a cheaper move. Remember what the caller asked for since
            # that decides whether the source is removed at the end.
            requested_transfer = transfer
            if is_temporary and transfer == "copy":
                transfer = "move"

            # The output location should not exist unless overwrite=True.
            # Rather than use `exists()`, use os.stat since we might need
            # the full answer later.
            dest_stat: Optional[os.stat_result]
            try:
                # Do not read through links of the file itself.
                dest_stat = os.lstat(self.ospath)
            except FileNotFoundError:
                dest_stat = None

            # It is possible that the source URI and target URI refer
            # to the same file. This can happen for a number of reasons
            # (such as soft links in the path, or they really are the same).
            # In that case log a message and return as if the transfer
            # completed (it technically did). A temporary file download
            # can't be the same so the test can be skipped.
            if dest_stat and not is_temporary:
                # Be consistent and use lstat here (even though realpath
                # has been called). It does not harm.
                local_src_stat = os.lstat(local_src)
                if dest_stat.st_ino == local_src_stat.st_ino and dest_stat.st_dev == local_src_stat.st_dev:
                    log.debug(
                        "Destination URI %s is the same file as source URI %s, returning immediately."
                        " No further action required.",
                        self,
                        local_uri,
                    )
                    return

            if not overwrite and dest_stat:
                raise FileExistsError(
                    f"Destination path '{self}' already exists. Transfer from {src} cannot be completed."
                )

            # Make the path absolute (but don't follow links since that
            # would possibly cause us to end up in the wrong place if the
            # file existed already as a soft link)
            newFullPath = os.path.abspath(self.ospath)
            outputDir = os.path.dirname(newFullPath)
            if not os.path.isdir(outputDir):
                # Must create the directory -- this can not be rolled back
                # since another transfer running concurrently may
                # be relying on this existing.
                os.makedirs(outputDir, exist_ok=True)

            if transaction is None:
                # Use a no-op transaction to reduce code duplication
                transaction = NoTransaction()

            # For links the OS doesn't let us overwrite so if something does
            # exist we have to remove it before we do the actual "transfer"
            # below
            if "link" in transfer and overwrite and dest_stat:
                try:
                    self.remove()
                except Exception:
                    # Deliberately best-effort: if this fails the problem
                    # will manifest immediately below with a more relevant
                    # error message. Leave a trace for debugging.
                    log.debug(
                        "Unable to remove existing destination %s before linking", self, exc_info=True
                    )

            if transfer == "move":
                with transaction.undoWith(
                    f"move from {local_src}", shutil.move, newFullPath, local_src, copy_function=shutil.copy
                ):
                    shutil.move(local_src, newFullPath, copy_function=shutil.copy)
            elif transfer == "copy":
                with transaction.undoWith(f"copy from {local_src}", os.remove, newFullPath):
                    shutil.copy(local_src, newFullPath)
            elif transfer == "link":
                # Try hard link and if that fails use a symlink
                with transaction.undoWith(f"link to {local_src}", os.remove, newFullPath):
                    try:
                        os.link(local_src, newFullPath)
                    except OSError:
                        # Read through existing symlinks
                        os.symlink(local_src, newFullPath)
            elif transfer == "hardlink":
                with transaction.undoWith(f"hardlink to {local_src}", os.remove, newFullPath):
                    os.link(local_src, newFullPath)
            elif transfer == "symlink":
                # Read through existing symlinks
                with transaction.undoWith(f"symlink to {local_src}", os.remove, newFullPath):
                    os.symlink(local_src, newFullPath)
            elif transfer == "relsymlink":
                # This is a standard symlink but using a relative path
                # Need the directory name to give to relative root
                # A full file path confuses it into an extra ../
                newFullPathRoot = os.path.dirname(newFullPath)
                relPath = os.path.relpath(local_src, newFullPathRoot)
                with transaction.undoWith(f"relsymlink to {local_src}", os.remove, newFullPath):
                    os.symlink(relPath, newFullPath)
            else:
                raise NotImplementedError(f"Transfer type '{transfer}' not supported.")

        # This was an explicit move requested from a remote resource
        # try to remove that remote resource. We check is_temporary because
        # the local file would have been moved by shutil.move already.
        if requested_transfer == "move" and is_temporary:
            # Transactions do not work here
            src.remove()

    def walk(
        self, file_filter: Optional[Union[str, re.Pattern]] = None
    ) -> Iterator[Union[List, Tuple[ResourcePath, List[str], List[str]]]]:
        """Walk the directory tree returning matching files and directories.

        Parameters
        ----------
        file_filter : `str` or `re.Pattern`, optional
            Regex to filter out files from the list before it is returned.
            Only file names for which the pattern finds a match are kept.

        Yields
        ------
        dirpath : `ResourcePath`
            Current directory being examined.
        dirnames : `list` of `str`
            Names of subdirectories within dirpath.
        filenames : `list` of `str`
            Names of all the files within dirpath.

        Raises
        ------
        ValueError
            Raised if this URI is not a directory.
        """
        if not self.isdir():
            raise ValueError("Can not walk a non-directory URI")

        if isinstance(file_filter, str):
            file_filter = re.compile(file_filter)

        for root, dirs, files in os.walk(self.ospath):
            # Filter by the regex
            if file_filter is not None:
                files = [f for f in files if file_filter.search(f)]
            yield type(self)(root, forceAbsolute=False, forceDirectory=True), dirs, files

    @classmethod
    def _fixupPathUri(
        cls,
        parsed: urllib.parse.ParseResult,
        root: Optional[Union[str, ResourcePath]] = None,
        forceAbsolute: bool = False,
        forceDirectory: bool = False,
    ) -> Tuple[urllib.parse.ParseResult, bool]:
        """Fix up relative paths in URI instances.

        Parameters
        ----------
        parsed : `~urllib.parse.ParseResult`
            The result from parsing a URI using `urllib.parse`.
        root : `str` or `ResourcePath`, optional
            Path to use as root when converting relative to absolute.
            If `None`, it will be the current working directory. This
            is a local file system path, or a file URI. It is only used if
            a file-scheme is used incorrectly with a relative path.
        forceAbsolute : `bool`, ignored
            Has no effect for this subclass. ``file`` URIs are always
            absolute.
        forceDirectory : `bool`, optional
            If `True` forces the URI to end with a separator, otherwise given
            URI is interpreted as is.

        Returns
        -------
        modified : `~urllib.parse.ParseResult`
            Update result if a URI is being handled.
        dirLike : `bool`
            `True` if given parsed URI has a trailing separator or
            forceDirectory is True. Otherwise `False`.

        Raises
        ------
        RuntimeError
            Raised if ``root`` is a `ResourcePath` with a scheme other than
            ``file``.

        Notes
        -----
        Relative paths are explicitly not supported by RFC8089 but `urllib`
        does accept URIs of the form ``file:relative/path.ext``. They need
        to be turned into absolute paths before they can be used. This is
        always done regardless of the ``forceAbsolute`` parameter.
        """
        # assume we are not dealing with a directory like URI
        dirLike = False

        # file URI implies POSIX path separators so split as POSIX,
        # then join as os, and convert to abspath. Do not handle
        # home directories since "file" scheme is explicitly documented
        # to not do tilde expansion.
        sep = posixpath.sep

        # For local file system we can explicitly check to see if this
        # really is a directory. The URI might point to a location that
        # does not exists yet but all that matters is if it is a directory
        # then we make sure use that fact. No need to do the check if
        # we are already being told.
        if not forceDirectory and posixpath.isdir(parsed.path):
            forceDirectory = True

        # For an absolute path all we need to do is check if we need
        # to force the directory separator
        if posixpath.isabs(parsed.path):
            if forceDirectory:
                if not parsed.path.endswith(sep):
                    parsed = parsed._replace(path=parsed.path + sep)
                dirLike = True
            return copy.copy(parsed), dirLike

        # Relative path so must fix it to be compliant with the standard

        if root is None:
            root = os.path.abspath(os.path.curdir)
        elif isinstance(root, ResourcePath):
            if root.scheme and root.scheme != "file":
                raise RuntimeError(f"The override root must be a file URI not {root.scheme}")
            root = os.path.abspath(root.ospath)

        fixed_path = posixpath.normpath(posixpath.join(os2posix(root), parsed.path))

        # normpath strips trailing "/" so put it back if necessary
        # Acknowledge that trailing separator exists.
        if forceDirectory or (parsed.path.endswith(sep) and not fixed_path.endswith(sep)):
            fixed_path += sep
            dirLike = True

        # ParseResult is a NamedTuple so _replace is standard API
        parsed = parsed._replace(path=fixed_path)

        if parsed.params or parsed.query:
            log.warning("Additional items unexpectedly encountered in file URI: %s", parsed.geturl())

        return parsed, dirLike

    @contextlib.contextmanager
    def open(
        self,
        mode: str = "r",
        *,
        encoding: Optional[str] = None,
        prefer_file_temporary: bool = False,
    ) -> Iterator[IO]:
        # Docstring inherited.
        # The file is already local so it is opened directly with the
        # builtin ``open``; ``prefer_file_temporary`` is not used here.
        with open(self.ospath, mode=mode, encoding=encoding) as buffer:
            yield buffer