Coverage for python/lsst/resources/file.py: 80%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of lsst-resources.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# Use of this source code is governed by a 3-clause BSD-style
10# license that can be found in the LICENSE file.
12from __future__ import annotations
14import copy
15import logging
16import os
17import os.path
18import posixpath
19import re
20import shutil
21import urllib.parse
23__all__ = ("FileResourcePath",)
25from typing import TYPE_CHECKING, Iterator, List, Optional, Tuple, Union
27from ._resourcePath import ResourcePath
28from .utils import NoTransaction, os2posix, posix2os
30if TYPE_CHECKING: 30 ↛ 31line 30 didn't jump to line 31, because the condition on line 30 was never true
31 from .utils import TransactionProtocol
34log = logging.getLogger(__name__)
class FileResourcePath(ResourcePath):
    """Path for explicit ``file`` URI scheme."""

    transferModes = ("copy", "link", "symlink", "hardlink", "relsymlink", "auto", "move")
    transferDefault: str = "link"

    # By definition refers to a local file
    isLocal = True

    @property
    def ospath(self) -> str:
        """Path component of the URI localized to current OS.

        Will unquote URI path since a formal URI must include the quoting.
        """
        return urllib.parse.unquote(posix2os(self._uri.path))

    def exists(self) -> bool:
        """Indicate that the file exists."""
        # Uses os.path.exists so if there is a soft link that points
        # to a file that no longer exists this will return False
        return os.path.exists(self.ospath)

    def size(self) -> int:
        """Return the size of the file in bytes.

        Returns
        -------
        sz : `int`
            Size of the file in bytes. Directories always report 0.
        """
        if not os.path.isdir(self.ospath):
            stat = os.stat(self.ospath)
            sz = stat.st_size
        else:
            sz = 0
        return sz

    def remove(self) -> None:
        """Remove the resource."""
        os.remove(self.ospath)

    def _as_local(self) -> Tuple[str, bool]:
        """Return the local path of the file.

        This is an internal helper for ``as_local()``.

        Returns
        -------
        path : `str`
            The local path to this file.
        temporary : `bool`
            Always returns `False` (this is not a temporary file).
        """
        return self.ospath, False

    def read(self, size: int = -1) -> bytes:
        """Read the contents of the file as bytes.

        Parameters
        ----------
        size : `int`, optional
            Maximum number of bytes to read. The default of -1 reads the
            entire file.

        Returns
        -------
        data : `bytes`
            The bytes read from the file.
        """
        with open(self.ospath, "rb") as fh:
            return fh.read(size)

    def write(self, data: bytes, overwrite: bool = True) -> None:
        """Write the supplied data to the file.

        Parameters
        ----------
        data : `bytes`
            Bytes to write to the file.
        overwrite : `bool`, optional
            If `True` (the default) an existing file is replaced. If
            `False` the file is opened in exclusive-creation mode.

        Raises
        ------
        FileExistsError
            Raised if ``overwrite`` is `False` and the file already exists.
        """
        # Parent directories are created on demand.
        parent_dir = os.path.dirname(self.ospath)
        if not os.path.exists(parent_dir):
            os.makedirs(parent_dir, exist_ok=True)
        if overwrite:
            mode = "wb"
        else:
            # Exclusive creation: open() fails if the file is already there.
            mode = "xb"
        with open(self.ospath, mode) as f:
            f.write(data)

    def mkdir(self) -> None:
        """Make the directory associated with this URI.

        Raises
        ------
        FileExistsError
            Raised if the path exists but is not a directory.
        """
        if not os.path.exists(self.ospath):
            os.makedirs(self.ospath, exist_ok=True)
        elif not os.path.isdir(self.ospath):
            raise FileExistsError(f"URI {self} exists but is not a directory!")

    def isdir(self) -> bool:
        """Return whether this URI is a directory.

        Returns
        -------
        isdir : `bool`
            `True` if this URI is a directory or looks like a directory,
            else `False`.
        """
        return self.dirLike or os.path.isdir(self.ospath)

    def transfer_from(
        self,
        src: ResourcePath,
        transfer: str,
        overwrite: bool = False,
        transaction: Optional[TransactionProtocol] = None,
    ) -> None:
        """Transfer the supplied source resource to this local file.

        Parameters
        ----------
        src : `ResourcePath`
            Source URI.
        transfer : `str`
            Mode to use for transferring the resource. Supports the following
            options: copy, link, symlink, hardlink, relsymlink, auto, move.
            ``auto`` resolves to the class default (``transferDefault``)
            unless the source had to be materialized as a temporary local
            file, in which case ``copy`` is used.
        overwrite : `bool`, optional
            Allow an existing file to be overwritten. Defaults to `False`.
        transaction : `~lsst.resources.utils.TransactionProtocol`, optional
            If a transaction is provided, undo actions will be registered.

        Raises
        ------
        ValueError
            Raised if the transfer mode is not one of ``transferModes``.
        FileNotFoundError
            Raised if the (local copy of the) source does not exist.
        FileExistsError
            Raised if the destination exists and ``overwrite`` is `False`.
        RuntimeError
            Raised if a link-based mode is requested for a source that is
            only available as a temporary local file.
        """
        # Fail early to prevent delays if remote resources are requested
        if transfer not in self.transferModes:
            raise ValueError(f"Transfer mode '{transfer}' not supported by URI scheme {self.scheme}")

        # Existence checks can take time so only try if the log message
        # will be issued.
        if log.isEnabledFor(logging.DEBUG):
            log.debug(
                "Transferring %s [exists: %s] -> %s [exists: %s] (transfer=%s)",
                src,
                src.exists(),
                self,
                self.exists(),
                transfer,
            )

        # We do not have to special case FileResourcePath here because
        # as_local handles that.
        with src.as_local() as local_uri:
            is_temporary = local_uri.isTemporary
            local_src = local_uri.ospath

            # Short circuit if the URIs are identical immediately.
            if self == local_uri:
                log.debug(
                    "Target and destination URIs are identical: %s, returning immediately."
                    " No further action required.",
                    self,
                )
                return

            # Default transfer mode depends on whether we have a temporary
            # file or not.
            if transfer == "auto":
                transfer = self.transferDefault if not is_temporary else "copy"

            if not os.path.exists(local_src):
                if is_temporary:
                    msg = f"Local file {local_uri} downloaded from {src} has gone missing"
                else:
                    msg = f"Source URI {src} does not exist"
                raise FileNotFoundError(msg)

            # Follow soft links
            local_src = os.path.realpath(os.path.normpath(local_src))

            # All the modes involving linking use "link" somewhere
            if "link" in transfer and is_temporary:
                raise RuntimeError(
                    f"Can not use local file system transfer mode {transfer} for remote resource ({src})"
                )

            # For temporary files we can own them
            requested_transfer = transfer
            if is_temporary and transfer == "copy":
                transfer = "move"

            # The output location should not exist unless overwrite=True.
            # Rather than use `exists()`, use os.stat since we might need
            # the full answer later.
            dest_stat: Optional[os.stat_result]
            try:
                # Do not read through links of the file itself.
                dest_stat = os.lstat(self.ospath)
            except FileNotFoundError:
                dest_stat = None

            # It is possible that the source URI and target URI refer
            # to the same file. This can happen for a number of reasons
            # (such as soft links in the path, or they really are the same).
            # In that case log a message and return as if the transfer
            # completed (it technically did). A temporary file download
            # can't be the same so the test can be skipped.
            if dest_stat and not is_temporary:
                # Be consistent and use lstat here (even though realpath
                # has been called). It does not harm.
                local_src_stat = os.lstat(local_src)
                if dest_stat.st_ino == local_src_stat.st_ino and dest_stat.st_dev == local_src_stat.st_dev:
                    log.debug(
                        "Destination URI %s is the same file as source URI %s, returning immediately."
                        " No further action required.",
                        self,
                        local_uri,
                    )
                    return

            if not overwrite and dest_stat:
                raise FileExistsError(
                    f"Destination path '{self}' already exists. Transfer from {src} cannot be completed."
                )

            # Make the path absolute (but don't follow links since that
            # would possibly cause us to end up in the wrong place if the
            # file existed already as a soft link)
            newFullPath = os.path.abspath(self.ospath)
            outputDir = os.path.dirname(newFullPath)
            if not os.path.isdir(outputDir):
                # Must create the directory -- this can not be rolled back
                # since another transfer running concurrently may
                # be relying on this existing.
                os.makedirs(outputDir, exist_ok=True)

            if transaction is None:
                # Use a no-op transaction to reduce code duplication
                transaction = NoTransaction()

            # For links the OS doesn't let us overwrite so if something does
            # exist we have to remove it before we do the actual "transfer"
            # below
            if "link" in transfer and overwrite and dest_stat:
                try:
                    self.remove()
                except Exception as err:
                    # Deliberately best-effort: if the removal fails the
                    # problem will manifest immediately below with a more
                    # relevant error message. Record it for debugging only.
                    log.debug("Ignoring failure to remove existing %s prior to linking: %s", self, err)

            if transfer == "move":
                with transaction.undoWith(f"move from {local_src}", shutil.move, newFullPath, local_src):
                    shutil.move(local_src, newFullPath)
            elif transfer == "copy":
                with transaction.undoWith(f"copy from {local_src}", os.remove, newFullPath):
                    shutil.copy(local_src, newFullPath)
            elif transfer == "link":
                # Try hard link and if that fails use a symlink
                with transaction.undoWith(f"link to {local_src}", os.remove, newFullPath):
                    try:
                        os.link(local_src, newFullPath)
                    except OSError:
                        # Read through existing symlinks
                        os.symlink(local_src, newFullPath)
            elif transfer == "hardlink":
                with transaction.undoWith(f"hardlink to {local_src}", os.remove, newFullPath):
                    os.link(local_src, newFullPath)
            elif transfer == "symlink":
                # Read through existing symlinks
                with transaction.undoWith(f"symlink to {local_src}", os.remove, newFullPath):
                    os.symlink(local_src, newFullPath)
            elif transfer == "relsymlink":
                # This is a standard symlink but using a relative path
                # Need the directory name to give to relative root
                # A full file path confuses it into an extra ../
                newFullPathRoot = os.path.dirname(newFullPath)
                relPath = os.path.relpath(local_src, newFullPathRoot)
                with transaction.undoWith(f"relsymlink to {local_src}", os.remove, newFullPath):
                    os.symlink(relPath, newFullPath)
            else:
                raise NotImplementedError(f"Transfer type '{transfer}' not supported.")

        # This was an explicit move requested from a remote resource
        # try to remove that remote resource. We check is_temporary because
        # the local file would have been moved by shutil.move already.
        if requested_transfer == "move" and is_temporary:
            # Transactions do not work here
            src.remove()

    def walk(
        self, file_filter: Optional[Union[str, re.Pattern]] = None
    ) -> Iterator[Union[List, Tuple[ResourcePath, List[str], List[str]]]]:
        """Walk the directory tree returning matching files and directories.

        Parameters
        ----------
        file_filter : `str` or `re.Pattern`, optional
            Regex to filter out files from the list before it is returned.
            Only file names for which the regex finds a match are kept.

        Yields
        ------
        dirpath : `ResourcePath`
            Current directory being examined.
        dirnames : `list` of `str`
            Names of subdirectories within dirpath.
        filenames : `list` of `str`
            Names of all the files within dirpath.

        Raises
        ------
        ValueError
            Raised if this URI is not a directory.
        """
        if not self.isdir():
            raise ValueError("Can not walk a non-directory URI")

        if isinstance(file_filter, str):
            file_filter = re.compile(file_filter)

        for root, dirs, files in os.walk(self.ospath):
            # Filter by the regex
            if file_filter is not None:
                files = [f for f in files if file_filter.search(f)]
            yield type(self)(root, forceAbsolute=False, forceDirectory=True), dirs, files

    @classmethod
    def _fixupPathUri(
        cls,
        parsed: urllib.parse.ParseResult,
        root: Optional[Union[str, ResourcePath]] = None,
        forceAbsolute: bool = False,
        forceDirectory: bool = False,
    ) -> Tuple[urllib.parse.ParseResult, bool]:
        """Fix up relative paths in URI instances.

        Parameters
        ----------
        parsed : `~urllib.parse.ParseResult`
            The result from parsing a URI using `urllib.parse`.
        root : `str` or `ResourcePath`, optional
            Path to use as root when converting relative to absolute.
            If `None`, it will be the current working directory. This
            is a local file system path, or a file URI. It is only used if
            a file-scheme is used incorrectly with a relative path.
        forceAbsolute : `bool`, ignored
            Has no effect for this subclass. ``file`` URIs are always
            absolute.
        forceDirectory : `bool`, optional
            If `True` forces the URI to end with a separator, otherwise given
            URI is interpreted as is.

        Returns
        -------
        modified : `~urllib.parse.ParseResult`
            Update result if a URI is being handled.
        dirLike : `bool`
            `True` if given parsed URI has a trailing separator or
            forceDirectory is True. Otherwise `False`.

        Raises
        ------
        RuntimeError
            Raised if ``root`` is a `ResourcePath` with a scheme other than
            ``file``.

        Notes
        -----
        Relative paths are explicitly not supported by RFC8089 but `urllib`
        does accept URIs of the form ``file:relative/path.ext``. They need
        to be turned into absolute paths before they can be used. This is
        always done regardless of the ``forceAbsolute`` parameter.
        """
        # assume we are not dealing with a directory like URI
        dirLike = False

        # file URI implies POSIX path separators so split as POSIX,
        # then join as os, and convert to abspath. Do not handle
        # home directories since "file" scheme is explicitly documented
        # to not do tilde expansion.
        sep = posixpath.sep

        # For local file system we can explicitly check to see if this
        # really is a directory. The URI might point to a location that
        # does not exists yet but all that matters is if it is a directory
        # then we make sure use that fact. No need to do the check if
        # we are already being told.
        # The URI path is percent-encoded so it must be unquoted and
        # localized (exactly as ``ospath`` does) before asking the file
        # system about it; otherwise a directory whose name needs quoting
        # would never be recognized.
        if not forceDirectory and os.path.isdir(urllib.parse.unquote(posix2os(parsed.path))):
            forceDirectory = True

        # For an absolute path all we need to do is check if we need
        # to force the directory separator
        if posixpath.isabs(parsed.path):
            if forceDirectory:
                if not parsed.path.endswith(sep):
                    parsed = parsed._replace(path=parsed.path + sep)
                dirLike = True
            return copy.copy(parsed), dirLike

        # Relative path so must fix it to be compliant with the standard

        # Replacement values for the URI
        replacements = {}

        if root is None:
            root = os.path.abspath(os.path.curdir)
        elif isinstance(root, ResourcePath):
            if root.scheme and root.scheme != "file":
                raise RuntimeError(f"The override root must be a file URI not {root.scheme}")
            root = os.path.abspath(root.ospath)

        replacements["path"] = posixpath.normpath(posixpath.join(os2posix(root), parsed.path))

        # normpath strips trailing "/" so put it back if necessary
        # Acknowledge that trailing separator exists.
        if forceDirectory or (parsed.path.endswith(sep) and not replacements["path"].endswith(sep)):
            replacements["path"] += sep
            dirLike = True

        # ParseResult is a NamedTuple so _replace is standard API
        parsed = parsed._replace(**replacements)

        if parsed.params or parsed.query:
            log.warning("Additional items unexpectedly encountered in file URI: %s", parsed.geturl())

        return parsed, dirLike