Coverage for python/lsst/daf/butler/core/_butlerUri/file.py: 15%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24import os
25import os.path
26import shutil
27import urllib.parse
28import posixpath
29import copy
30import logging
31import re
33__all__ = ('ButlerFileURI',)
35from typing import (
36 TYPE_CHECKING,
37 Iterator,
38 List,
39 Optional,
40 Tuple,
41 Union,
42)
44from .utils import NoTransaction, os2posix, posix2os
45from ._butlerUri import ButlerURI
if TYPE_CHECKING:
    # Only needed by type annotations, so it is imported solely when a
    # static type checker is analysing the module.
    from ..datastore import DatastoreTransaction

# Logger named after this module; used for debug and warning messages.
log = logging.getLogger(__name__)
class ButlerFileURI(ButlerURI):
    """URI for explicit ``file`` scheme."""

    # Transfer modes that ``transfer_from`` accepts; any other value is
    # rejected there with a ValueError.
    transferModes = ("copy", "link", "symlink", "hardlink", "relsymlink", "auto", "move")
    # Mode that ``transfer_from`` substitutes for "auto" when the source is
    # not a temporary file.
    transferDefault: str = "link"

    # By definition refers to a local file
    isLocal = True
64 @property
65 def ospath(self) -> str:
66 """Path component of the URI localized to current OS.
68 Will unquote URI path since a formal URI must include the quoting.
69 """
70 return urllib.parse.unquote(posix2os(self._uri.path))
72 def exists(self) -> bool:
73 """Indicate that the file exists."""
74 # Uses os.path.exists so if there is a soft link that points
75 # to a file that no longer exists this will return False
76 return os.path.exists(self.ospath)
78 def size(self) -> int:
79 """Return the size of the file in bytes."""
80 if not os.path.isdir(self.ospath):
81 stat = os.stat(self.ospath)
82 sz = stat.st_size
83 else:
84 sz = 0
85 return sz
87 def remove(self) -> None:
88 """Remove the resource."""
89 os.remove(self.ospath)
91 def _as_local(self) -> Tuple[str, bool]:
92 """Return the local path of the file.
94 This is an internal helper for ``as_local()``.
96 Returns
97 -------
98 path : `str`
99 The local path to this file.
100 temporary : `bool`
101 Always returns `False` (this is not a temporary file).
102 """
103 return self.ospath, False
105 def read(self, size: int = -1) -> bytes:
106 """Return the entire content of the file as bytes."""
107 with open(self.ospath, "rb") as fh:
108 return fh.read(size)
110 def write(self, data: bytes, overwrite: bool = True) -> None:
111 """Write the supplied data to the file."""
112 dir = os.path.dirname(self.ospath)
113 if not os.path.exists(dir):
114 os.makedirs(dir, exist_ok=True)
115 if overwrite:
116 mode = "wb"
117 else:
118 mode = "xb"
119 with open(self.ospath, mode) as f:
120 f.write(data)
122 def mkdir(self) -> None:
123 """Make the directory associated with this URI."""
124 if not os.path.exists(self.ospath):
125 os.makedirs(self.ospath, exist_ok=True)
126 elif not os.path.isdir(self.ospath):
127 raise FileExistsError(f"URI {self} exists but is not a directory!")
129 def isdir(self) -> bool:
130 """Return whether this URI is a directory.
132 Returns
133 -------
134 isdir : `bool`
135 `True` if this URI is a directory or looks like a directory,
136 else `False`.
137 """
138 return self.dirLike or os.path.isdir(self.ospath)
    def transfer_from(self, src: ButlerURI, transfer: str,
                      overwrite: bool = False,
                      transaction: Optional[Union[DatastoreTransaction, NoTransaction]] = None) -> None:
        """Transfer the resource at ``src`` to the local file of this URI.

        Parameters
        ----------
        src : `ButlerURI`
            Source URI.
        transfer : `str`
            Mode to use for transferring the resource. Supports the following
            options: copy, link, symlink, hardlink, relsymlink, auto, move.
        overwrite : `bool`, optional
            Allow an existing file to be overwritten. Defaults to `False`.
        transaction : `DatastoreTransaction`, optional
            If a transaction is provided, undo actions will be registered.

        Raises
        ------
        ValueError
            Raised if ``transfer`` is not one of the supported modes.
        FileNotFoundError
            Raised if the local form of the source does not exist.
        RuntimeError
            Raised if a link-based mode is requested but the local form of
            the source is a temporary file.
        FileExistsError
            Raised if the destination exists and ``overwrite`` is `False`.
        NotImplementedError
            Raised if the resolved transfer mode has no implementation.

        Notes
        -----
        The mode "auto" resolves to ``transferDefault`` when the local form
        of the source is not a temporary file and to "copy" when it is.
        A "copy" of a temporary file is carried out as a move of that
        temporary file. If source and destination are the same URI, or are
        the same file on disk, the method returns without doing anything.
        """
        # Fail early to prevent delays if remote resources are requested
        if transfer not in self.transferModes:
            raise ValueError(f"Transfer mode '{transfer}' not supported by URI scheme {self.scheme}")

        # Existence checks can take time so only try if the log message
        # will be issued.
        if log.isEnabledFor(logging.DEBUG):
            log.debug("Transferring %s [exists: %s] -> %s [exists: %s] (transfer=%s)",
                      src, src.exists(), self, self.exists(), transfer)

        # We do not have to special case ButlerFileURI here because
        # as_local handles that.
        with src.as_local() as local_uri:
            is_temporary = local_uri.isTemporary
            local_src = local_uri.ospath

            # Short circuit if the URIs are identical immediately.
            if self == local_uri:
                log.debug("Target and destination URIs are identical: %s, returning immediately."
                          " No further action required.", self)
                return

            # Default transfer mode depends on whether we have a temporary
            # file or not.
            if transfer == "auto":
                transfer = self.transferDefault if not is_temporary else "copy"

            if not os.path.exists(local_src):
                if is_temporary:
                    msg = f"Local file {local_uri} downloaded from {src} has gone missing"
                else:
                    msg = f"Source URI {src} does not exist"
                raise FileNotFoundError(msg)

            # Follow soft links
            local_src = os.path.realpath(os.path.normpath(local_src))

            # All the modes involving linking use "link" somewhere
            if "link" in transfer and is_temporary:
                raise RuntimeError("Can not use local file system transfer mode"
                                   f" {transfer} for remote resource ({src})")

            # For temporary files we can own them, so a requested copy is
            # done as a move. Remember what was originally requested since
            # that decides below whether the source itself is removed.
            requested_transfer = transfer
            if is_temporary and transfer == "copy":
                transfer = "move"

            # The output location should not exist unless overwrite=True.
            # Rather than use `exists()`, use os.stat since we might need
            # the full answer later.
            dest_stat: Optional[os.stat_result]
            try:
                # Do not read through links of the file itself.
                dest_stat = os.lstat(self.ospath)
            except FileNotFoundError:
                dest_stat = None

            # It is possible that the source URI and target URI refer
            # to the same file. This can happen for a number of reasons
            # (such as soft links in the path, or they really are the same).
            # In that case log a message and return as if the transfer
            # completed (it technically did). A temporary file download
            # can't be the same so the test can be skipped.
            if dest_stat and not is_temporary:
                # Be consistent and use lstat here (even though realpath
                # has been called). It does not harm.
                local_src_stat = os.lstat(local_src)
                # Same inode on the same device means the same file.
                if (dest_stat.st_ino == local_src_stat.st_ino
                        and dest_stat.st_dev == local_src_stat.st_dev):
                    log.debug("Destination URI %s is the same file as source URI %s, returning immediately."
                              " No further action required.", self, local_uri)
                    return

            if not overwrite and dest_stat:
                raise FileExistsError(f"Destination path '{self}' already exists. Transfer "
                                      f"from {src} cannot be completed.")

            # Make the path absolute (but don't follow links since that
            # would possibly cause us to end up in the wrong place if the
            # file existed already as a soft link)
            newFullPath = os.path.abspath(self.ospath)
            outputDir = os.path.dirname(newFullPath)
            if not os.path.isdir(outputDir):
                # Must create the directory -- this can not be rolled back
                # since another transfer running concurrently may
                # be relying on this existing.
                os.makedirs(outputDir, exist_ok=True)

            if transaction is None:
                # Use a no-op transaction to reduce code duplication
                transaction = NoTransaction()

            # For links the OS doesn't let us overwrite so if something does
            # exist we have to remove it before we do the actual "transfer"
            # below
            if "link" in transfer and overwrite and dest_stat:
                try:
                    self.remove()
                except Exception:
                    # If this fails we ignore it since it's a problem
                    # that will manifest immediately below with a more relevant
                    # error message
                    pass

            # Each branch registers an undo action with the transaction
            # before performing the file system operation.
            if transfer == "move":
                with transaction.undoWith(f"move from {local_src}", shutil.move, newFullPath, local_src):
                    shutil.move(local_src, newFullPath)
            elif transfer == "copy":
                with transaction.undoWith(f"copy from {local_src}", os.remove, newFullPath):
                    shutil.copy(local_src, newFullPath)
            elif transfer == "link":
                # Try hard link and if that fails use a symlink
                with transaction.undoWith(f"link to {local_src}", os.remove, newFullPath):
                    try:
                        os.link(local_src, newFullPath)
                    except OSError:
                        # Read through existing symlinks
                        os.symlink(local_src, newFullPath)
            elif transfer == "hardlink":
                with transaction.undoWith(f"hardlink to {local_src}", os.remove, newFullPath):
                    os.link(local_src, newFullPath)
            elif transfer == "symlink":
                # Read through existing symlinks
                with transaction.undoWith(f"symlink to {local_src}", os.remove, newFullPath):
                    os.symlink(local_src, newFullPath)
            elif transfer == "relsymlink":
                # This is a standard symlink but using a relative path
                # Need the directory name to give to relative root
                # A full file path confuses it into an extra ../
                newFullPathRoot = os.path.dirname(newFullPath)
                relPath = os.path.relpath(local_src, newFullPathRoot)
                with transaction.undoWith(f"relsymlink to {local_src}", os.remove, newFullPath):
                    os.symlink(relPath, newFullPath)
            else:
                raise NotImplementedError("Transfer type '{}' not supported.".format(transfer))

        # This was an explicit move requested from a remote resource
        # try to remove that remote resource. We check is_temporary because
        # the local file would have been moved by shutil.move already.
        if requested_transfer == "move" and is_temporary:
            # Transactions do not work here
            src.remove()
300 def walk(self, file_filter: Optional[Union[str, re.Pattern]] = None) -> Iterator[Union[List,
301 Tuple[ButlerURI,
302 List[str],
303 List[str]]]]:
304 """Walk the directory tree returning matching files and directories.
306 Parameters
307 ----------
308 file_filter : `str` or `re.Pattern`, optional
309 Regex to filter out files from the list before it is returned.
311 Yields
312 ------
313 dirpath : `ButlerURI`
314 Current directory being examined.
315 dirnames : `list` of `str`
316 Names of subdirectories within dirpath.
317 filenames : `list` of `str`
318 Names of all the files within dirpath.
319 """
320 if not self.isdir():
321 raise ValueError("Can not walk a non-directory URI")
323 if isinstance(file_filter, str):
324 file_filter = re.compile(file_filter)
326 for root, dirs, files in os.walk(self.ospath):
327 # Filter by the regex
328 if file_filter is not None:
329 files = [f for f in files if file_filter.search(f)]
330 yield type(self)(root, forceAbsolute=False, forceDirectory=True), dirs, files
332 @classmethod
333 def _fixupPathUri(cls, parsed: urllib.parse.ParseResult, root: Optional[Union[str, ButlerURI]] = None,
334 forceAbsolute: bool = False,
335 forceDirectory: bool = False) -> Tuple[urllib.parse.ParseResult, bool]:
336 """Fix up relative paths in URI instances.
338 Parameters
339 ----------
340 parsed : `~urllib.parse.ParseResult`
341 The result from parsing a URI using `urllib.parse`.
342 root : `str` or `ButlerURI`, optional
343 Path to use as root when converting relative to absolute.
344 If `None`, it will be the current working directory. This
345 is a local file system path, or a file URI. It is only used if
346 a file-scheme is used incorrectly with a relative path.
347 forceAbsolute : `bool`, ignored
348 Has no effect for this subclass. ``file`` URIs are always
349 absolute.
350 forceDirectory : `bool`, optional
351 If `True` forces the URI to end with a separator, otherwise given
352 URI is interpreted as is.
354 Returns
355 -------
356 modified : `~urllib.parse.ParseResult`
357 Update result if a URI is being handled.
358 dirLike : `bool`
359 `True` if given parsed URI has a trailing separator or
360 forceDirectory is True. Otherwise `False`.
362 Notes
363 -----
364 Relative paths are explicitly not supported by RFC8089 but `urllib`
365 does accept URIs of the form ``file:relative/path.ext``. They need
366 to be turned into absolute paths before they can be used. This is
367 always done regardless of the ``forceAbsolute`` parameter.
368 """
369 # assume we are not dealing with a directory like URI
370 dirLike = False
372 # file URI implies POSIX path separators so split as POSIX,
373 # then join as os, and convert to abspath. Do not handle
374 # home directories since "file" scheme is explicitly documented
375 # to not do tilde expansion.
376 sep = posixpath.sep
378 # For local file system we can explicitly check to see if this
379 # really is a directory. The URI might point to a location that
380 # does not exists yet but all that matters is if it is a directory
381 # then we make sure use that fact. No need to do the check if
382 # we are already being told.
383 if not forceDirectory and posixpath.isdir(parsed.path):
384 forceDirectory = True
386 # For an absolute path all we need to do is check if we need
387 # to force the directory separator
388 if posixpath.isabs(parsed.path):
389 if forceDirectory:
390 if not parsed.path.endswith(sep):
391 parsed = parsed._replace(path=parsed.path+sep)
392 dirLike = True
393 return copy.copy(parsed), dirLike
395 # Relative path so must fix it to be compliant with the standard
397 # Replacement values for the URI
398 replacements = {}
400 if root is None:
401 root = os.path.abspath(os.path.curdir)
402 elif isinstance(root, ButlerURI):
403 if root.scheme and root.scheme != "file":
404 raise RuntimeError(f"The override root must be a file URI not {root.scheme}")
405 root = os.path.abspath(root.ospath)
407 replacements["path"] = posixpath.normpath(posixpath.join(os2posix(root), parsed.path))
409 # normpath strips trailing "/" so put it back if necessary
410 # Acknowledge that trailing separator exists.
411 if forceDirectory or (parsed.path.endswith(sep) and not replacements["path"].endswith(sep)):
412 replacements["path"] += sep
413 dirLike = True
415 # ParseResult is a NamedTuple so _replace is standard API
416 parsed = parsed._replace(**replacements)
418 if parsed.params or parsed.query:
419 log.warning("Additional items unexpectedly encountered in file URI: %s", parsed.geturl())
421 return parsed, dirLike