Coverage for python/lsst/daf/butler/core/_butlerUri/file.py : 16%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24import os
25import os.path
26import shutil
27import urllib.parse
28import posixpath
29import copy
30import logging
31import re
33__all__ = ('ButlerFileURI',)
35from typing import (
36 TYPE_CHECKING,
37 Iterator,
38 List,
39 Optional,
40 Tuple,
41 Union,
42)
44from ..utils import safeMakeDir
45from .utils import NoTransaction, os2posix, posix2os
46from ._butlerUri import ButlerURI
49if TYPE_CHECKING: 49 ↛ 50line 49 didn't jump to line 50, because the condition on line 49 was never true
50 from ..datastore import DatastoreTransaction
53log = logging.getLogger(__name__)
class ButlerFileURI(ButlerURI):
    """URI for explicit ``file`` scheme."""

    # Transfer modes accepted by ``transfer_from``; any other mode is
    # rejected there with a `ValueError`.
    transferModes = ("copy", "link", "symlink", "hardlink", "relsymlink", "auto", "move")
    # Mode that ``transfer_from`` substitutes for "auto" when the source is
    # not a temporary local copy.
    transferDefault: str = "link"

    # By definition refers to a local file
    isLocal = True
@property
def ospath(self) -> str:
    """Path component of the URI localized to current OS.

    The URI path is passed through ``posix2os`` and the result is then
    percent-decoded, because a formal URI carries its path in quoted form.
    """
    localized = posix2os(self._uri.path)
    return urllib.parse.unquote(localized)
def exists(self) -> bool:
    """Indicate that the file exists.

    Returns
    -------
    exists : `bool`
        Result of `os.path.exists` for the local path. A soft link whose
        target no longer exists therefore reports `False`.
    """
    local_path = self.ospath
    return os.path.exists(local_path)
def size(self) -> int:
    """Return the size of the file in bytes.

    Returns
    -------
    sz : `int`
        ``st_size`` as reported by `os.stat`, or 0 if the path is a
        directory.
    """
    # Directories are reported as empty rather than being passed to stat.
    if os.path.isdir(self.ospath):
        return 0
    return os.stat(self.ospath).st_size
def remove(self) -> None:
    """Remove the resource.

    The local path is handed to `os.remove`; any error it raises
    propagates to the caller.
    """
    target = self.ospath
    os.remove(target)
92 def _as_local(self) -> Tuple[str, bool]:
93 """Return the local path of the file.
95 This is an internal helper for ``as_local()``.
97 Returns
98 -------
99 path : `str`
100 The local path to this file.
101 temporary : `bool`
102 Always returns `False` (this is not a temporary file).
103 """
104 return self.ospath, False
def read(self, size: int = -1) -> bytes:
    """Return content of the file as bytes.

    Parameters
    ----------
    size : `int`, optional
        Number of bytes to request from the file object. The default of
        -1 reads the entire file.

    Returns
    -------
    data : `bytes`
        The bytes read from the file, opened in binary mode.
    """
    with open(self.ospath, "rb") as stream:
        payload = stream.read(size)
    return payload
def write(self, data: bytes, overwrite: bool = True) -> None:
    """Write the supplied data to the file.

    Parameters
    ----------
    data : `bytes`
        Bytes to write.
    overwrite : `bool`, optional
        If `True` (the default) the file is opened with mode ``"wb"``.
        If `False` it is opened with the exclusive-creation mode ``"xb"``,
        which fails if the file already exists.
    """
    # Make sure the containing directory is present before opening.
    parent = os.path.dirname(self.ospath)
    if not os.path.exists(parent):
        safeMakeDir(parent)

    open_mode = "wb" if overwrite else "xb"
    with open(self.ospath, open_mode) as stream:
        stream.write(data)
def mkdir(self) -> None:
    """Make the directory associated with this URI.

    Raises
    ------
    FileExistsError
        Raised if the path already exists but is not a directory.
    """
    if os.path.exists(self.ospath):
        # Something is already there; it is only acceptable if it is a
        # directory.
        if not os.path.isdir(self.ospath):
            raise FileExistsError(f"URI {self} exists but is not a directory!")
    else:
        safeMakeDir(self.ospath)
def isdir(self) -> bool:
    """Return whether this URI is a directory.

    Returns
    -------
    isdir : `bool`
        `True` if this URI is flagged as directory-like or the local path
        is an existing directory, else `False`.
    """
    # The directory-like flag wins; the file system is only consulted
    # when the flag is not set.
    looks_like_dir = self.dirLike
    if looks_like_dir:
        return looks_like_dir
    return os.path.isdir(self.ospath)
def transfer_from(self, src: ButlerURI, transfer: str,
                  overwrite: bool = False,
                  transaction: Optional[Union[DatastoreTransaction, NoTransaction]] = None) -> None:
    """Transfer the resource at ``src`` to the local file named by this URI.

    Parameters
    ----------
    src : `ButlerURI`
        Source URI.
    transfer : `str`
        Mode to use for transferring the resource. Supports the following
        options: copy, link, symlink, hardlink, relsymlink, auto, move.
        "auto" becomes ``transferDefault`` when the source is already a
        local (non-temporary) file and "copy" when the source had to be
        materialized as a temporary local file. "link" tries a hard link
        first and falls back to a symlink.
    overwrite : `bool`, optional
        Allow an existing file to be overwritten. Defaults to `False`.
    transaction : `DatastoreTransaction`, optional
        If a transaction is provided, undo actions will be registered.

    Raises
    ------
    ValueError
        Raised if ``transfer`` is not one of ``transferModes``.
    FileNotFoundError
        Raised if the local form of the source does not exist.
    RuntimeError
        Raised if a link-based mode is requested but the source is only
        available as a temporary local file.
    FileExistsError
        Raised if the destination exists and ``overwrite`` is `False`.
    NotImplementedError
        Raised if the resolved transfer mode has no implementation here.
    """
    # Fail early to prevent delays if remote resources are requested
    if transfer not in self.transferModes:
        raise ValueError(f"Transfer mode '{transfer}' not supported by URI scheme {self.scheme}")

    # Existence checks can take time so only try if the log message
    # will be issued.
    if log.isEnabledFor(logging.DEBUG):
        log.debug("Transferring %s [exists: %s] -> %s [exists: %s] (transfer=%s)",
                  src, src.exists(), self, self.exists(), transfer)

    # We do not have to special case ButlerFileURI here because
    # as_local handles that.
    with src.as_local() as local_uri:
        is_temporary = local_uri.isTemporary
        local_src = local_uri.ospath

        # Default transfer mode depends on whether we have a temporary
        # file or not.
        if transfer == "auto":
            transfer = self.transferDefault if not is_temporary else "copy"

        if not os.path.exists(local_src):
            if is_temporary:
                msg = f"Local file {local_uri} downloaded from {src} has gone missing"
            else:
                msg = f"Source URI {src} does not exist"
            raise FileNotFoundError(msg)

        # Follow soft links
        local_src = os.path.realpath(os.path.normpath(local_src))

        # All the modes involving linking use "link" somewhere
        if "link" in transfer and is_temporary:
            raise RuntimeError("Can not use local file system transfer mode"
                               f" {transfer} for remote resource ({src})")

        # For temporary files we can own them
        # Remember what the caller asked for: "copy" of a temporary file
        # is carried out as a "move" below, but the final clean-up of the
        # source must only happen for an explicitly requested move.
        requested_transfer = transfer
        if is_temporary and transfer == "copy":
            transfer = "move"

        # The output location should not exist
        dest_exists = self.exists()
        if not overwrite and dest_exists:
            raise FileExistsError(f"Destination path '{self}' already exists. Transfer "
                                  f"from {src} cannot be completed.")

        # Make the path absolute (but don't follow links since that
        # would possibly cause us to end up in the wrong place if the
        # file existed already as a soft link)
        newFullPath = os.path.abspath(self.ospath)
        outputDir = os.path.dirname(newFullPath)
        if not os.path.isdir(outputDir):
            # Must create the directory -- this can not be rolled back
            # since another transfer running concurrently may
            # be relying on this existing.
            safeMakeDir(outputDir)

        if transaction is None:
            # Use a no-op transaction to reduce code duplication
            transaction = NoTransaction()

        # For links the OS doesn't let us overwrite so if something does
        # exist we have to remove it before we do the actual "transfer"
        # below
        if "link" in transfer and overwrite and dest_exists:
            try:
                self.remove()
            except Exception:
                # If this fails we ignore it since it's a problem
                # that will manifest immediately below with a more relevant
                # error message
                pass

        # Each branch registers its undo action with the transaction
        # before performing the file system operation.
        if transfer == "move":
            with transaction.undoWith(f"move from {local_src}", shutil.move, newFullPath, local_src):
                shutil.move(local_src, newFullPath)
        elif transfer == "copy":
            with transaction.undoWith(f"copy from {local_src}", os.remove, newFullPath):
                shutil.copy(local_src, newFullPath)
        elif transfer == "link":
            # Try hard link and if that fails use a symlink
            with transaction.undoWith(f"link to {local_src}", os.remove, newFullPath):
                try:
                    os.link(local_src, newFullPath)
                except OSError:
                    # Read through existing symlinks
                    os.symlink(local_src, newFullPath)
        elif transfer == "hardlink":
            with transaction.undoWith(f"hardlink to {local_src}", os.remove, newFullPath):
                os.link(local_src, newFullPath)
        elif transfer == "symlink":
            # Read through existing symlinks
            with transaction.undoWith(f"symlink to {local_src}", os.remove, newFullPath):
                os.symlink(local_src, newFullPath)
        elif transfer == "relsymlink":
            # This is a standard symlink but using a relative path
            # Need the directory name to give to relative root
            # A full file path confuses it into an extra ../
            newFullPathRoot = os.path.dirname(newFullPath)
            relPath = os.path.relpath(local_src, newFullPathRoot)
            with transaction.undoWith(f"relsymlink to {local_src}", os.remove, newFullPath):
                os.symlink(relPath, newFullPath)
        else:
            raise NotImplementedError("Transfer type '{}' not supported.".format(transfer))

    # NOTE(review): the extent of the ``with`` block above was reconstructed
    # from text whose indentation had been flattened; confirm that this
    # clean-up is meant to run after the local copy has been released.
    # This was an explicit move requested from a remote resource
    # try to remove that remote resource. We check is_temporary because
    # the local file would have been moved by shutil.move already.
    if requested_transfer == "move" and is_temporary:
        # Transactions do not work here
        src.remove()
def walk(
    self, file_filter: Optional[Union[str, re.Pattern]] = None
) -> Iterator[Union[List, Tuple[ButlerURI, List[str], List[str]]]]:
    """Walk the directory tree returning matching files and directories.

    Parameters
    ----------
    file_filter : `str` or `re.Pattern`, optional
        Regex applied to each file name with ``search``; only matching
        names are kept in the returned file list. A `str` is compiled
        first. If `None` all files are returned.

    Yields
    ------
    dirpath : `ButlerURI`
        Current directory being examined.
    dirnames : `list` of `str`
        Names of subdirectories within dirpath.
    filenames : `list` of `str`
        Names of all the files within dirpath (after filtering).

    Raises
    ------
    ValueError
        Raised if this URI is not a directory.
    """
    if not self.isdir():
        raise ValueError("Can not walk a non-directory URI")

    pattern = re.compile(file_filter) if isinstance(file_filter, str) else file_filter
    uri_class = type(self)

    for dirpath, subdirs, filenames in os.walk(self.ospath):
        if pattern is not None:
            # Keep only the file names the regex matches
            filenames = [name for name in filenames if pattern.search(name)]
        # The directory list is passed through untouched, exactly as
        # os.walk produced it.
        yield uri_class(dirpath, forceAbsolute=False, forceDirectory=True), subdirs, filenames
@classmethod
def _fixupPathUri(cls, parsed: urllib.parse.ParseResult, root: Optional[Union[str, ButlerURI]] = None,
                  forceAbsolute: bool = False,
                  forceDirectory: bool = False) -> Tuple[urllib.parse.ParseResult, bool]:
    """Fix up relative paths in URI instances.

    Parameters
    ----------
    parsed : `~urllib.parse.ParseResult`
        The result from parsing a URI using `urllib.parse`.
    root : `str` or `ButlerURI`, optional
        Path to use as root when converting relative to absolute.
        If `None`, it will be the current working directory. This
        is a local file system path, or a file URI. It is only used if
        a file-scheme is used incorrectly with a relative path.
    forceAbsolute : `bool`, ignored
        Has no effect for this subclass. ``file`` URIs are always
        absolute.
    forceDirectory : `bool`, optional
        If `True` forces the URI to end with a separator, otherwise given
        URI is interpreted as is.

    Returns
    -------
    modified : `~urllib.parse.ParseResult`
        Update result if a URI is being handled.
    dirLike : `bool`
        `True` if ``forceDirectory`` is `True` (either as given or
        because the path was found to be an existing directory), or if a
        relative path was given with a trailing separator. Otherwise
        `False`.

    Raises
    ------
    RuntimeError
        Raised if ``root`` is a `ButlerURI` with a scheme other than
        ``file``.

    Notes
    -----
    Relative paths are explicitly not supported by RFC8089 but `urllib`
    does accept URIs of the form ``file:relative/path.ext``. They need
    to be turned into absolute paths before they can be used. This is
    always done regardless of the ``forceAbsolute`` parameter.
    """
    # assume we are not dealing with a directory like URI
    dirLike = False

    # file URI implies POSIX path separators so split as POSIX,
    # then join as os, and convert to abspath. Do not handle
    # home directories since "file" scheme is explicitly documented
    # to not do tilde expansion.
    sep = posixpath.sep

    # For local file system we can explicitly check to see if this
    # really is a directory. The URI might point to a location that
    # does not exists yet but all that matters is if it is a directory
    # then we make sure use that fact. No need to do the check if
    # we are already being told.
    # NOTE(review): the probe uses the path exactly as parsed, before it is
    # joined with ``root``, so a relative path is checked against the
    # current working directory -- confirm this is intended.
    if not forceDirectory and posixpath.isdir(parsed.path):
        forceDirectory = True

    # For an absolute path all we need to do is check if we need
    # to force the directory separator
    if posixpath.isabs(parsed.path):
        if forceDirectory:
            if not parsed.path.endswith(sep):
                parsed = parsed._replace(path=parsed.path+sep)
            dirLike = True
        return copy.copy(parsed), dirLike

    # Relative path so must fix it to be compliant with the standard

    # Replacement values for the URI
    replacements = {}

    if root is None:
        root = os.path.abspath(os.path.curdir)
    elif isinstance(root, ButlerURI):
        if root.scheme and root.scheme != "file":
            raise RuntimeError(f"The override root must be a file URI not {root.scheme}")
        root = os.path.abspath(root.ospath)

    replacements["path"] = posixpath.normpath(posixpath.join(os2posix(root), parsed.path))

    # normpath strips trailing "/" so put it back if necessary.
    # The one exception is a path that normalizes to the root itself,
    # which already ends with the separator: in that case the URI is
    # still directory-like but must not gain a second separator.
    if forceDirectory or parsed.path.endswith(sep):
        dirLike = True
        if not replacements["path"].endswith(sep):
            replacements["path"] += sep

    # ParseResult is a NamedTuple so _replace is standard API
    parsed = parsed._replace(**replacements)

    if parsed.params or parsed.query:
        log.warning("Additional items unexpectedly encountered in file URI: %s", parsed.geturl())

    return parsed, dirLike