Coverage for python/lsst/daf/butler/core/_butlerUri/file.py : 15%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24import os
25import os.path
26import shutil
27import urllib.parse
28import posixpath
29import copy
30import logging
31import re
33__all__ = ('ButlerFileURI',)
35from typing import (
36 TYPE_CHECKING,
37 cast,
38 Iterator,
39 List,
40 Optional,
41 Tuple,
42 Union,
43)
45from ..utils import safeMakeDir
46from .utils import NoTransaction, os2posix, posix2os
47from ._butlerUri import ButlerURI
50if TYPE_CHECKING: 50 ↛ 51line 50 didn't jump to line 51, because the condition on line 50 was never true
51 from ..datastore import DatastoreTransaction
54log = logging.getLogger(__name__)
57class ButlerFileURI(ButlerURI):
58 """URI for explicit ``file`` scheme."""
60 transferModes = ("copy", "link", "symlink", "hardlink", "relsymlink", "auto", "move")
61 transferDefault: str = "link"
63 # By definition refers to a local file
64 isLocal = True
66 @property
67 def ospath(self) -> str:
68 """Path component of the URI localized to current OS.
70 Will unquote URI path since a formal URI must include the quoting.
71 """
72 return urllib.parse.unquote(posix2os(self._uri.path))
74 def exists(self) -> bool:
75 """Indicate that the file exists."""
76 # Uses os.path.exists so if there is a soft link that points
77 # to a file that no longer exists this will return False
78 return os.path.exists(self.ospath)
80 def size(self) -> int:
81 """Return the size of the file in bytes."""
82 if not os.path.isdir(self.ospath):
83 stat = os.stat(self.ospath)
84 sz = stat.st_size
85 else:
86 sz = 0
87 return sz
89 def remove(self) -> None:
90 """Remove the resource."""
91 os.remove(self.ospath)
93 def _as_local(self) -> Tuple[str, bool]:
94 """Return the local path of the file.
96 This is an internal helper for ``as_local()``.
98 Returns
99 -------
100 path : `str`
101 The local path to this file.
102 temporary : `bool`
103 Always returns `False` (this is not a temporary file).
104 """
105 return self.ospath, False
107 def relative_to(self, other: ButlerURI) -> Optional[str]:
108 """Return the relative path from this URI to the other URI.
110 Parameters
111 ----------
112 other : `ButlerURI`
113 URI to use to calculate the relative path. Must be a parent
114 of this URI.
116 Returns
117 -------
118 subpath : `str`
119 The sub path of this URI relative to the supplied other URI.
120 Returns `None` if there is no parent child relationship.
121 Scheme and netloc must match but for file URIs schemeless
122 is also used. If this URI is a relative URI but the other is
123 absolute, it is assumed to be in the parent completely unless it
124 starts with ".." (in which case the path is combined and tested).
125 If both URIs are relative, the relative paths are compared
126 for commonality.
128 Notes
129 -----
130 By definition a relative path will be relative to the enclosing
131 absolute parent URI. It will be returned unchanged if it does not
132 use a parent directory specification.
133 """
134 # We know self is a file so check the other. Anything other than
135 # file or schemeless means by definition these have no paths in common
136 if other.scheme and other.scheme != "file":
137 return None
139 # for case where both URIs are relative use the normal logic
140 # where a/b/c.txt and a/b/ returns c.txt.
141 if not self.isabs() and not other.isabs():
142 return super().relative_to(other)
144 # if we have a relative path convert it to absolute
145 # relative to the supplied parent. This is solely to handle
146 # the case where the relative path includes ".." but somehow
147 # then goes back inside the directory of the parent
148 if not self.isabs():
149 childUri = other.join(self.path)
150 return childUri.relative_to(other)
152 # By this point if the schemes are identical we can use the
153 # base class implementation.
154 if self.scheme == other.scheme:
155 return super().relative_to(other)
157 # if one is schemeless and the other is not the base implementation
158 # will fail so we need to fix that -- they are both absolute so
159 # forcing to file is fine.
160 # Use a cast to convince mypy that other has to be a ButlerFileURI
161 # in order to get to this part of the code.
162 return self.abspath().relative_to(cast(ButlerFileURI, other).abspath())
164 def read(self, size: int = -1) -> bytes:
165 """Return the entire content of the file as bytes."""
166 with open(self.ospath, "rb") as fh:
167 return fh.read(size)
169 def write(self, data: bytes, overwrite: bool = True) -> None:
170 """Write the supplied data to the file."""
171 dir = os.path.dirname(self.ospath)
172 if not os.path.exists(dir):
173 safeMakeDir(dir)
174 if overwrite:
175 mode = "wb"
176 else:
177 mode = "xb"
178 with open(self.ospath, mode) as f:
179 f.write(data)
181 def mkdir(self) -> None:
182 """Make the directory associated with this URI."""
183 if not os.path.exists(self.ospath):
184 safeMakeDir(self.ospath)
185 elif not os.path.isdir(self.ospath):
186 raise FileExistsError(f"URI {self} exists but is not a directory!")
188 def isdir(self) -> bool:
189 """Return whether this URI is a directory.
191 Returns
192 -------
193 isdir : `bool`
194 `True` if this URI is a directory or looks like a directory,
195 else `False`.
196 """
197 return self.dirLike or os.path.isdir(self.ospath)
199 def transfer_from(self, src: ButlerURI, transfer: str,
200 overwrite: bool = False,
201 transaction: Optional[Union[DatastoreTransaction, NoTransaction]] = None) -> None:
202 """Transfer the current resource to a local file.
204 Parameters
205 ----------
206 src : `ButlerURI`
207 Source URI.
208 transfer : `str`
209 Mode to use for transferring the resource. Supports the following
210 options: copy, link, symlink, hardlink, relsymlink.
211 overwrite : `bool`, optional
212 Allow an existing file to be overwritten. Defaults to `False`.
213 transaction : `DatastoreTransaction`, optional
214 If a transaction is provided, undo actions will be registered.
215 """
216 # Fail early to prevent delays if remote resources are requested
217 if transfer not in self.transferModes:
218 raise ValueError(f"Transfer mode '{transfer}' not supported by URI scheme {self.scheme}")
220 log.debug(f"Transferring {src} [exists: {src.exists()}] -> "
221 f"{self} [exists: {self.exists()}] (transfer={transfer})")
223 # We do not have to special case ButlerFileURI here because
224 # as_local handles that.
225 with src.as_local() as local_uri:
226 is_temporary = local_uri.isTemporary
227 local_src = local_uri.ospath
229 # Default transfer mode depends on whether we have a temporary
230 # file or not.
231 if transfer == "auto":
232 transfer = self.transferDefault if not is_temporary else "copy"
234 if not os.path.exists(local_src):
235 if is_temporary:
236 msg = f"Local file {local_uri} downloaded from {src} has gone missing"
237 else:
238 msg = f"Source URI {src} does not exist"
239 raise FileNotFoundError(msg)
241 # Follow soft links
242 local_src = os.path.realpath(os.path.normpath(local_src))
244 # All the modes involving linking use "link" somewhere
245 if "link" in transfer and is_temporary:
246 raise RuntimeError("Can not use local file system transfer mode"
247 f" {transfer} for remote resource ({src})")
249 # For temporary files we can own them
250 requested_transfer = transfer
251 if is_temporary and transfer == "copy":
252 transfer = "move"
254 # The output location should not exist
255 dest_exists = self.exists()
256 if not overwrite and dest_exists:
257 raise FileExistsError(f"Destination path '{self}' already exists. Transfer "
258 f"from {src} cannot be completed.")
260 # Make the path absolute (but don't follow links since that
261 # would possibly cause us to end up in the wrong place if the
262 # file existed already as a soft link)
263 newFullPath = os.path.abspath(self.ospath)
264 outputDir = os.path.dirname(newFullPath)
265 if not os.path.isdir(outputDir):
266 # Must create the directory -- this can not be rolled back
267 # since another transfer running concurrently may
268 # be relying on this existing.
269 safeMakeDir(outputDir)
271 if transaction is None:
272 # Use a no-op transaction to reduce code duplication
273 transaction = NoTransaction()
275 # For links the OS doesn't let us overwrite so if something does
276 # exist we have to remove it before we do the actual "transfer"
277 # below
278 if "link" in transfer and overwrite and dest_exists:
279 try:
280 self.remove()
281 except Exception:
282 # If this fails we ignore it since it's a problem
283 # that will manifest immediately below with a more relevant
284 # error message
285 pass
287 if transfer == "move":
288 with transaction.undoWith(f"move from {local_src}", shutil.move, newFullPath, local_src):
289 shutil.move(local_src, newFullPath)
290 elif transfer == "copy":
291 with transaction.undoWith(f"copy from {local_src}", os.remove, newFullPath):
292 shutil.copy(local_src, newFullPath)
293 elif transfer == "link":
294 # Try hard link and if that fails use a symlink
295 with transaction.undoWith(f"link to {local_src}", os.remove, newFullPath):
296 try:
297 os.link(local_src, newFullPath)
298 except OSError:
299 # Read through existing symlinks
300 os.symlink(local_src, newFullPath)
301 elif transfer == "hardlink":
302 with transaction.undoWith(f"hardlink to {local_src}", os.remove, newFullPath):
303 os.link(local_src, newFullPath)
304 elif transfer == "symlink":
305 # Read through existing symlinks
306 with transaction.undoWith(f"symlink to {local_src}", os.remove, newFullPath):
307 os.symlink(local_src, newFullPath)
308 elif transfer == "relsymlink":
309 # This is a standard symlink but using a relative path
310 # Need the directory name to give to relative root
311 # A full file path confuses it into an extra ../
312 newFullPathRoot = os.path.dirname(newFullPath)
313 relPath = os.path.relpath(local_src, newFullPathRoot)
314 with transaction.undoWith(f"relsymlink to {local_src}", os.remove, newFullPath):
315 os.symlink(relPath, newFullPath)
316 else:
317 raise NotImplementedError("Transfer type '{}' not supported.".format(transfer))
319 # This was an explicit move requested from a remote resource
320 # try to remove that remote resource. We check is_temporary because
321 # the local file would have been moved by shutil.move already.
322 if requested_transfer == "move" and is_temporary:
323 # Transactions do not work here
324 src.remove()
326 def walk(self, file_filter: Optional[Union[str, re.Pattern]] = None) -> Iterator[Union[List,
327 Tuple[ButlerURI,
328 List[str],
329 List[str]]]]:
330 """Walk the directory tree returning matching files and directories.
332 Parameters
333 ----------
334 file_filter : `str` or `re.Pattern`, optional
335 Regex to filter out files from the list before it is returned.
337 Yields
338 ------
339 dirpath : `ButlerURI`
340 Current directory being examined.
341 dirnames : `list` of `str`
342 Names of subdirectories within dirpath.
343 filenames : `list` of `str`
344 Names of all the files within dirpath.
345 """
346 if not self.isdir():
347 raise ValueError("Can not walk a non-directory URI")
349 if isinstance(file_filter, str):
350 file_filter = re.compile(file_filter)
352 for root, dirs, files in os.walk(self.ospath):
353 # Filter by the regex
354 if file_filter is not None:
355 files = [f for f in files if file_filter.search(f)]
356 yield type(self)(root, forceAbsolute=False, forceDirectory=True), dirs, files
358 @classmethod
359 def _fixupPathUri(cls, parsed: urllib.parse.ParseResult, root: Optional[Union[str, ButlerURI]] = None,
360 forceAbsolute: bool = False,
361 forceDirectory: bool = False) -> Tuple[urllib.parse.ParseResult, bool]:
362 """Fix up relative paths in URI instances.
364 Parameters
365 ----------
366 parsed : `~urllib.parse.ParseResult`
367 The result from parsing a URI using `urllib.parse`.
368 root : `str` or `ButlerURI`, optional
369 Path to use as root when converting relative to absolute.
370 If `None`, it will be the current working directory. This
371 is a local file system path, or a file URI. It is only used if
372 a file-scheme is used incorrectly with a relative path.
373 forceAbsolute : `bool`, ignored
374 Has no effect for this subclass. ``file`` URIs are always
375 absolute.
376 forceDirectory : `bool`, optional
377 If `True` forces the URI to end with a separator, otherwise given
378 URI is interpreted as is.
380 Returns
381 -------
382 modified : `~urllib.parse.ParseResult`
383 Update result if a URI is being handled.
384 dirLike : `bool`
385 `True` if given parsed URI has a trailing separator or
386 forceDirectory is True. Otherwise `False`.
388 Notes
389 -----
390 Relative paths are explicitly not supported by RFC8089 but `urllib`
391 does accept URIs of the form ``file:relative/path.ext``. They need
392 to be turned into absolute paths before they can be used. This is
393 always done regardless of the ``forceAbsolute`` parameter.
394 """
395 # assume we are not dealing with a directory like URI
396 dirLike = False
398 # file URI implies POSIX path separators so split as POSIX,
399 # then join as os, and convert to abspath. Do not handle
400 # home directories since "file" scheme is explicitly documented
401 # to not do tilde expansion.
402 sep = posixpath.sep
404 # For local file system we can explicitly check to see if this
405 # really is a directory. The URI might point to a location that
406 # does not exists yet but all that matters is if it is a directory
407 # then we make sure use that fact. No need to do the check if
408 # we are already being told.
409 if not forceDirectory and posixpath.isdir(parsed.path):
410 forceDirectory = True
412 # For an absolute path all we need to do is check if we need
413 # to force the directory separator
414 if posixpath.isabs(parsed.path):
415 if forceDirectory:
416 if not parsed.path.endswith(sep):
417 parsed = parsed._replace(path=parsed.path+sep)
418 dirLike = True
419 return copy.copy(parsed), dirLike
421 # Relative path so must fix it to be compliant with the standard
423 # Replacement values for the URI
424 replacements = {}
426 if root is None:
427 root = os.path.abspath(os.path.curdir)
428 elif isinstance(root, ButlerURI):
429 if root.scheme and root.scheme != "file":
430 raise RuntimeError(f"The override root must be a file URI not {root.scheme}")
431 root = os.path.abspath(root.ospath)
433 replacements["path"] = posixpath.normpath(posixpath.join(os2posix(root), parsed.path))
435 # normpath strips trailing "/" so put it back if necessary
436 # Acknowledge that trailing separator exists.
437 if forceDirectory or (parsed.path.endswith(sep) and not replacements["path"].endswith(sep)):
438 replacements["path"] += sep
439 dirLike = True
441 # ParseResult is a NamedTuple so _replace is standard API
442 parsed = parsed._replace(**replacements)
444 if parsed.params or parsed.query:
445 log.warning("Additional items unexpectedly encountered in file URI: %s", parsed.geturl())
447 return parsed, dirLike