Coverage for python/lsst/daf/butler/core/_butlerUri/file.py : 15%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24import os
25import os.path
26import shutil
27import urllib
28import posixpath
29import copy
30import logging
32__all__ = ('ButlerFileURI',)
34from typing import (
35 TYPE_CHECKING,
36 cast,
37 Optional,
38 Tuple,
39 Union,
40)
42from ..utils import safeMakeDir
43from .utils import NoTransaction, os2posix, posix2os
44from ._butlerUri import ButlerURI
47if TYPE_CHECKING: 47 ↛ 48line 47 didn't jump to line 48, because the condition on line 47 was never true
48 from ..datastore import DatastoreTransaction
51log = logging.getLogger(__name__)
class ButlerFileURI(ButlerURI):
    """URI for explicit ``file`` scheme."""

    transferModes = ("copy", "link", "symlink", "hardlink", "relsymlink", "auto", "move")
    transferDefault: str = "link"

    @property
    def ospath(self) -> str:
        """Path component of the URI localized to current OS.

        Will unquote URI path since a formal URI must include the quoting.
        """
        return urllib.parse.unquote(posix2os(self._uri.path))

    def exists(self) -> bool:
        """Indicate whether the resource exists on the local file system.

        Returns
        -------
        exists : `bool`
            `True` if the path exists. A soft link whose target is missing
            is reported as `False`.
        """
        # Uses os.path.exists so if there is a soft link that points
        # to a file that no longer exists this will return False
        return os.path.exists(self.ospath)

    def size(self) -> int:
        """Return the size of the resource in bytes.

        Returns
        -------
        sz : `int`
            Size of the file as reported by `os.stat`. Directories are
            always reported as having zero size.
        """
        if os.path.isdir(self.ospath):
            return 0
        return os.stat(self.ospath).st_size

    def remove(self) -> None:
        """Remove the resource."""
        os.remove(self.ospath)

    def as_local(self) -> Tuple[str, bool]:
        """Return the local path of the file.

        Returns
        -------
        path : `str`
            The local path to this file.
        temporary : `bool`
            Always returns `False` (this is not a temporary file).
        """
        return self.ospath, False

    def _force_to_file(self) -> ButlerFileURI:
        """Force a schemeless URI to a file URI and returns a new URI.

        Returns
        -------
        file : `ButlerFileURI`
            A copy of the URI using file scheme. If already a file scheme
            the copy will be identical.

        Notes
        -----
        This class always uses the ``file`` scheme so this implementation
        never fails; it simply returns a copy of itself.
        """
        # This is always a file scheme so always return copy
        return copy.copy(self)

    def relative_to(self, other: ButlerURI) -> Optional[str]:
        """Return the relative path from this URI to the other URI.

        Parameters
        ----------
        other : `ButlerURI`
            URI to use to calculate the relative path.  Must be a parent
            of this URI.

        Returns
        -------
        subpath : `str`
            The sub path of this URI relative to the supplied other URI.
            Returns `None` if there is no parent child relationship.
            Scheme and netloc must match but for file URIs schemeless
            is also used. If this URI is a relative URI but the other is
            absolute, it is assumed to be in the parent completely unless it
            starts with ".." (in which case the path is combined and tested).
            If both URIs are relative, the relative paths are compared
            for commonality.

        Notes
        -----
        By definition a relative path will be relative to the enclosing
        absolute parent URI. It will be returned unchanged if it does not
        use a parent directory specification.
        """
        # We know self is a file so check the other. Anything other than
        # file or schemeless means by definition these have no paths in common
        if other.scheme and other.scheme != "file":
            return None

        # for case where both URIs are relative use the normal logic
        # where a/b/c.txt and a/b/ returns c.txt.
        if not self.isabs() and not other.isabs():
            return super().relative_to(other)

        # if we have a relative path convert it to absolute
        # relative to the supplied parent.  This is solely to handle
        # the case where the relative path includes ".." but somehow
        # then goes back inside the directory of the parent
        if not self.isabs():
            childUri = other.join(self.path)
            return childUri.relative_to(other)

        # By this point if the schemes are identical we can use the
        # base class implementation.
        if self.scheme == other.scheme:
            return super().relative_to(other)

        # if one is schemeless and the other is not the base implementation
        # will fail so we need to fix that -- they are both absolute so
        # forcing to file is fine.
        # Use a cast to convince mypy that other has to be a ButlerFileURI
        # in order to get to this part of the code.
        return self._force_to_file().relative_to(cast(ButlerFileURI, other)._force_to_file())

    def read(self, size: int = -1) -> bytes:
        """Read the contents of the file.

        Parameters
        ----------
        size : `int`, optional
            Number of bytes to read. A negative value (the default) reads
            the entire file.

        Returns
        -------
        data : `bytes`
            The bytes read from the file.
        """
        with open(self.ospath, "rb") as fh:
            return fh.read(size)

    def write(self, data: bytes, overwrite: bool = True) -> None:
        """Write the supplied bytes to the file.

        Parameters
        ----------
        data : `bytes`
            The bytes to write.
        overwrite : `bool`, optional
            If `True` (the default) an existing file is replaced. If
            `False` the file is opened for exclusive creation and
            `FileExistsError` is raised if it already exists.
        """
        # The enclosing directory is created on demand.
        parent = os.path.dirname(self.ospath)
        if not os.path.exists(parent):
            safeMakeDir(parent)
        mode = "wb" if overwrite else "xb"
        with open(self.ospath, mode) as f:
            f.write(data)

    def mkdir(self) -> None:
        """Create the directory referred to by this URI if it is missing.

        Raises
        ------
        FileExistsError
            Raised if the path exists but is not a directory.
        """
        if not os.path.exists(self.ospath):
            safeMakeDir(self.ospath)
        elif not os.path.isdir(self.ospath):
            raise FileExistsError(f"URI {self} exists but is not a directory!")

    def transfer_from(self, src: ButlerURI, transfer: str,
                      overwrite: bool = False,
                      transaction: Optional[Union[DatastoreTransaction, NoTransaction]] = None) -> None:
        """Transfer the supplied source resource to this local file.

        Parameters
        ----------
        src : `ButlerURI`
            Source URI.
        transfer : `str`
            Mode to use for transferring the resource. Supports the following
            options: copy, link, symlink, hardlink, relsymlink, auto, move.
            ``auto`` uses `transferDefault` for a source that is already a
            local file and ``copy`` for a source that had to be retrieved
            into a temporary file.
        overwrite : `bool`, optional
            Allow an existing file to be overwritten. Defaults to `False`.
        transaction : `DatastoreTransaction`, optional
            If a transaction is provided, undo actions will be registered.

        Raises
        ------
        ValueError
            Raised if the transfer mode is not one of `transferModes`.
        FileNotFoundError
            Raised if the source does not exist.
        FileExistsError
            Raised if the destination exists and ``overwrite`` is `False`.
        RuntimeError
            Raised if a link-based mode is requested for a source that is
            only available through a temporary local copy.
        """
        # Fail early to prevent delays if remote resources are requested
        if transfer not in self.transferModes:
            raise ValueError(f"Transfer mode '{transfer}' not supported by URI scheme {self.scheme}")

        # The existence checks can be slow for remote sources so only
        # perform them when the message will actually be emitted.
        if log.isEnabledFor(logging.DEBUG):
            log.debug("Transferring %s [exists: %s] -> %s [exists: %s] (transfer=%s)",
                      src, src.exists(), self, self.exists(), transfer)

        # We do not have to special case ButlerFileURI here because
        # as_local handles that.
        raw_local_src, is_temporary = src.as_local()

        # Follow soft links
        local_src = os.path.realpath(os.path.normpath(raw_local_src))

        try:
            self._transfer_from_local(src, local_src, is_temporary, transfer, overwrite, transaction)
        finally:
            # A temporary copy is owned by us. On success it has normally
            # been moved into place already; on failure it must not be
            # left behind.
            if is_temporary and os.path.exists(local_src):
                os.remove(local_src)

    def _transfer_from_local(self, src: ButlerURI, local_src: str, is_temporary: bool,
                             transfer: str, overwrite: bool,
                             transaction: Optional[Union[DatastoreTransaction, NoTransaction]]) -> None:
        """Transfer an already-localized source file to this URI.

        Parameters
        ----------
        src : `ButlerURI`
            The original source URI. Used for messages and, for an explicit
            ``move`` of a non-local resource, to remove the original.
        local_src : `str`
            Fully resolved local path holding the content of ``src``.
        is_temporary : `bool`
            `True` if ``local_src`` is a temporary copy of ``src``.
        transfer : `str`
            Requested transfer mode; already validated by the caller.
        overwrite : `bool`
            Allow an existing destination to be replaced.
        transaction : `DatastoreTransaction` or `NoTransaction` or `None`
            Transaction with which undo actions are registered.
        """
        # Default transfer mode depends on whether we have a temporary
        # file or not.
        if transfer == "auto":
            transfer = self.transferDefault if not is_temporary else "copy"

        if not os.path.exists(local_src):
            raise FileNotFoundError(f"Source URI {src} does not exist")

        # All the modes involving linking use "link" somewhere
        if "link" in transfer and is_temporary:
            raise RuntimeError("Can not use local file system transfer mode"
                               f" {transfer} for remote resource ({src})")

        # For temporary files we can own them
        requested_transfer = transfer
        if is_temporary and transfer == "copy":
            transfer = "move"

        # The output location should not exist
        if not overwrite and self.exists():
            raise FileExistsError(f"Destination path '{self}' already exists. Transfer "
                                  f"from {src} cannot be completed.")

        # Make the path absolute (but don't follow links since that
        # would possibly cause us to end up in the wrong place if the
        # file existed already as a soft link)
        newFullPath = os.path.abspath(self.ospath)
        outputDir = os.path.dirname(newFullPath)
        if not os.path.isdir(outputDir):
            # Must create the directory -- this can not be rolled back
            # since another transfer running concurrently may
            # be relying on this existing.
            safeMakeDir(outputDir)

        if transaction is None:
            # Use a no-op transaction to reduce code duplication
            transaction = NoTransaction()

        # For links the OS doesn't let us overwrite so if something does
        # exist we have to remove it before we do the actual "transfer" below.
        # lexists is used so that a dangling soft link at the destination
        # is also cleared out of the way.
        if "link" in transfer and overwrite and os.path.lexists(newFullPath):
            try:
                self.remove()
            except Exception:
                # If this fails we ignore it since it's a problem
                # that will manifest immediately below with a more relevant
                # error message
                log.debug("Unable to remove existing %s prior to linking", self, exc_info=True)

        if transfer == "move":
            with transaction.undoWith(f"move from {local_src}", shutil.move, newFullPath, local_src):
                shutil.move(local_src, newFullPath)
        elif transfer == "copy":
            with transaction.undoWith(f"copy from {local_src}", os.remove, newFullPath):
                shutil.copy(local_src, newFullPath)
        elif transfer == "link":
            # Try hard link and if that fails use a symlink
            with transaction.undoWith(f"link to {local_src}", os.remove, newFullPath):
                try:
                    os.link(local_src, newFullPath)
                except OSError:
                    # Read through existing symlinks
                    os.symlink(local_src, newFullPath)
        elif transfer == "hardlink":
            with transaction.undoWith(f"hardlink to {local_src}", os.remove, newFullPath):
                os.link(local_src, newFullPath)
        elif transfer == "symlink":
            # Read through existing symlinks
            with transaction.undoWith(f"symlink to {local_src}", os.remove, newFullPath):
                os.symlink(local_src, newFullPath)
        elif transfer == "relsymlink":
            # This is a standard symlink but using a relative path
            # Need the directory name to give to relative root
            # A full file path confuses it into an extra ../
            newFullPathRoot = os.path.dirname(newFullPath)
            relPath = os.path.relpath(local_src, newFullPathRoot)
            with transaction.undoWith(f"relsymlink to {local_src}", os.remove, newFullPath):
                os.symlink(relPath, newFullPath)
        else:
            raise NotImplementedError("Transfer type '{}' not supported.".format(transfer))

        # This was an explicit move requested from a remote resource
        # try to remove that resource. We check is_temporary because
        # the local file would have been moved by shutil.move already.
        if requested_transfer == "move" and is_temporary:
            # Transactions do not work here
            src.remove()

    @staticmethod
    def _fixupPathUri(parsed: urllib.parse.ParseResult, root: Optional[Union[str, ButlerURI]] = None,
                      forceAbsolute: bool = False,
                      forceDirectory: bool = False) -> Tuple[urllib.parse.ParseResult, bool]:
        """Fix up relative paths in URI instances.

        Parameters
        ----------
        parsed : `~urllib.parse.ParseResult`
            The result from parsing a URI using `urllib.parse`.
        root : `str` or `ButlerURI`, optional
            Path to use as root when converting relative to absolute.
            If `None`, it will be the current working directory. This
            is a local file system path, or a file URI.  It is only used if
            a file-scheme is used incorrectly with a relative path.
        forceAbsolute : `bool`, ignored
            Has no effect for this subclass. ``file`` URIs are always
            absolute.
        forceDirectory : `bool`, optional
            If `True` forces the URI to end with a separator, otherwise given
            URI is interpreted as is.

        Returns
        -------
        modified : `~urllib.parse.ParseResult`
            Update result if a URI is being handled.
        dirLike : `bool`
            `True` if given parsed URI has a trailing separator or
            forceDirectory is True. Otherwise `False`.

        Raises
        ------
        RuntimeError
            Raised if ``root`` is a `ButlerURI` with a scheme other than
            ``file``.

        Notes
        -----
        Relative paths are explicitly not supported by RFC8089 but `urllib`
        does accept URIs of the form ``file:relative/path.ext``. They need
        to be turned into absolute paths before they can be used.  This is
        always done regardless of the ``forceAbsolute`` parameter.
        """
        # assume we are not dealing with a directory like URI
        dirLike = False

        # file URI implies POSIX path separators so split as POSIX,
        # then join as os, and convert to abspath. Do not handle
        # home directories since "file" scheme is explicitly documented
        # to not do tilde expansion.
        sep = posixpath.sep

        # For local file system we can explicitly check to see if this
        # really is a directory. The URI might point to a location that
        # does not exists yet but all that matters is if it is a directory
        # then we make sure use that fact. No need to do the check if
        # we are already being told.
        # The URI path is quoted and uses POSIX separators so it has to be
        # converted in the same way as ``ospath`` before asking the file
        # system about it.
        if not forceDirectory and os.path.isdir(urllib.parse.unquote(posix2os(parsed.path))):
            forceDirectory = True

        # For an absolute path all we need to do is check if we need
        # to force the directory separator
        if posixpath.isabs(parsed.path):
            if forceDirectory:
                if not parsed.path.endswith(sep):
                    parsed = parsed._replace(path=parsed.path+sep)
                dirLike = True
            return copy.copy(parsed), dirLike

        # Relative path so must fix it to be compliant with the standard

        if root is None:
            root = os.path.abspath(os.path.curdir)
        elif isinstance(root, ButlerURI):
            if root.scheme and root.scheme != "file":
                raise RuntimeError(f"The override root must be a file URI not {root.scheme}")
            root = os.path.abspath(root.ospath)

        newPath = posixpath.normpath(posixpath.join(os2posix(root), parsed.path))

        # normpath strips trailing "/" so put it back if necessary
        # Acknowledge that trailing separator exists. The separator is only
        # appended when missing since normpath retains it for the root
        # directory itself.
        if forceDirectory or parsed.path.endswith(sep):
            if not newPath.endswith(sep):
                newPath += sep
            dirLike = True

        # ParseResult is a NamedTuple so _replace is standard API
        parsed = parsed._replace(path=newPath)

        if parsed.params or parsed.query:
            log.warning("Additional items unexpectedly encountered in file URI: %s", parsed.geturl())

        return parsed, dirLike