Coverage for python/lsst/daf/butler/core/_butlerUri.py : 16%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ("ButlerURI",)
26import contextlib
27import os
28import os.path
29import shutil
30import urllib
31import pkg_resources
32import posixpath
33from pathlib import Path, PurePath, PurePosixPath
34import requests
35import tempfile
36import copy
37import logging
38import re
40from typing import (
41 TYPE_CHECKING,
42 Any,
43 Callable,
44 cast,
45 Iterator,
46 Optional,
47 Tuple,
48 Type,
49 Union,
50)
52from .utils import safeMakeDir
54if TYPE_CHECKING: 54 ↛ 55line 54 didn't jump to line 55, because the condition on line 54 was never true
55 try:
56 import boto3
57 except ImportError:
58 pass
59 from .datastore import DatastoreTransaction
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# True when the native path separator is the POSIX one ("/").
IS_POSIX = posixpath.sep == os.sep

# Root component of the resolved current directory on this operating system.
OS_ROOT_PATH = Path().resolve().root

# Matches a percent-escape written with upper-case hex digits (e.g. "%2F").
ESCAPES_RE = re.compile(r"%[A-F0-9]{2}")
def os2posix(ospath: str) -> str:
    """Translate a path written with the local separator into POSIX form.

    Parameters
    ----------
    ospath : `str`
        Path using the local path separator.

    Returns
    -------
    posix : `str`
        The same path expressed with the POSIX separator. On a POSIX
        system the input is returned unchanged.
    """
    if IS_POSIX:
        return ospath

    converted = PurePath(ospath).as_posix()

    # PurePath drops a trailing separator, which loses the hint that the
    # path refers to a directory; put it back when the input had one.
    lost_trailing_sep = ospath.endswith(os.sep) and not converted.endswith(posixpath.sep)
    return converted + posixpath.sep if lost_trailing_sep else converted
def posix2os(posix: Union[PurePath, str]) -> str:
    """Convert a POSIX path description to a local path description.

    Parameters
    ----------
    posix : `str`, `PurePath`
        Path using the POSIX path separator.

    Returns
    -------
    ospath : `str`
        Path using OS path separator. On a POSIX system this is simply
        ``str(posix)``.
    """
    if IS_POSIX:
        return str(posix)

    posixPath = PurePosixPath(posix)
    paths = list(posixPath.parts)

    # An empty path has no components. Indexing ``paths[0]`` or calling
    # ``os.path.join()`` with no arguments would raise, so return the same
    # value the POSIX branch would have produced.
    if not paths:
        return str(posix)

    # Have to convert the root directory after splitting
    if paths[0] == posixPath.root:
        paths[0] = OS_ROOT_PATH

    # Trailing "/" is stripped so we need to add back an empty path
    # for consistency
    if str(posix).endswith(posixpath.sep):
        paths.append("")

    return os.path.join(*paths)
class NoTransaction:
    """Stand-in for `DatastoreTransaction` that records nothing.

    Lets calling code use the ``undoWith`` context manager unconditionally
    even when no real transaction is available.
    """

    def __init__(self) -> None:
        # Nothing to set up.
        pass

    @contextlib.contextmanager
    def undoWith(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> Iterator[None]:
        """Context manager that ignores its arguments and registers no
        undo action.
        """
        yield
class ButlerURI:
    """Convenience wrapper around URI parsers.

    Provides access to URI components and can convert file
    paths into absolute path URIs. Scheme-less URIs are treated as if
    they are local file system paths and are converted to absolute URIs.

    A specialist subclass is created for each supported URI scheme.

    Parameters
    ----------
    uri : `str`, `urllib.parse.ParseResult` or `ButlerURI`
        URI in string form. Can be scheme-less if referring to a local
        filesystem path. If a `ButlerURI` is given its parsed form,
        ``dirLike`` flag and subclass are reused without further fix up.
    root : `str` or `ButlerURI`, optional
        When fixing up a relative path in a ``file`` scheme or if scheme-less,
        use this as the root. Must be absolute. If `None` the current
        working directory will be used. Can be a file URI.
    forceAbsolute : `bool`, optional
        If `True`, scheme-less relative URI will be converted to an absolute
        path using a ``file`` scheme. If `False` scheme-less URI will remain
        scheme-less and will not be updated to ``file`` or absolute path.
    forceDirectory: `bool`, optional
        If `True` forces the URI to end with a separator, otherwise given URI
        is interpreted as is.

    Raises
    ------
    ValueError
        Raised if ``uri`` is not a string, `ButlerURI` or
        `urllib.parse.ParseResult`.
    NotImplementedError
        Raised if the URI scheme is not supported.
    """

    _pathLib: Type[PurePath] = PurePosixPath
    """Path library to use for this scheme."""

    _pathModule = posixpath
    """Path module to use for this scheme."""

    transferModes: Tuple[str, ...] = ("copy", "auto", "move")
    """Transfer modes supported by this implementation.

    Move is special in that it is generally a copy followed by an unlink.
    Whether that unlink works depends critically on whether the source URI
    implements unlink. If it does not the move will be reported as a failure.
    """

    transferDefault: str = "copy"
    """Default mode to use for transferring if ``auto`` is specified."""

    quotePaths = True
    """True if path-like elements modifying a URI should be quoted.

    All non-schemeless URIs have to internally use quoted paths. Therefore
    if a new file name is given (e.g. to updateFile or join) a decision must
    be made whether to quote it to be consistent.
    """

    # This is not an ABC with abstract methods because the __new__ being
    # a factory confuses mypy such that it assumes that every constructor
    # returns a ButlerURI and then determines that all the abstract methods
    # are still abstract. If they are not marked abstract but just raise
    # mypy is fine with it.

    # mypy is confused without this
    _uri: urllib.parse.ParseResult

    def __new__(cls, uri: Union[str, urllib.parse.ParseResult, ButlerURI],
                root: Optional[Union[str, ButlerURI]] = None, forceAbsolute: bool = True,
                forceDirectory: bool = False) -> ButlerURI:
        parsed: urllib.parse.ParseResult
        dirLike: bool
        subclass: Optional[Type] = None

        # Record if we need to post process the URI components
        # or if the instance is already fully configured
        if isinstance(uri, str):
            # Since local file names can have special characters in them
            # we need to quote them for the parser but we can unquote
            # later. Assume that all other URI schemes are quoted.
            # Since sometimes people write file:/a/b and not file:///a/b
            # we should not quote in the explicit case of file:
            if "://" not in uri and not uri.startswith("file:"):
                if ESCAPES_RE.search(uri):
                    log.warning("Possible double encoding of %s", uri)
                else:
                    uri = urllib.parse.quote(uri)
            parsed = urllib.parse.urlparse(uri)
        elif isinstance(uri, urllib.parse.ParseResult):
            parsed = copy.copy(uri)
        elif isinstance(uri, ButlerURI):
            parsed = copy.copy(uri._uri)
            dirLike = uri.dirLike
            # No further parsing required and we know the subclass
            subclass = type(uri)
        else:
            raise ValueError(f"Supplied URI must be string, ButlerURI, or ParseResult but got '{uri!r}'")

        if subclass is None:
            # Work out the subclass from the URI scheme
            if not parsed.scheme:
                subclass = ButlerSchemelessURI
            elif parsed.scheme == "file":
                subclass = ButlerFileURI
            elif parsed.scheme == "s3":
                subclass = ButlerS3URI
            elif parsed.scheme.startswith("http"):
                subclass = ButlerHttpURI
            elif parsed.scheme == "resource":
                # Rules for scheme names disallow pkg_resource
                subclass = ButlerPackageResourceURI
            elif parsed.scheme == "mem":
                # in-memory datastore object
                subclass = ButlerInMemoryURI
            else:
                # Both halves must be f-strings so that the offending URI
                # is actually interpolated into the message.
                raise NotImplementedError(f"No URI support for scheme: '{parsed.scheme}'"
                                          f" in {parsed.geturl()}")

            parsed, dirLike = subclass._fixupPathUri(parsed, root=root,
                                                     forceAbsolute=forceAbsolute,
                                                     forceDirectory=forceDirectory)

            # It is possible for the class to change from schemeless
            # to file so handle that
            if parsed.scheme == "file":
                subclass = ButlerFileURI

        # Now create an instance of the correct subclass and set the
        # attributes directly
        self = object.__new__(subclass)
        self._uri = parsed
        self.dirLike = dirLike
        return self

    @property
    def scheme(self) -> str:
        """The URI scheme (``://`` is not part of the scheme)."""
        return self._uri.scheme

    @property
    def netloc(self) -> str:
        """The URI network location."""
        return self._uri.netloc

    @property
    def path(self) -> str:
        """The path component of the URI."""
        return self._uri.path

    @property
    def unquoted_path(self) -> str:
        """The path component of the URI with any URI quoting reversed."""
        return urllib.parse.unquote(self._uri.path)

    @property
    def ospath(self) -> str:
        """Path component of the URI localized to current OS.

        The base implementation always raises `AttributeError`.
        """
        raise AttributeError(f"Non-file URI ({self}) has no local OS path.")

    @property
    def relativeToPathRoot(self) -> str:
        """Returns path relative to network location.

        Effectively, this is the path property with posix separator stripped
        from the left hand side of the path.

        Always unquotes.
        """
        p = self._pathLib(self.path)
        relToRoot = str(p.relative_to(p.root))
        if self.dirLike and not relToRoot.endswith("/"):
            relToRoot += "/"
        return urllib.parse.unquote(relToRoot)

    @property
    def is_root(self) -> bool:
        """`True` if this URI points to the root of the network location.

        This means that the path components refers to the top level.
        """
        # A dir-like root path is reported as "./" by relativeToPathRoot.
        return self.relativeToPathRoot == "./"

    @property
    def fragment(self) -> str:
        """The fragment component of the URI."""
        return self._uri.fragment

    @property
    def params(self) -> str:
        """Any parameters included in the URI."""
        return self._uri.params

    @property
    def query(self) -> str:
        """Any query strings included in the URI."""
        return self._uri.query

    def geturl(self) -> str:
        """Return the URI in string form.

        Returns
        -------
        url : `str`
            String form of URI.
        """
        return self._uri.geturl()

    def split(self) -> Tuple[ButlerURI, str]:
        """Splits URI into head and tail. Equivalent to os.path.split where
        head preserves the URI components.

        Returns
        -------
        head: `ButlerURI`
            Everything leading up to tail, expanded and normalized as per
            ButlerURI rules.
        tail : `str`
            Last `self.path` component. Tail will be empty if path ends on a
            separator. Tail will never contain separators. It will be
            unquoted.
        """
        head, tail = self._pathModule.split(self.path)
        headuri = self._uri._replace(path=head)

        # The file part should never include quoted metacharacters
        tail = urllib.parse.unquote(tail)

        # Schemeless is special in that it can be a relative path
        # We need to ensure that it stays that way. All other URIs will
        # be absolute already.
        forceAbsolute = self._pathModule.isabs(self.path)
        return ButlerURI(headuri, forceDirectory=True, forceAbsolute=forceAbsolute), tail

    def basename(self) -> str:
        """Returns the base name, last element of path, of the URI. If URI ends
        on a slash returns an empty string. This is the second element returned
        by split().

        Equivalent of os.path.basename().

        Returns
        -------
        tail : `str`
            Last part of the path attribute. Tail will be empty if path ends
            on a separator.
        """
        return self.split()[1]

    def dirname(self) -> ButlerURI:
        """Returns a ButlerURI containing all the directories of the path
        attribute.

        Equivalent of os.path.dirname()

        Returns
        -------
        head : `ButlerURI`
            Everything except the tail of path attribute, expanded and
            normalized as per ButlerURI rules.
        """
        return self.split()[0]

    def parent(self) -> ButlerURI:
        """Returns a ButlerURI containing all the directories of the path
        attribute, minus the last one.

        Returns
        -------
        head : `ButlerURI`
            Everything except the tail of path attribute, expanded and
            normalized as per ButlerURI rules.
        """
        # When self is file-like, return self.dirname()
        if not self.dirLike:
            return self.dirname()
        # When self is dir-like, return its parent directory,
        # regardless of the presence of a trailing separator
        originalPath = self._pathLib(self.path)
        parentPath = originalPath.parent
        parentURI = self._uri._replace(path=str(parentPath))

        return ButlerURI(parentURI, forceDirectory=True)

    def replace(self, **kwargs: Any) -> ButlerURI:
        """Replace components in a URI with new values and return a new
        instance.

        Parameters
        ----------
        **kwargs
            Components to replace, forwarded to the ``_replace`` method of
            the underlying `urllib.parse.ParseResult`.

        Returns
        -------
        new : `ButlerURI`
            New `ButlerURI` object with updated values.
        """
        return self.__class__(self._uri._replace(**kwargs))

    def updateFile(self, newfile: str) -> None:
        """Update in place the final component of the path with the supplied
        file name.

        Parameters
        ----------
        newfile : `str`
            File name with no path component.

        Notes
        -----
        Updates the URI in place.
        Updates the ButlerURI.dirLike attribute. The new file path will
        be quoted if necessary.
        """
        if self.quotePaths:
            newfile = urllib.parse.quote(newfile)
        directory, _ = self._pathModule.split(self.path)
        newpath = self._pathModule.join(directory, newfile)

        self.dirLike = False
        self._uri = self._uri._replace(path=newpath)

    def getExtension(self) -> str:
        """Return the file extension(s) associated with this URI path.

        Returns
        -------
        ext : `str`
            The file extension (including the ``.``). Can be empty string
            if there is no file extension. Usually returns only the last
            file extension unless there is a special extension modifier
            indicating file compression, in which case the combined
            extension (e.g. ``.fits.gz``) will be returned.
        """
        special = {".gz", ".bz2", ".xz", ".fz"}

        extensions = self._pathLib(self.path).suffixes

        if not extensions:
            return ""

        ext = extensions.pop()

        # Multiple extensions, decide whether to include the final two
        if extensions and ext in special:
            ext = f"{extensions[-1]}{ext}"

        return ext

    def join(self, path: str) -> ButlerURI:
        """Create a new `ButlerURI` with additional path components including
        a file.

        Parameters
        ----------
        path : `str`
            Additional file components to append to the current URI. Assumed
            to include a file at the end. Will be quoted depending on the
            associated URI scheme.

        Returns
        -------
        new : `ButlerURI`
            New URI with any file at the end replaced with the new path
            components.

        Notes
        -----
        Schemeless URIs assume local path separator but all other URIs assume
        POSIX separator if the supplied path has directory structure. It
        may be this never becomes a problem but datastore templates assume
        POSIX separator is being used.
        """
        new = self.dirname()  # By definition a directory URI

        # new should be asked about quoting, not self, since dirname can
        # change the URI scheme for schemeless -> file
        if new.quotePaths:
            path = urllib.parse.quote(path)

        sep = self._pathModule.sep
        newpath = self._pathModule.normpath(self._pathModule.join(new.path, path))

        if path.endswith(sep):
            # The result stays dir-like. normpath strips the trailing
            # separator, so restore it to keep the path consistent with
            # the dirLike flag (dir-like URIs end on a separator).
            if not newpath.endswith(sep):
                newpath += sep
        else:
            # Declare the new URI not be dirLike unless path ended in /
            new.dirLike = False

        new._uri = new._uri._replace(path=newpath)
        return new

    def relative_to(self, other: ButlerURI) -> Optional[str]:
        """Return the relative path from this URI to the other URI.

        Parameters
        ----------
        other : `ButlerURI`
            URI to use to calculate the relative path. Must be a parent
            of this URI.

        Returns
        -------
        subpath : `str`
            The sub path of this URI relative to the supplied other URI.
            Returns `None` if there is no parent child relationship.
            Scheme and netloc must match.
        """
        if self.scheme != other.scheme or self.netloc != other.netloc:
            return None

        enclosed_path = self._pathLib(self.relativeToPathRoot)
        parent_path = other.relativeToPathRoot
        subpath: Optional[str]
        try:
            subpath = str(enclosed_path.relative_to(parent_path))
        except ValueError:
            # Not located underneath the other URI.
            subpath = None
        else:
            subpath = urllib.parse.unquote(subpath)
        return subpath

    def exists(self) -> bool:
        """Indicate that the resource is available.

        Returns
        -------
        exists : `bool`
            `True` if the resource exists.
        """
        raise NotImplementedError()

    def remove(self) -> None:
        """Remove the resource."""
        raise NotImplementedError()

    def isabs(self) -> bool:
        """Indicate that the resource is fully specified.

        For non-schemeless URIs this is always true.

        Returns
        -------
        isabs : `bool`
            `True` in all cases except schemeless URI.
        """
        return True

    def as_local(self) -> Tuple[str, bool]:
        """Return the location of the (possibly remote) resource in the
        local file system.

        Returns
        -------
        path : `str`
            If this is a remote resource, it will be a copy of the resource
            on the local file system, probably in a temporary directory.
            For a local resource this should be the actual path to the
            resource.
        is_temporary : `bool`
            Indicates if the local path is a temporary file or not.
        """
        raise NotImplementedError()

    def read(self, size: int = -1) -> bytes:
        """Open the resource and return the contents in bytes.

        Parameters
        ----------
        size : `int`, optional
            The number of bytes to read. Negative or omitted indicates
            that all data should be read.
        """
        raise NotImplementedError()

    def write(self, data: bytes, overwrite: bool = True) -> None:
        """Write the supplied bytes to the new resource.

        Parameters
        ----------
        data : `bytes`
            The bytes to write to the resource. The entire contents of the
            resource will be replaced.
        overwrite : `bool`, optional
            If `True` the resource will be overwritten if it exists. Otherwise
            the write will fail.
        """
        raise NotImplementedError()

    def mkdir(self) -> None:
        """For a dir-like URI, create the directory resource if it does not
        already exist.
        """
        raise NotImplementedError()

    def size(self) -> int:
        """For non-dir-like URI, return the size of the resource.

        Returns
        -------
        sz : `int`
            The size in bytes of the resource associated with this URI.
            Returns 0 if dir-like.
        """
        raise NotImplementedError()

    def __str__(self) -> str:
        return self.geturl()

    def __repr__(self) -> str:
        return f'ButlerURI("{self.geturl()}")'

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, ButlerURI):
            return False
        return self.geturl() == other.geturl()

    def __hash__(self) -> int:
        # Defining __eq__ alone makes the class unhashable. Hash on the same
        # string that __eq__ compares so equal URIs hash equally. Note that
        # updateFile() mutates the URI in place and therefore its hash.
        return hash(self.geturl())

    def __copy__(self) -> ButlerURI:
        # Implement here because the __new__ method confuses things
        return type(self)(str(self))

    def __deepcopy__(self, memo: Any) -> ButlerURI:
        # Implement here because the __new__ method confuses things
        return self.__copy__()

    def __getnewargs__(self) -> Tuple:
        return (str(self),)

    @staticmethod
    def _fixupPathUri(parsed: urllib.parse.ParseResult, root: Optional[Union[str, ButlerURI]] = None,
                      forceAbsolute: bool = False,
                      forceDirectory: bool = False) -> Tuple[urllib.parse.ParseResult, bool]:
        """Correct any issues with the supplied URI.

        Parameters
        ----------
        parsed : `~urllib.parse.ParseResult`
            The result from parsing a URI using `urllib.parse`.
        root : `str` or `ButlerURI`, ignored
            Not used by this implementation since all URIs are
            absolute except for those representing the local file system.
        forceAbsolute : `bool`, ignored.
            Not used by this implementation. URIs are generally always
            absolute.
        forceDirectory : `bool`, optional
            If `True` forces the URI to end with a separator, otherwise given
            URI is interpreted as is. Specifying that the URI is conceptually
            equivalent to a directory can break some ambiguities when
            interpreting the last element of a path.

        Returns
        -------
        modified : `~urllib.parse.ParseResult`
            Update result if a URI is being handled.
        dirLike : `bool`
            `True` if given parsed URI has a trailing separator or
            forceDirectory is True. Otherwise `False`.

        Notes
        -----
        Relative paths are explicitly not supported by RFC8089 but `urllib`
        does accept URIs of the form ``file:relative/path.ext``. They need
        to be turned into absolute paths before they can be used. This is
        always done regardless of the ``forceAbsolute`` parameter.

        AWS S3 differentiates between keys with trailing POSIX separators (i.e
        `/dir` and `/dir/`) whereas POSIX does not necessarily.

        Scheme-less paths are normalized.
        """
        # assume we are not dealing with a directory like URI
        dirLike = False

        # URI is dir-like if explicitly stated or if it ends on a separator
        endsOnSep = parsed.path.endswith(posixpath.sep)
        if forceDirectory or endsOnSep:
            dirLike = True
            # only add the separator if it's not already there
            if not endsOnSep:
                parsed = parsed._replace(path=parsed.path + posixpath.sep)

        return parsed, dirLike

    def transfer_from(self, src: ButlerURI, transfer: str,
                      overwrite: bool = False,
                      transaction: Optional[Union[DatastoreTransaction, NoTransaction]] = None) -> None:
        """Transfer the current resource to a new location.

        Parameters
        ----------
        src : `ButlerURI`
            Source URI.
        transfer : `str`
            Mode to use for transferring the resource. Generically there are
            many standard options: copy, link, symlink, hardlink, relsymlink.
            Not all URIs support all modes.
        overwrite : `bool`, optional
            Allow an existing file to be overwritten. Defaults to `False`.
        transaction : `DatastoreTransaction`, optional
            A transaction object that can (depending on implementation)
            rollback transfers on error. Not guaranteed to be implemented.

        Raises
        ------
        NotImplementedError
            Always raised by this base implementation.

        Notes
        -----
        Conceptually this is hard to scale as the number of URI schemes
        grow. The destination URI is more important than the source URI
        since that is where all the transfer modes are relevant (with the
        complication that "move" deletes the source).

        Local file to local file is the fundamental use case but every
        other scheme has to support "copy" to local file (with implicit
        support for "move") and copy from local file.
        All the "link" options tend to be specific to local file systems.

        "move" is a "copy" where the remote resource is deleted at the end.
        Whether this works depends on the source URI rather than the
        destination URI. Reverting a move on transaction rollback is
        expected to be problematic if a remote resource was involved.
        """
        raise NotImplementedError(f"No transfer modes supported by URI scheme {self.scheme}")
757class ButlerFileURI(ButlerURI):
758 """URI for explicit ``file`` scheme."""
760 transferModes = ("copy", "link", "symlink", "hardlink", "relsymlink", "auto", "move")
761 transferDefault: str = "link"
@property
def ospath(self) -> str:
    """Local-OS rendering of the URI path.

    The stored URI path is quoted, so the result is unquoted after it has
    been converted to the local path convention.
    """
    localized = posix2os(self._uri.path)
    return urllib.parse.unquote(localized)
def exists(self) -> bool:
    """Return `True` if the local path exists.

    Relies on `os.path.exists`, so a soft link pointing at a file that no
    longer exists is reported as missing.
    """
    local_path = self.ospath
    return os.path.exists(local_path)
def size(self) -> int:
    """Return the size in bytes of the local file, or 0 for a directory."""
    local_path = self.ospath
    if os.path.isdir(local_path):
        return 0
    return os.stat(local_path).st_size
def remove(self) -> None:
    """Delete the local file this URI refers to."""
    target = self.ospath
    os.remove(target)
def as_local(self) -> Tuple[str, bool]:
    """Report where this file lives on the local file system.

    Returns
    -------
    path : `str`
        The local path to this file.
    temporary : `bool`
        Always `False`; the path is the real file, not a temporary copy.
    """
    is_temporary = False
    return (self.ospath, is_temporary)
800 def _force_to_file(self) -> ButlerFileURI:
801 """Force a schemeless URI to a file URI and returns a new URI.
803 Returns
804 -------
805 file : `ButlerFileURI`
806 A copy of the URI using file scheme. If already a file scheme
807 the copy will be identical.
809 Raises
810 ------
811 ValueError
812 Raised if this URI is schemeless and relative path and so can
813 not be forced to file absolute path without context.
814 """
815 # This is always a file scheme so always return copy
816 return copy.copy(self)
def relative_to(self, other: ButlerURI) -> Optional[str]:
    """Return the relative path from this URI to the other URI.

    Parameters
    ----------
    other : `ButlerURI`
        URI to use to calculate the relative path. Must be a parent
        of this URI.

    Returns
    -------
    subpath : `str`
        The sub path of this URI relative to the supplied other URI.
        Returns `None` if there is no parent child relationship.
        Scheme and netloc must match but for file URIs schemeless
        is also used. If this URI is a relative URI but the other is
        absolute, it is assumed to be in the parent completely unless it
        starts with ".." (in which case the path is combined and tested).
        If both URIs are relative, the relative paths are compared
        for commonality.

    Notes
    -----
    By definition a relative path will be relative to the enclosing
    absolute parent URI. It will be returned unchanged if it does not
    use a parent directory specification.
    """
    # self is a file URI; any scheme on the other side apart from file
    # (or no scheme at all) means the two cannot share a path.
    other_scheme = other.scheme
    if other_scheme and other_scheme != "file":
        return None

    self_is_abs = self.isabs()

    if not self_is_abs:
        if not other.isabs():
            # Both relative: the generic logic applies, so that
            # a/b/c.txt relative to a/b/ gives c.txt.
            return super().relative_to(other)

        # Relative child with an absolute parent: anchor the child at the
        # parent first. This only matters when the relative path contains
        # ".." yet still ends up inside the parent directory.
        anchored = other.join(self.path)
        return anchored.relative_to(other)

    # Matching schemes can be handled directly by the base class.
    if self.scheme == other_scheme:
        return super().relative_to(other)

    # One side is schemeless and the other is not, which the base class
    # would reject. Both are absolute so forcing them to file is safe.
    # The cast tells mypy that other must be a ButlerFileURI by this point.
    other_as_file = cast(ButlerFileURI, other)._force_to_file()
    return self._force_to_file().relative_to(other_as_file)
def read(self, size: int = -1) -> bytes:
    """Read up to ``size`` bytes from the local file.

    Parameters
    ----------
    size : `int`, optional
        The number of bytes to read. Negative or omitted indicates
        that all data should be read.
    """
    with open(self.ospath, "rb") as stream:
        contents = stream.read(size)
    return contents
def write(self, data: bytes, overwrite: bool = True) -> None:
    """Write bytes to the local file, creating the parent directory if
    it is missing.

    Parameters
    ----------
    data : `bytes`
        The bytes to write to the resource. The entire contents of the
        resource will be replaced.
    overwrite : `bool`, optional
        If `True` an existing file is replaced. Otherwise the file is
        opened in exclusive-creation mode and an existing file makes the
        write fail.
    """
    parent_dir = os.path.dirname(self.ospath)
    if not os.path.exists(parent_dir):
        safeMakeDir(parent_dir)

    # "xb" refuses to open a file that already exists.
    mode = "wb" if overwrite else "xb"
    with open(self.ospath, mode) as stream:
        stream.write(data)
def mkdir(self) -> None:
    """Create the local directory if nothing exists at the path yet.

    Raises
    ------
    FileExistsError
        Raised if the path exists but is not a directory.
    """
    if os.path.exists(self.ospath):
        if not os.path.isdir(self.ospath):
            raise FileExistsError(f"URI {self} exists but is not a directory!")
        return
    safeMakeDir(self.ospath)
def transfer_from(self, src: ButlerURI, transfer: str,
                  overwrite: bool = False,
                  transaction: Optional[Union[DatastoreTransaction, NoTransaction]] = None) -> None:
    """Transfer the current resource to a local file.

    Parameters
    ----------
    src : `ButlerURI`
        Source URI.
    transfer : `str`
        Mode to use for transferring the resource. Supports the following
        options: copy, link, symlink, hardlink, relsymlink.
    overwrite : `bool`, optional
        Allow an existing file to be overwritten. Defaults to `False`.
    transaction : `DatastoreTransaction`, optional
        If a transaction is provided, undo actions will be registered.

    Raises
    ------
    ValueError
        Raised if the transfer mode is not one of ``transferModes``.
    FileNotFoundError
        Raised if the local form of the source does not exist.
    RuntimeError
        Raised if a link mode is requested for a source that is only
        available as a temporary local copy.
    FileExistsError
        Raised if the destination exists and ``overwrite`` is `False`.
    NotImplementedError
        Raised if the transfer mode is not handled by this method.
    """
    # Fail early to prevent delays if remote resources are requested
    if transfer not in self.transferModes:
        raise ValueError(f"Transfer mode '{transfer}' not supported by URI scheme {self.scheme}")

    # The existence checks are only needed for the message and can be
    # costly for a remote source, so skip them unless debug is enabled.
    if log.isEnabledFor(logging.DEBUG):
        log.debug("Transferring %s [exists: %s] -> %s [exists: %s] (transfer=%s)",
                  src, src.exists(), self, self.exists(), transfer)

    # We do not have to special case ButlerFileURI here because
    # as_local handles that.
    local_src, is_temporary = src.as_local()

    try:
        # Default transfer mode depends on whether we have a temporary
        # file or not.
        if transfer == "auto":
            transfer = self.transferDefault if not is_temporary else "copy"

        # Follow soft links
        local_src = os.path.realpath(os.path.normpath(local_src))

        if not os.path.exists(local_src):
            raise FileNotFoundError(f"Source URI {src} does not exist")

        # All the modes involving linking use "link" somewhere
        if "link" in transfer and is_temporary:
            raise RuntimeError("Can not use local file system transfer mode"
                               f" {transfer} for remote resource ({src})")

        # For temporary files we can own them
        requested_transfer = transfer
        if is_temporary and transfer == "copy":
            transfer = "move"

        # The output location should not exist
        dest_exists = self.exists()
        if not overwrite and dest_exists:
            raise FileExistsError(f"Destination path '{self}' already exists. Transfer "
                                  f"from {src} cannot be completed.")

        # Make the path absolute (but don't follow links since that
        # would possibly cause us to end up in the wrong place if the
        # file existed already as a soft link)
        newFullPath = os.path.abspath(self.ospath)
        outputDir = os.path.dirname(newFullPath)
        if not os.path.isdir(outputDir):
            # Must create the directory -- this can not be rolled back
            # since another transfer running concurrently may
            # be relying on this existing.
            safeMakeDir(outputDir)

        if transaction is None:
            # Use a no-op transaction to reduce code duplication
            transaction = NoTransaction()

        # For links the OS doesn't let us overwrite so if something does
        # exist we have to remove it before we do the actual "transfer"
        # below
        if "link" in transfer and overwrite and dest_exists:
            try:
                self.remove()
            except Exception:
                # Deliberately best effort: if this fails the problem
                # will manifest immediately below with a more relevant
                # error message
                pass

        if transfer == "move":
            with transaction.undoWith(f"move from {local_src}", shutil.move, newFullPath, local_src):
                shutil.move(local_src, newFullPath)
        elif transfer == "copy":
            with transaction.undoWith(f"copy from {local_src}", os.remove, newFullPath):
                shutil.copy(local_src, newFullPath)
        elif transfer == "link":
            # Try hard link and if that fails use a symlink
            with transaction.undoWith(f"link to {local_src}", os.remove, newFullPath):
                try:
                    os.link(local_src, newFullPath)
                except OSError:
                    # Read through existing symlinks
                    os.symlink(local_src, newFullPath)
        elif transfer == "hardlink":
            with transaction.undoWith(f"hardlink to {local_src}", os.remove, newFullPath):
                os.link(local_src, newFullPath)
        elif transfer == "symlink":
            # Read through existing symlinks
            with transaction.undoWith(f"symlink to {local_src}", os.remove, newFullPath):
                os.symlink(local_src, newFullPath)
        elif transfer == "relsymlink":
            # This is a standard symlink but using a relative path
            # Need the directory name to give to relative root
            # A full file path confuses it into an extra ../
            newFullPathRoot = os.path.dirname(newFullPath)
            relPath = os.path.relpath(local_src, newFullPathRoot)
            with transaction.undoWith(f"relsymlink to {local_src}", os.remove, newFullPath):
                os.symlink(relPath, newFullPath)
        else:
            raise NotImplementedError("Transfer type '{}' not supported.".format(transfer))

        # This was an explicit move requested from a remote resource
        # try to remove that resource. We check is_temporary because
        # the local file would have been moved by shutil.move already.
        if requested_transfer == "move" and is_temporary:
            # Transactions do not work here
            src.remove()
    finally:
        # A temporary local copy belongs to this method. On success it
        # has normally been moved into place already; on any failure
        # above it would otherwise be left behind.
        if is_temporary and os.path.exists(local_src):
            os.remove(local_src)
@staticmethod
def _fixupPathUri(parsed: urllib.parse.ParseResult, root: Optional[Union[str, ButlerURI]] = None,
                  forceAbsolute: bool = False,
                  forceDirectory: bool = False) -> Tuple[urllib.parse.ParseResult, bool]:
    """Turn the path of a ``file`` URI into an absolute path.

    Parameters
    ----------
    parsed : `~urllib.parse.ParseResult`
        The result from parsing a URI using `urllib.parse`.
    root : `str` or `ButlerURI`, optional
        Location that a relative path is resolved against. If `None`
        the current working directory is used. A `ButlerURI` root must
        be scheme-less or use the ``file`` scheme. Only consulted when
        the path in ``parsed`` is relative.
    forceAbsolute : `bool`, ignored
        Not used by this implementation; relative paths are always made
        absolute.
    forceDirectory : `bool`, optional
        If `True` the returned path is made to end with a separator.
        It is switched on automatically when the path names an existing
        directory on the local file system.

    Returns
    -------
    modified : `~urllib.parse.ParseResult`
        The parse result with the path updated where needed.
    dirLike : `bool`
        `True` if the result is treated as a directory. For an absolute
        path this follows ``forceDirectory`` (given or detected); for a
        relative path a trailing separator in the input also counts.

    Raises
    ------
    RuntimeError
        Raised if ``root`` is a `ButlerURI` with a scheme other than
        ``file``.

    Notes
    -----
    RFC8089 has no notion of a relative ``file`` URI but `urllib` will
    parse ``file:relative/path.ext``, so such paths are joined to
    ``root`` and normalized here.
    """
    # A file URI always uses POSIX separators, whatever the OS. Tilde
    # expansion is deliberately not done for the "file" scheme.
    slash = posixpath.sep
    uriPath = parsed.path

    # The target may not exist yet, but if it is an existing directory
    # behave as if the caller had asked for a directory. The file system
    # is not consulted when the caller has already told us.
    treatAsDir = forceDirectory or posixpath.isdir(uriPath)

    # Absolute path: the only possible change is a trailing separator.
    if posixpath.isabs(uriPath):
        if not treatAsDir:
            return copy.copy(parsed), False
        if not uriPath.endswith(slash):
            parsed = parsed._replace(path=uriPath + slash)
        return copy.copy(parsed), True

    # Relative path: work out the local directory to resolve it against.
    if root is None:
        rootDir = os.path.abspath(os.path.curdir)
    elif isinstance(root, ButlerURI):
        if root.scheme and root.scheme != "file":
            raise RuntimeError(f"The override root must be a file URI not {root.scheme}")
        rootDir = os.path.abspath(root.ospath)
    else:
        rootDir = root

    absPath = posixpath.normpath(posixpath.join(os2posix(rootDir), uriPath))

    # normpath drops a trailing "/" so remember whether one was supplied.
    lostTrailingSep = uriPath.endswith(slash) and not absPath.endswith(slash)
    dirLike = bool(treatAsDir or lostTrailingSep)
    if dirLike:
        absPath += slash

    # ParseResult is a NamedTuple so _replace is standard API
    fixed = parsed._replace(path=absPath)

    if fixed.params or fixed.query:
        log.warning("Additional items unexpectedly encountered in file URI: %s", fixed.geturl())

    return fixed, dirLike
class ButlerS3URI(ButlerURI):
    """S3 URI.

    The network location of the URI is used as the bucket name and the
    path relative to the root is used as the object key.
    """

    @property
    def client(self) -> boto3.client:
        """Client object to address remote resource."""
        # Defer import for circular dependencies
        from .s3utils import getS3Client
        return getS3Client()

    def exists(self) -> bool:
        """Check whether the resource exists.

        Returns
        -------
        exists : `bool`
            For a root URI, whether the bucket exists; otherwise the
            existence flag reported by ``s3CheckFileExists``.
        """
        # s3utils itself imports ButlerURI so defer this import
        from .s3utils import s3CheckFileExists, bucketExists
        if self.is_root:
            # Only check for the bucket since the path is irrelevant
            return bucketExists(self.netloc)
        exists, _ = s3CheckFileExists(self, client=self.client)
        return exists

    def size(self) -> int:
        """Return the size of the resource.

        Returns
        -------
        sz : `int`
            Always 0 for a directory-like URI, otherwise the size
            reported by ``s3CheckFileExists``.
        """
        # s3utils itself imports ButlerURI so defer this import
        from .s3utils import s3CheckFileExists
        if self.dirLike:
            return 0
        _, sz = s3CheckFileExists(self, client=self.client)
        return sz

    def remove(self) -> None:
        """Remove the resource."""
        # https://github.com/boto/boto3/issues/507 - there is no
        # way of knowing if the file was actually deleted except
        # for checking all the keys again, response is HTTP 204 OK
        # response all the time
        self.client.delete_object(Bucket=self.netloc, Key=self.relativeToPathRoot)

    def read(self, size: int = -1) -> bytes:
        """Read the contents of the resource.

        Parameters
        ----------
        size : `int`, optional
            Number of bytes to request from the start of the object.
            Zero or a negative value requests the whole object.

        Returns
        -------
        body : `bytes`
            The bytes read.

        Raises
        ------
        FileNotFoundError
            Raised if the key or the bucket does not exist.
        """
        args = {}
        if size > 0:
            args["Range"] = f"bytes=0-{size-1}"
        try:
            response = self.client.get_object(Bucket=self.netloc,
                                              Key=self.relativeToPathRoot,
                                              **args)
        except (self.client.exceptions.NoSuchKey, self.client.exceptions.NoSuchBucket) as err:
            raise FileNotFoundError(f"No such resource: {self}") from err
        stream = response["Body"]
        try:
            return stream.read()
        finally:
            # Release the stream even if the read itself fails
            stream.close()

    def write(self, data: bytes, overwrite: bool = True) -> None:
        """Write the supplied bytes to the resource.

        Parameters
        ----------
        data : `bytes`
            The bytes to store.
        overwrite : `bool`, optional
            If `False` an existing resource is not replaced.

        Raises
        ------
        FileExistsError
            Raised if the resource exists and ``overwrite`` is `False`.
        """
        if not overwrite:
            if self.exists():
                raise FileExistsError(f"Remote resource {self} exists and overwrite has been disabled")
        self.client.put_object(Bucket=self.netloc, Key=self.relativeToPathRoot,
                               Body=data)

    def mkdir(self) -> None:
        """Create the key representing a directory-like URI.

        Raises
        ------
        ValueError
            Raised if the bucket does not exist or if this URI is not
            directory-like.
        """
        # Defer import for circular dependencies
        from .s3utils import bucketExists
        if not bucketExists(self.netloc):
            raise ValueError(f"Bucket {self.netloc} does not exist for {self}!")

        if not self.dirLike:
            raise ValueError(f"Can not create a 'directory' for file-like URI {self}")

        # don't create S3 key when root is at the top-level of an Bucket
        if self.path != "/":
            self.client.put_object(Bucket=self.netloc, Key=self.relativeToPathRoot)

    def as_local(self) -> Tuple[str, bool]:
        """Download object from S3 and place in temporary directory.

        Returns
        -------
        path : `str`
            Path to local temporary file.
        temporary : `bool`
            Always returns `True`. This is always a temporary file.
        """
        with tempfile.NamedTemporaryFile(suffix=self.getExtension(), delete=False) as tmpFile:
            try:
                self.client.download_fileobj(self.netloc, self.relativeToPathRoot, tmpFile)
            except Exception:
                # The file is created with delete=False so a failed
                # download would otherwise leave a partial file behind.
                tmpFile.close()
                os.remove(tmpFile.name)
                raise
        return tmpFile.name, True

    def transfer_from(self, src: ButlerURI, transfer: str = "copy",
                      overwrite: bool = False,
                      transaction: Optional[Union[DatastoreTransaction, NoTransaction]] = None) -> None:
        """Transfer the current resource to an S3 bucket.

        Parameters
        ----------
        src : `ButlerURI`
            Source URI.
        transfer : `str`
            Mode to use for transferring the resource. Must be one of
            the modes listed in ``transferModes``. A ``move`` removes the
            source once the data have been transferred.
        overwrite : `bool`, optional
            Allow an existing file to be overwritten. Defaults to `False`.
        transaction : `DatastoreTransaction`, optional
            Currently unused.

        Raises
        ------
        ValueError
            Raised if the transfer mode is not supported.
        FileExistsError
            Raised if the destination exists and ``overwrite`` is `False`.
        """
        # Fail early to prevent delays if remote resources are requested
        if transfer not in self.transferModes:
            raise ValueError(f"Transfer mode '{transfer}' not supported by URI scheme {self.scheme}")

        # The existence checks may contact remote services so only make
        # them when the message is actually going to be emitted.
        if log.isEnabledFor(logging.DEBUG):
            log.debug(f"Transferring {src} [exists: {src.exists()}] -> "
                      f"{self} [exists: {self.exists()}] (transfer={transfer})")

        if not overwrite and self.exists():
            raise FileExistsError(f"Destination path '{self}' already exists.")

        if transfer == "auto":
            transfer = self.transferDefault

        if isinstance(src, type(self)):
            # Looks like an S3 remote uri so we can use direct copy
            # note that boto3.resource.meta.copy is cleverer than the low
            # level copy_object
            copy_source = {
                "Bucket": src.netloc,
                "Key": src.relativeToPathRoot,
            }
            self.client.copy_object(CopySource=copy_source, Bucket=self.netloc, Key=self.relativeToPathRoot)
        else:
            # Use local file and upload it
            local_src, is_temporary = src.as_local()

            # resource.meta.upload_file seems like the right thing
            # but we have a low level client
            try:
                with open(local_src, "rb") as fh:
                    self.client.put_object(Bucket=self.netloc,
                                           Key=self.relativeToPathRoot, Body=fh)
            finally:
                # Clean up the temporary copy even if the upload failed
                if is_temporary:
                    os.remove(local_src)

        # This was an explicit move requested from a remote resource
        # try to remove that resource
        if transfer == "move":
            # Transactions do not work here
            src.remove()
class ButlerPackageResourceURI(ButlerURI):
    """URI that points at a resource shipped inside a Python package.

    An example is ``resource://lsst.daf.butler/configs/file.yaml``: the
    network location names the Python package and the path names the
    resource within that package.
    """

    def exists(self) -> bool:
        """Report whether the package contains the named resource."""
        package = self.netloc
        resource = self.relativeToPathRoot
        return pkg_resources.resource_exists(package, resource)

    def read(self, size: int = -1) -> bytes:
        """Read bytes from the package resource.

        Parameters
        ----------
        size : `int`, optional
            Passed straight to the ``read`` method of the resource
            stream.

        Returns
        -------
        data : `bytes`
            The bytes returned by the resource stream.
        """
        stream = pkg_resources.resource_stream(self.netloc, self.relativeToPathRoot)
        with stream as fh:
            data = fh.read(size)
        return data
class ButlerHttpURI(ButlerURI):
    """General HTTP(S) resource."""

    # Session shared by every instance. It starts out as a plain
    # requests session and is replaced on first use if the server turns
    # out to be a WebDAV endpoint.
    _session = requests.Session()
    _sessionInitialized = False

    @property
    def session(self) -> requests.Session:
        """Client object to address remote resource."""
        from .webdavutils import refreshToken, isTokenAuth, getHttpSession, isWebdavEndpoint
        if ButlerHttpURI._sessionInitialized:
            if isTokenAuth():
                refreshToken(ButlerHttpURI._session)
            return ButlerHttpURI._session

        baseURL = self.scheme + "://" + self.netloc

        if isWebdavEndpoint(baseURL):
            log.debug("%s looks like a Webdav endpoint.", baseURL)
            s = getHttpSession()
        else:
            # Not a WebDAV server: keep the default class-level session
            # rather than failing on an unassigned local variable.
            s = ButlerHttpURI._session

        ButlerHttpURI._session = s
        ButlerHttpURI._sessionInitialized = True
        return s

    def exists(self) -> bool:
        """Check that a remote HTTP resource exists.

        Returns
        -------
        exists : `bool`
            `True` if a HEAD request for the URL returns status 200.
        """
        log.debug("Checking if resource exists: %s", self.geturl())
        r = self.session.head(self.geturl())

        return r.status_code == 200

    def size(self) -> int:
        """Return the size of the resource in bytes.

        Returns
        -------
        sz : `int`
            Always 0 for a directory-like URI, otherwise the value of the
            ``Content-Length`` header of a HEAD request.

        Raises
        ------
        FileNotFoundError
            Raised if the HEAD request does not return status 200.
        """
        if self.dirLike:
            return 0
        r = self.session.head(self.geturl())
        if r.status_code == 200:
            return int(r.headers['Content-Length'])
        else:
            raise FileNotFoundError(f"Resource {self} does not exist")

    def mkdir(self) -> None:
        """For a dir-like URI, create the directory resource if it does not
        already exist.

        Raises
        ------
        ValueError
            Raised if this URI is not directory-like or if the server
            refuses to create the directory.
        """
        if not self.dirLike:
            raise ValueError(f"Can not create a 'directory' for file-like URI {self}")

        if not self.exists():
            # We need to test the absence of the parent directory,
            # but also if parent URL is different from self URL,
            # otherwise we could be stuck in a recursive loop
            # where self == parent
            parent = self.parent()
            if parent.geturl() != self.geturl() and not parent.exists():
                parent.mkdir()
            log.debug("Creating new directory: %s", self.geturl())
            r = self.session.request("MKCOL", self.geturl())
            if r.status_code != 201:
                if r.status_code == 405:
                    log.debug("Can not create directory: %s may already exist: skipping.", self.geturl())
                else:
                    raise ValueError(f"Can not create directory {self}, status code: {r.status_code}")

    def remove(self) -> None:
        """Remove the resource.

        Raises
        ------
        FileNotFoundError
            Raised if the DELETE request does not return 200, 202 or 204.
        """
        log.debug("Removing resource: %s", self.geturl())
        r = self.session.delete(self.geturl())
        if r.status_code not in [200, 202, 204]:
            raise FileNotFoundError(f"Unable to delete resource {self}; status code: {r.status_code}")

    def as_local(self) -> Tuple[str, bool]:
        """Download object over HTTP and place in temporary directory.

        Returns
        -------
        path : `str`
            Path to local temporary file.
        temporary : `bool`
            Always returns `True`. This is always a temporary file.

        Raises
        ------
        FileNotFoundError
            Raised if the GET request does not return status 200.
        """
        log.debug("Downloading remote resource as local file: %s", self.geturl())
        r = self.session.get(self.geturl(), stream=True)
        if r.status_code != 200:
            raise FileNotFoundError(f"Unable to download resource {self}; status code: {r.status_code}")
        with tempfile.NamedTemporaryFile(suffix=self.getExtension(), delete=False) as tmpFile:
            try:
                # The default chunk size of iter_content is a single
                # byte; ask for a sensible block size instead.
                for chunk in r.iter_content(chunk_size=65536):
                    tmpFile.write(chunk)
            except Exception:
                # The file is created with delete=False so a failed
                # download would otherwise leave a partial file behind.
                tmpFile.close()
                os.remove(tmpFile.name)
                raise
        return tmpFile.name, True

    def read(self, size: int = -1) -> bytes:
        """Open the resource and return the contents in bytes.

        Parameters
        ----------
        size : `int`, optional
            The number of bytes to read. Negative or omitted indicates
            that all data should be read.

        Returns
        -------
        data : `bytes`
            The full content, or the first chunk of at most ``size``
            bytes when ``size`` is positive.

        Raises
        ------
        FileNotFoundError
            Raised if the GET request does not return status 200.
        """
        log.debug("Reading from remote resource: %s", self.geturl())
        stream = size > 0
        r = self.session.get(self.geturl(), stream=stream)
        try:
            if r.status_code != 200:
                raise FileNotFoundError(f"Unable to read resource {self}; status code: {r.status_code}")
            if not stream:
                return r.content
            # An empty body yields no chunks at all; return empty bytes
            # rather than letting StopIteration escape.
            return next(r.iter_content(chunk_size=size), b"")
        finally:
            # A streamed response holds its connection until closed
            r.close()

    def write(self, data: bytes, overwrite: bool = True) -> None:
        """Write the supplied bytes to the new resource.

        Parameters
        ----------
        data : `bytes`
            The bytes to write to the resource. The entire contents of the
            resource will be replaced.
        overwrite : `bool`, optional
            If `True` the resource will be overwritten if it exists. Otherwise
            the write will fail.

        Raises
        ------
        FileExistsError
            Raised if the resource exists and ``overwrite`` is `False`.
        ValueError
            Raised if the PUT request does not return 201, 202 or 204.
        """
        from .webdavutils import finalurl
        log.debug("Writing to remote resource: %s", self.geturl())
        if not overwrite:
            if self.exists():
                raise FileExistsError(f"Remote resource {self} exists and overwrite has been disabled")
        dest_url = finalurl(self._emptyPut())
        r = self.session.put(dest_url, data=data)
        if r.status_code not in [201, 202, 204]:
            raise ValueError(f"Can not write file {self}, status code: {r.status_code}")

    def transfer_from(self, src: ButlerURI, transfer: str = "copy",
                      overwrite: bool = False,
                      transaction: Optional[Union[DatastoreTransaction, NoTransaction]] = None) -> None:
        """Transfer the current resource to a Webdav repository.

        Parameters
        ----------
        src : `ButlerURI`
            Source URI.
        transfer : `str`
            Mode to use for transferring the resource. Must be one of
            the modes listed in ``transferModes``. Between two HTTP URIs
            ``move`` issues a MOVE request and anything else a COPY.
        overwrite : `bool`, optional
            Allow an existing file to be overwritten. Defaults to `False`.
        transaction : `DatastoreTransaction`, optional
            Currently unused.

        Raises
        ------
        ValueError
            Raised if the transfer mode is not supported or the server
            does not return 201, 202 or 204.
        FileExistsError
            Raised if the destination exists and ``overwrite`` is `False`.
        """
        from .webdavutils import finalurl
        # Fail early to prevent delays if remote resources are requested
        if transfer not in self.transferModes:
            raise ValueError(f"Transfer mode {transfer} not supported by URI scheme {self.scheme}")

        # The existence checks are remote requests so only make them
        # when the message is actually going to be emitted.
        if log.isEnabledFor(logging.DEBUG):
            log.debug(f"Transferring {src} [exists: {src.exists()}] -> "
                      f"{self} [exists: {self.exists()}] (transfer={transfer})")

        if not overwrite and self.exists():
            raise FileExistsError(f"Destination path {self} already exists.")

        if transfer == "auto":
            transfer = self.transferDefault

        if isinstance(src, type(self)):
            if transfer == "move":
                r = self.session.request("MOVE", src.geturl(), headers={"Destination": self.geturl()})
                log.debug("Running move via MOVE HTTP request.")
            else:
                r = self.session.request("COPY", src.geturl(), headers={"Destination": self.geturl()})
                log.debug("Running copy via COPY HTTP request.")
        else:
            # Use local file and upload it
            # NOTE(review): a "move" from a non-HTTP source leaves the
            # source in place here -- confirm whether that is intended.
            local_src, is_temporary = src.as_local()
            try:
                with open(local_src, "rb") as f:
                    dest_url = finalurl(self._emptyPut())
                    r = self.session.put(dest_url, data=f)
            finally:
                # Clean up the temporary copy even if the upload failed
                if is_temporary:
                    os.remove(local_src)
            log.debug("Running transfer from a local copy of the file.")

        if r.status_code not in [201, 202, 204]:
            raise ValueError(f"Can not transfer file {self}, status code: {r.status_code}")

    def _emptyPut(self) -> requests.Response:
        """Send an empty PUT request to current URL. This is used to detect
        if redirection is enabled before sending actual data.

        Returns
        -------
        response : `requests.Response`
            HTTP Response from the endpoint.
        """
        return self.session.put(self.geturl(), data=None,
                                headers={"Content-Length": "0"}, allow_redirects=False)
class ButlerInMemoryURI(ButlerURI):
    """Internal in-memory datastore URI (`mem://`).

    Not used for any real purpose other than indicating that the dataset
    is in memory.
    """

    def exists(self) -> bool:
        """Test for existence and always return True."""
        return True

    def as_local(self) -> Tuple[str, bool]:
        """Always fail since there is no file to make local.

        Raises
        ------
        RuntimeError
            Always raised.
        """
        raise RuntimeError(f"Do not know how to retrieve data for URI '{self}'")
class ButlerSchemelessURI(ButlerFileURI):
    """Scheme-less URI referring to the local file system"""

    _pathLib = PurePath
    _pathModule = os.path
    quotePaths = False

    @property
    def ospath(self) -> str:
        """Path component of the URI localized to current OS."""
        return self.path

    def isabs(self) -> bool:
        """Indicate that the resource is fully specified.

        For non-schemeless URIs this is always true.

        Returns
        -------
        isabs : `bool`
            `True` if the file is absolute, `False` otherwise.
        """
        return os.path.isabs(self.ospath)

    def _force_to_file(self) -> ButlerFileURI:
        """Force a schemeless URI to a file URI and returns a new URI.

        This will include URI quoting of the path.

        Returns
        -------
        file : `ButlerFileURI`
            A copy of the URI using file scheme. If already a file scheme
            the copy will be identical.

        Raises
        ------
        RuntimeError
            Raised if this URI is schemeless and relative path and so can
            not be forced to file absolute path without context.
        """
        if not self.isabs():
            raise RuntimeError(f"Internal error: Can not force {self} to absolute file URI")
        uri = self._uri._replace(scheme="file", path=urllib.parse.quote(os2posix(self.path)))
        # mypy really wants a ButlerFileURI to be returned here
        return ButlerURI(uri, forceDirectory=self.dirLike)  # type: ignore

    @staticmethod
    def _fixupPathUri(parsed: urllib.parse.ParseResult, root: Optional[Union[str, ButlerURI]] = None,
                      forceAbsolute: bool = False,
                      forceDirectory: bool = False) -> Tuple[urllib.parse.ParseResult, bool]:
        """Fix up relative paths for local file system.

        Parameters
        ----------
        parsed : `~urllib.parse.ParseResult`
            The result from parsing a URI using `urllib.parse`.
        root : `str` or `ButlerURI`, optional
            Path to use as root when converting relative to absolute.
            If `None`, it will be the current working directory. This
            is a local file system path, or a file URI.
        forceAbsolute : `bool`, optional
            If `True`, a relative path is joined to ``root`` to make it
            absolute; the scheme is left untouched by this method. If
            `False` a relative path stays relative and is only
            normalized.
        forceDirectory : `bool`, optional
            If `True` forces the URI to end with a separator, otherwise given
            URI is interpreted as is.

        Returns
        -------
        modified : `~urllib.parse.ParseResult`
            Update result if a URI is being handled.
        dirLike : `bool`
            `True` if the path is treated as a directory: it ended with
            a separator, was empty, names an existing directory, or
            ``forceDirectory`` is `True`. Otherwise `False`.

        Raises
        ------
        RuntimeError
            Raised if ``root`` is a `ButlerURI` with a scheme other than
            ``file``.

        Notes
        -----
        The path is unquoted and has any leading tilde expanded before
        being examined. A path that is already absolute is given the
        ``file`` scheme and is converted to a quoted POSIX path.

        Scheme-less paths are normalized.
        """
        # assume we are not dealing with a directory URI
        dirLike = False

        # Replacement values for the URI
        replacements = {}

        if root is None:
            root = os.path.abspath(os.path.curdir)
        elif isinstance(root, ButlerURI):
            if root.scheme and root.scheme != "file":
                raise RuntimeError(f"The override root must be a file URI not {root.scheme}")
            root = os.path.abspath(root.ospath)

        # this is a local OS file path which can support tilde expansion.
        # we quoted it in the constructor so unquote here
        expandedPath = os.path.expanduser(urllib.parse.unquote(parsed.path))

        # Ensure that this becomes a file URI if it is already absolute
        if os.path.isabs(expandedPath):
            replacements["scheme"] = "file"
            # Keep in OS form for now to simplify later logic
            replacements["path"] = os.path.normpath(expandedPath)
        elif forceAbsolute:
            # This can stay in OS path form, do not change to file
            # scheme.
            replacements["path"] = os.path.normpath(os.path.join(root, expandedPath))
        else:
            # No change needed for relative local path staying relative
            # except normalization
            replacements["path"] = os.path.normpath(expandedPath)
            # normalization of empty path returns "." so we are dirLike
            if expandedPath == "":
                dirLike = True

        # normpath strips trailing "/" which makes it hard to keep
        # track of directory vs file when calling replaceFile

        # For local file system we can explicitly check to see if this
        # really is a directory. The URI might point to a location that
        # does not exist yet, but if it is a directory we make use of
        # that fact. No need to do the check if we are already being
        # told.
        if not forceDirectory and os.path.isdir(replacements["path"]):
            forceDirectory = True

        # add the trailing separator only if explicitly required or
        # if it was stripped by normpath. Acknowledge that trailing
        # separator exists.
        endsOnSep = expandedPath.endswith(os.sep) and not replacements["path"].endswith(os.sep)
        if (forceDirectory or endsOnSep or dirLike):
            dirLike = True
            if not replacements["path"].endswith(os.sep):
                replacements["path"] += os.sep

        if "scheme" in replacements:
            # This is now meant to be a URI path so force to posix
            # and quote
            replacements["path"] = urllib.parse.quote(os2posix(replacements["path"]))

        # ParseResult is a NamedTuple so _replace is standard API
        parsed = parsed._replace(**replacements)

        if parsed.params or parsed.fragment or parsed.query:
            log.warning("Additional items unexpectedly encountered in schemeless URI: %s", parsed.geturl())

        return parsed, dirLike