Coverage for python/lsst/daf/butler/core/_butlerUri.py : 17%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ("ButlerURI",)
26import contextlib
27import os
28import os.path
29import shutil
30import urllib
31import pkg_resources
32import posixpath
33from pathlib import Path, PurePath, PurePosixPath
34import requests
35import tempfile
36import copy
37import logging
38import re
40from typing import (
41 TYPE_CHECKING,
42 Any,
43 Callable,
44 cast,
45 Iterator,
46 Optional,
47 Tuple,
48 Type,
49 Union,
50)
52from .utils import safeMakeDir
54if TYPE_CHECKING: 54 ↛ 55line 54 didn't jump to line 55, because the condition on line 54 was never true
55 try:
56 import boto3
57 except ImportError:
58 pass
59 from .datastore import DatastoreTransaction
62log = logging.getLogger(__name__)
64# Determine if the path separator for the OS looks like POSIX
65IS_POSIX = os.sep == posixpath.sep
67# Root path for this operating system
68OS_ROOT_PATH = Path().resolve().root
70# Regex for looking for URI escapes
71ESCAPES_RE = re.compile(r"%[A-F0-9]{2}")
def os2posix(ospath: str) -> str:
    """Translate a path written in local OS notation into POSIX notation.

    Parameters
    ----------
    ospath : `str`
        Path using the local path separator.

    Returns
    -------
    posix : `str`
        Path using POSIX path separator
    """
    if not IS_POSIX:
        converted = PurePath(ospath).as_posix()

        # PurePath discards a trailing separator, which would lose the
        # hint that the path refers to a directory, so put it back.
        hadTrailingSep = ospath.endswith(os.sep)
        if hadTrailingSep and not converted.endswith(posixpath.sep):
            converted = converted + posixpath.sep

        return converted

    # Local separator is already the POSIX one; nothing to convert.
    return ospath
def posix2os(posix: Union[PurePath, str]) -> str:
    """Convert a POSIX path description to a local path description.

    Parameters
    ----------
    posix : `str`, `PurePath`
        Path using the POSIX path separator.

    Returns
    -------
    ospath : `str`
        Path using OS path separator. An empty input is returned
        unchanged (as ``str(posix)``) on every platform.
    """
    if IS_POSIX:
        return str(posix)

    posixPath = PurePosixPath(posix)
    paths = list(posixPath.parts)

    # An empty path has no components at all. Indexing ``paths[0]`` or
    # calling ``os.path.join()`` with no arguments would raise, so mirror
    # the POSIX branch and hand back the string form untouched.
    if not paths:
        return str(posix)

    # Have to convert the root directory after splitting
    if paths[0] == posixPath.root:
        paths[0] = OS_ROOT_PATH

    # Trailing "/" is stripped so we need to add back an empty path
    # for consistency
    if str(posix).endswith(posixpath.sep):
        paths.append("")

    return os.path.join(*paths)
class NoTransaction:
    """Stand-in for the `DatastoreTransaction` class.

    Records nothing and undoes nothing; it only lets calling code use
    the same ``with`` syntax whether or not a real transaction exists.
    """

    def __init__(self) -> None:
        pass

    @contextlib.contextmanager
    def undoWith(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> Iterator[None]:
        """No-op context manager to replace `DatastoreTransaction`.

        All arguments are accepted and ignored; ``undoFunc`` is never
        called.
        """
        yield
class ButlerURI:
    """Convenience wrapper around URI parsers.

    Provides access to URI components and can convert file
    paths into absolute path URIs. Scheme-less URIs are treated as if
    they are local file system paths and are converted to absolute URIs.

    A specialist subclass is created for each supported URI scheme.

    Parameters
    ----------
    uri : `str` or `urllib.parse.ParseResult`
        URI in string form.  Can be scheme-less if referring to a local
        filesystem path.
    root : `str`, optional
        When fixing up a relative path in a ``file`` scheme or if scheme-less,
        use this as the root. Must be absolute.  If `None` the current
        working directory will be used.
    forceAbsolute : `bool`, optional
        If `True`, scheme-less relative URI will be converted to an absolute
        path using a ``file`` scheme. If `False` scheme-less URI will remain
        scheme-less and will not be updated to ``file`` or absolute path.
    forceDirectory: `bool`, optional
        If `True` forces the URI to end with a separator, otherwise given URI
        is interpreted as is.
    """

    _pathLib: Type[PurePath] = PurePosixPath
    """Path library to use for this scheme."""

    _pathModule = posixpath
    """Path module to use for this scheme."""

    transferModes: Tuple[str, ...] = ("copy", "auto", "move")
    """Transfer modes supported by this implementation.

    Move is special in that it is generally a copy followed by an unlink.
    Whether that unlink works depends critically on whether the source URI
    implements unlink. If it does not the move will be reported as a failure.
    """

    transferDefault: str = "copy"
    """Default mode to use for transferring if ``auto`` is specified."""

    quotePaths = True
    """True if path-like elements modifying a URI should be quoted.

    All non-schemeless URIs have to internally use quoted paths. Therefore
    if a new file name is given (e.g. to updateFile or join) a decision must
    be made whether to quote it to be consistent.
    """

    # This is not an ABC with abstract methods because the __new__ being
    # a factory confuses mypy such that it assumes that every constructor
    # returns a ButlerURI and then determines that all the abstract methods
    # are still abstract. If they are not marked abstract but just raise
    # mypy is fine with it.

    # mypy is confused without this
    _uri: urllib.parse.ParseResult

    def __new__(cls, uri: Union[str, urllib.parse.ParseResult, ButlerURI],
                root: Optional[str] = None, forceAbsolute: bool = True,
                forceDirectory: bool = False) -> ButlerURI:
        """Create an instance of the scheme-specific subclass for ``uri``.

        Raises
        ------
        ValueError
            Raised if ``uri`` is not a string, `ButlerURI` or
            `urllib.parse.ParseResult`.
        NotImplementedError
            Raised if the URI scheme is not supported.
        """
        parsed: urllib.parse.ParseResult
        dirLike: bool
        subclass: Optional[Type] = None

        # Record if we need to post process the URI components
        # or if the instance is already fully configured
        if isinstance(uri, str):
            # Since local file names can have special characters in them
            # we need to quote them for the parser but we can unquote
            # later. Assume that all other URI schemes are quoted.
            # Since sometimes people write file:/a/b and not file:///a/b
            # we should not quote in the explicit case of file:
            if "://" not in uri and not uri.startswith("file:"):
                if ESCAPES_RE.search(uri):
                    log.warning("Possible double encoding of %s", uri)
                else:
                    uri = urllib.parse.quote(uri)
            parsed = urllib.parse.urlparse(uri)
        elif isinstance(uri, urllib.parse.ParseResult):
            parsed = copy.copy(uri)
        elif isinstance(uri, ButlerURI):
            parsed = copy.copy(uri._uri)
            dirLike = uri.dirLike
            # No further parsing required and we know the subclass
            subclass = type(uri)
        else:
            raise ValueError(f"Supplied URI must be string, ButlerURI, or ParseResult but got '{uri!r}'")

        if subclass is None:
            # Work out the subclass from the URI scheme
            if not parsed.scheme:
                subclass = ButlerSchemelessURI
            elif parsed.scheme == "file":
                subclass = ButlerFileURI
            elif parsed.scheme == "s3":
                subclass = ButlerS3URI
            elif parsed.scheme.startswith("http"):
                subclass = ButlerHttpURI
            elif parsed.scheme == "resource":
                # Rules for scheme names disallow pkg_resource
                subclass = ButlerPackageResourceURI
            elif parsed.scheme == "mem":
                # in-memory datastore object
                subclass = ButlerInMemoryURI
            else:
                # Both fragments must be f-strings; otherwise the URL
                # placeholder is reported literally instead of the URL.
                raise NotImplementedError(f"No URI support for scheme: '{parsed.scheme}'"
                                          f" in {parsed.geturl()}")

            parsed, dirLike = subclass._fixupPathUri(parsed, root=root,
                                                     forceAbsolute=forceAbsolute,
                                                     forceDirectory=forceDirectory)

            # It is possible for the class to change from schemeless
            # to file so handle that
            if parsed.scheme == "file":
                subclass = ButlerFileURI

        # Now create an instance of the correct subclass and set the
        # attributes directly
        self = object.__new__(subclass)
        self._uri = parsed
        self.dirLike = dirLike
        return self

    @property
    def scheme(self) -> str:
        """The URI scheme (``://`` is not part of the scheme)."""
        return self._uri.scheme

    @property
    def netloc(self) -> str:
        """The URI network location."""
        return self._uri.netloc

    @property
    def path(self) -> str:
        """The path component of the URI."""
        return self._uri.path

    @property
    def unquoted_path(self) -> str:
        """The path component of the URI with any URI quoting reversed."""
        return urllib.parse.unquote(self._uri.path)

    @property
    def ospath(self) -> str:
        """Path component of the URI localized to current OS."""
        raise AttributeError(f"Non-file URI ({self}) has no local OS path.")

    @property
    def relativeToPathRoot(self) -> str:
        """Returns path relative to network location.

        Effectively, this is the path property with posix separator stripped
        from the left hand side of the path.

        Always unquotes.
        """
        p = self._pathLib(self.path)
        relToRoot = str(p.relative_to(p.root))
        # The path library strips a trailing separator; restore it so a
        # directory-like URI still looks like a directory.
        if self.dirLike and not relToRoot.endswith("/"):
            relToRoot += "/"
        return urllib.parse.unquote(relToRoot)

    @property
    def fragment(self) -> str:
        """The fragment component of the URI."""
        return self._uri.fragment

    @property
    def params(self) -> str:
        """Any parameters included in the URI."""
        return self._uri.params

    @property
    def query(self) -> str:
        """Any query strings included in the URI."""
        return self._uri.query

    def geturl(self) -> str:
        """Return the URI in string form.

        Returns
        -------
        url : `str`
            String form of URI.
        """
        return self._uri.geturl()

    def split(self) -> Tuple[ButlerURI, str]:
        """Splits URI into head and tail. Equivalent to os.path.split where
        head preserves the URI components.

        Returns
        -------
        head: `ButlerURI`
            Everything leading up to tail, expanded and normalized as per
            ButlerURI rules.
        tail : `str`
            Last `self.path` component. Tail will be empty if path ends on a
            separator. Tail will never contain separators. It will be
            unquoted.
        """
        head, tail = self._pathModule.split(self.path)
        headuri = self._uri._replace(path=head)

        # The file part should never include quoted metacharacters
        tail = urllib.parse.unquote(tail)

        # Schemeless is special in that it can be a relative path
        # We need to ensure that it stays that way. All other URIs will
        # be absolute already.
        forceAbsolute = self._pathModule.isabs(self.path)
        return ButlerURI(headuri, forceDirectory=True, forceAbsolute=forceAbsolute), tail

    def basename(self) -> str:
        """Returns the base name, last element of path, of the URI. If URI ends
        on a slash returns an empty string. This is the second element returned
        by split().

        Equivalent of os.path.basename().

        Returns
        -------
        tail : `str`
            Last part of the path attribute. Tail will be empty if path ends
            on a separator.
        """
        return self.split()[1]

    def dirname(self) -> ButlerURI:
        """Returns a ButlerURI containing all the directories of the path
        attribute.

        Equivalent of os.path.dirname()

        Returns
        -------
        head : `ButlerURI`
            Everything except the tail of path attribute, expanded and
            normalized as per ButlerURI rules.
        """
        return self.split()[0]

    def replace(self, **kwargs: Any) -> ButlerURI:
        """Replace components in a URI with new values and return a new
        instance.

        Parameters
        ----------
        **kwargs
            Components to replace, passed unchanged to the ``_replace``
            method of the underlying `urllib.parse.ParseResult`.

        Returns
        -------
        new : `ButlerURI`
            New `ButlerURI` object with updated values.
        """
        return self.__class__(self._uri._replace(**kwargs))

    def updateFile(self, newfile: str) -> None:
        """Update in place the final component of the path with the supplied
        file name.

        Parameters
        ----------
        newfile : `str`
            File name with no path component.

        Notes
        -----
        Updates the URI in place.
        Updates the ButlerURI.dirLike attribute. The new file path will
        be quoted if necessary.
        """
        if self.quotePaths:
            newfile = urllib.parse.quote(newfile)
        dir, _ = self._pathModule.split(self.path)
        newpath = self._pathModule.join(dir, newfile)

        self.dirLike = False
        self._uri = self._uri._replace(path=newpath)

    def getExtension(self) -> str:
        """Return the file extension(s) associated with this URI path.

        Returns
        -------
        ext : `str`
            The file extension (including the ``.``). Can be empty string
            if there is no file extension. Usually returns only the last
            file extension unless there is a special extension modifier
            indicating file compression, in which case the combined
            extension (e.g. ``.fits.gz``) will be returned.
        """
        special = {".gz", ".bz2", ".xz", ".fz"}

        extensions = self._pathLib(self.path).suffixes

        if not extensions:
            return ""

        ext = extensions.pop()

        # Multiple extensions, decide whether to include the final two
        if extensions and ext in special:
            ext = f"{extensions[-1]}{ext}"

        return ext

    def join(self, path: str) -> ButlerURI:
        """Create a new `ButlerURI` with additional path components including
        a file.

        Parameters
        ----------
        path : `str`
            Additional file components to append to the current URI. Assumed
            to include a file at the end. Will be quoted depending on the
            associated URI scheme.

        Returns
        -------
        new : `ButlerURI`
            New URI with any file at the end replaced with the new path
            components.

        Notes
        -----
        Schemeless URIs assume local path separator but all other URIs assume
        POSIX separator if the supplied path has directory structure. It
        may be this never becomes a problem but datastore templates assume
        POSIX separator is being used.
        """
        new = self.dirname()  # By definition a directory URI

        # new should be asked about quoting, not self, since dirname can
        # change the URI scheme for schemeless -> file
        if new.quotePaths:
            path = urllib.parse.quote(path)

        newpath = self._pathModule.normpath(self._pathModule.join(new.path, path))
        new._uri = new._uri._replace(path=newpath)
        # Declare the new URI not be dirLike unless path ended in /
        if not path.endswith(self._pathModule.sep):
            new.dirLike = False
        return new

    def relative_to(self, other: ButlerURI) -> Optional[str]:
        """Return the relative path from this URI to the other URI.

        Parameters
        ----------
        other : `ButlerURI`
            URI to use to calculate the relative path. Must be a parent
            of this URI.

        Returns
        -------
        subpath : `str`
            The sub path of this URI relative to the supplied other URI.
            Returns `None` if there is no parent child relationship.
            Scheme and netloc must match.
        """
        if self.scheme != other.scheme or self.netloc != other.netloc:
            return None

        enclosed_path = self._pathLib(self.relativeToPathRoot)
        parent_path = other.relativeToPathRoot
        subpath: Optional[str]
        try:
            subpath = str(enclosed_path.relative_to(parent_path))
        except ValueError:
            # The path library reports "not a sub path" with ValueError
            subpath = None
        else:
            subpath = urllib.parse.unquote(subpath)
        return subpath

    def exists(self) -> bool:
        """Indicate that the resource is available.

        Returns
        -------
        exists : `bool`
            `True` if the resource exists.
        """
        raise NotImplementedError()

    def remove(self) -> None:
        """Remove the resource."""
        raise NotImplementedError()

    def isabs(self) -> bool:
        """Indicate that the resource is fully specified.

        For non-schemeless URIs this is always true.

        Returns
        -------
        isabs : `bool`
            `True` in all cases except schemeless URI.
        """
        return True

    def as_local(self) -> Tuple[str, bool]:
        """Return the location of the (possibly remote) resource in the
        local file system.

        Returns
        -------
        path : `str`
            If this is a remote resource, it will be a copy of the resource
            on the local file system, probably in a temporary directory.
            For a local resource this should be the actual path to the
            resource.
        is_temporary : `bool`
            Indicates if the local path is a temporary file or not.
        """
        raise NotImplementedError()

    def read(self, size: int = -1) -> bytes:
        """Open the resource and return the contents in bytes.

        Parameters
        ----------
        size : `int`, optional
            The number of bytes to read. Negative or omitted indicates
            that all data should be read.
        """
        raise NotImplementedError()

    def write(self, data: bytes, overwrite: bool = True) -> None:
        """Write the supplied bytes to the new resource.

        Parameters
        ----------
        data : `bytes`
            The bytes to write to the resource. The entire contents of the
            resource will be replaced.
        overwrite : `bool`, optional
            If `True` the resource will be overwritten if it exists. Otherwise
            the write will fail.
        """
        raise NotImplementedError()

    def mkdir(self) -> None:
        """For a dir-like URI, create the directory resource if it does not
        already exist.
        """
        raise NotImplementedError()

    def __str__(self) -> str:
        return self.geturl()

    def __repr__(self) -> str:
        return f'ButlerURI("{self.geturl()}")'

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, ButlerURI):
            return False
        return self.geturl() == other.geturl()

    def __copy__(self) -> ButlerURI:
        # Implement here because the __new__ method confuses things
        return type(self)(str(self))

    def __deepcopy__(self, memo: Any) -> ButlerURI:
        # Implement here because the __new__ method confuses things
        return self.__copy__()

    def __getnewargs__(self) -> Tuple:
        return (str(self),)

    @staticmethod
    def _fixupPathUri(parsed: urllib.parse.ParseResult, root: Optional[str] = None,
                      forceAbsolute: bool = False,
                      forceDirectory: bool = False) -> Tuple[urllib.parse.ParseResult, bool]:
        """Correct any issues with the supplied URI.

        Parameters
        ----------
        parsed : `~urllib.parse.ParseResult`
            The result from parsing a URI using `urllib.parse`.
        root : `str`, ignored
            Not used by this implementation since all URIs are
            absolute except for those representing the local file system.
        forceAbsolute : `bool`, ignored.
            Not used by this implementation. URIs are generally always
            absolute.
        forceDirectory : `bool`, optional
            If `True` forces the URI to end with a separator, otherwise given
            URI is interpreted as is. Specifying that the URI is conceptually
            equivalent to a directory can break some ambiguities when
            interpreting the last element of a path.

        Returns
        -------
        modified : `~urllib.parse.ParseResult`
            Update result if a URI is being handled.
        dirLike : `bool`
            `True` if given parsed URI has a trailing separator or
            forceDirectory is True. Otherwise `False`.

        Notes
        -----
        Relative paths are explicitly not supported by RFC8089 but `urllib`
        does accept URIs of the form ``file:relative/path.ext``. They need
        to be turned into absolute paths before they can be used.  This is
        always done regardless of the ``forceAbsolute`` parameter.

        AWS S3 differentiates between keys with trailing POSIX separators (i.e
        `/dir` and `/dir/`) whereas POSIX does not necessarily.

        Scheme-less paths are normalized.
        """
        # assume we are not dealing with a directory like URI
        dirLike = False

        # URI is dir-like if explicitly stated or if it ends on a separator
        endsOnSep = parsed.path.endswith(posixpath.sep)
        if forceDirectory or endsOnSep:
            dirLike = True
            # only add the separator if it's not already there
            if not endsOnSep:
                parsed = parsed._replace(path=parsed.path+posixpath.sep)

        return parsed, dirLike

    def transfer_from(self, src: ButlerURI, transfer: str,
                      overwrite: bool = False,
                      transaction: Optional[Union[DatastoreTransaction, NoTransaction]] = None) -> None:
        """Transfer the current resource to a new location.

        Parameters
        ----------
        src : `ButlerURI`
            Source URI.
        transfer : `str`
            Mode to use for transferring the resource. Generically there are
            many standard options: copy, link, symlink, hardlink, relsymlink.
            Not all URIs support all modes.
        overwrite : `bool`, optional
            Allow an existing file to be overwritten. Defaults to `False`.
        transaction : `DatastoreTransaction`, optional
            A transaction object that can (depending on implementation)
            rollback transfers on error.  Not guaranteed to be implemented.

        Notes
        -----
        Conceptually this is hard to scale as the number of URI schemes
        grow.  The destination URI is more important than the source URI
        since that is where all the transfer modes are relevant (with the
        complication that "move" deletes the source).

        Local file to local file is the fundamental use case but every
        other scheme has to support "copy" to local file (with implicit
        support for "move") and copy from local file.
        All the "link" options tend to be specific to local file systems.

        "move" is a "copy" where the remote resource is deleted at the end.
        Whether this works depends on the source URI rather than the
        destination URI.  Reverting a move on transaction rollback is
        expected to be problematic if a remote resource was involved.
        """
        raise NotImplementedError(f"No transfer modes supported by URI scheme {self.scheme}")
714class ButlerFileURI(ButlerURI):
715 """URI for explicit ``file`` scheme."""
717 transferModes = ("copy", "link", "symlink", "hardlink", "relsymlink", "auto", "move")
718 transferDefault: str = "link"
@property
def ospath(self) -> str:
    """Path component of the URI expressed for the current OS.

    A formal URI carries a quoted path, so the quoting is reversed
    after the separators have been localized.
    """
    localized = posix2os(self._uri.path)
    return urllib.parse.unquote(localized)
def exists(self) -> bool:
    # os.path.exists follows soft links, so a link whose target has
    # gone away is reported as not existing (False).
    localPath = self.ospath
    return os.path.exists(localPath)
def remove(self) -> None:
    """Delete the local file this URI refers to."""
    localPath = self.ospath
    os.remove(localPath)
def as_local(self) -> Tuple[str, bool]:
    """Report where this file lives on the local file system.

    Returns
    -------
    path : `str`
        The local path to this file.
    temporary : `bool`
        Always `False`; the path is the file itself, not a temporary
        copy.
    """
    localPath = self.ospath
    return (localPath, False)
def _force_to_file(self) -> ButlerFileURI:
    """Return a new URI that is guaranteed to use the ``file`` scheme.

    Returns
    -------
    file : `ButlerFileURI`
        A copy of the URI using file scheme. If already a file scheme
        the copy will be identical.

    Raises
    ------
    ValueError
        Raised if this URI is schemeless and relative path and so can
        not be forced to file absolute path without context.
    """
    # This class always uses the file scheme, so a plain copy suffices.
    duplicate = copy.copy(self)
    return duplicate
def relative_to(self, other: ButlerURI) -> Optional[str]:
    """Return the relative path from this URI to the other URI.

    Parameters
    ----------
    other : `ButlerURI`
        URI to use to calculate the relative path.  Must be a parent
        of this URI.

    Returns
    -------
    subpath : `str`
        The sub path of this URI relative to the supplied other URI.
        Returns `None` if there is no parent child relationship.
        Scheme and netloc must match but for file URIs schemeless
        is also used. If this URI is a relative URI but the other is
        absolute, it is assumed to be in the parent completely unless it
        starts with ".." (in which case the path is combined and tested).
        If both URIs are relative, the relative paths are compared
        for commonality.

    Notes
    -----
    By definition a relative path will be relative to the enclosing
    absolute parent URI. It will be returned unchanged if it does not
    use a parent directory specification.
    """
    otherScheme = other.scheme

    # self is known to be file-like. Any other scheme that is neither
    # schemeless nor "file" cannot share a path with it.
    if otherScheme not in ("", "file"):
        return None

    if not self.isabs():
        # Both relative: the generic comparison applies, so that
        # a/b/c.txt against a/b/ gives c.txt.
        if not other.isabs():
            return super().relative_to(other)

        # Only self is relative. Anchor it on the supplied parent first;
        # this exists solely for relative paths that contain ".." yet
        # end up back inside the parent directory.
        anchored = other.join(self.path)
        return anchored.relative_to(other)

    # Both absolute with the same scheme: generic comparison is enough.
    if self.scheme == otherScheme:
        return super().relative_to(other)

    # One is schemeless and the other is file, which the generic
    # comparison would reject. Both are absolute so forcing them to
    # file is safe. The cast tells mypy that other must be a
    # ButlerFileURI to have reached this point.
    fileSelf = self._force_to_file()
    fileOther = cast(ButlerFileURI, other)._force_to_file()
    return fileSelf.relative_to(fileOther)
def read(self, size: int = -1) -> bytes:
    # Docstring inherits
    # Binary mode: the contents are handed back as raw bytes.
    with open(self.ospath, "rb") as stream:
        contents = stream.read(size)
    return contents
def write(self, data: bytes, overwrite: bool = True) -> None:
    # Make sure the enclosing directory is present before writing.
    target = self.ospath
    parentDir = os.path.dirname(target)
    if not os.path.exists(parentDir):
        safeMakeDir(parentDir)

    # "xb" is exclusive creation: opening fails if the file exists.
    mode = "wb" if overwrite else "xb"
    with open(target, mode) as handle:
        handle.write(data)
def mkdir(self) -> None:
    target = self.ospath
    if os.path.exists(target):
        # Something is already there; it is only acceptable if it is
        # a directory.
        if not os.path.isdir(target):
            raise FileExistsError(f"URI {self} exists but is not a directory!")
        return
    safeMakeDir(target)
def transfer_from(self, src: ButlerURI, transfer: str,
                  overwrite: bool = False,
                  transaction: Optional[Union[DatastoreTransaction, NoTransaction]] = None) -> None:
    """Transfer the resource at ``src`` to the local file given by this URI.

    Parameters
    ----------
    src : `ButlerURI`
        Source URI.
    transfer : `str`
        Mode to use for transferring the resource. Supports the following
        options: copy, link, symlink, hardlink, relsymlink, move and
        auto. ``auto`` resolves to ``transferDefault`` for a local
        source and to ``copy`` when the source had to be downloaded to
        a temporary file.
    overwrite : `bool`, optional
        Allow an existing file to be overwritten. Defaults to `False`.
    transaction : `DatastoreTransaction`, optional
        If a transaction is provided, undo actions will be registered.

    Raises
    ------
    ValueError
        Raised if the transfer mode is not listed in ``transferModes``.
    FileNotFoundError
        Raised if the local form of the source does not exist.
    RuntimeError
        Raised if a link-based mode is requested for a source that is
        only available as a temporary local copy.
    FileExistsError
        Raised if the destination exists and ``overwrite`` is `False`.
    NotImplementedError
        Raised if the resolved transfer mode has no implementation.
    """
    # Fail early to prevent delays if remote resources are requested
    if transfer not in self.transferModes:
        raise ValueError(f"Transfer mode '{transfer}' not supported by URI scheme {self.scheme}")

    # Existence checks can be slow (the source may be remote), so only
    # perform them when the debug message will actually be emitted.
    if log.isEnabledFor(logging.DEBUG):
        log.debug(f"Transferring {src} [exists: {src.exists()}] -> "
                  f"{self} [exists: {self.exists()}] (transfer={transfer})")

    # We do not have to special case ButlerFileURI here because
    # as_local handles that.
    local_src, is_temporary = src.as_local()

    # Default transfer mode depends on whether we have a temporary
    # file or not.
    if transfer == "auto":
        transfer = self.transferDefault if not is_temporary else "copy"

    # Follow soft links
    local_src = os.path.realpath(os.path.normpath(local_src))

    if not os.path.exists(local_src):
        raise FileNotFoundError(f"Source URI {src} does not exist")

    # All the modes involving linking use "link" somewhere
    if "link" in transfer and is_temporary:
        raise RuntimeError("Can not use local file system transfer mode"
                           f" {transfer} for remote resource ({src})")

    # For temporary files we can own them
    requested_transfer = transfer
    if is_temporary and transfer == "copy":
        transfer = "move"

    # The output location should not exist
    dest_exists = self.exists()
    if not overwrite and dest_exists:
        raise FileExistsError(f"Destination path '{self}' already exists. Transfer "
                              f"from {src} cannot be completed.")

    # Make the path absolute (but don't follow links since that
    # would possibly cause us to end up in the wrong place if the
    # file existed already as a soft link)
    newFullPath = os.path.abspath(self.ospath)
    outputDir = os.path.dirname(newFullPath)
    if not os.path.isdir(outputDir):
        # Must create the directory -- this can not be rolled back
        # since another transfer running concurrently may
        # be relying on this existing.
        safeMakeDir(outputDir)

    if transaction is None:
        # Use a no-op transaction to reduce code duplication
        transaction = NoTransaction()

    # For links the OS doesn't let us overwrite so if something does
    # exist we have to remove it before we do the actual "transfer" below
    if "link" in transfer and overwrite and dest_exists:
        try:
            self.remove()
        except Exception:
            # If this fails we ignore it since it's a problem
            # that will manifest immediately below with a more relevant
            # error message
            pass

    if transfer == "move":
        with transaction.undoWith(f"move from {local_src}", shutil.move, newFullPath, local_src):
            shutil.move(local_src, newFullPath)
    elif transfer == "copy":
        with transaction.undoWith(f"copy from {local_src}", os.remove, newFullPath):
            shutil.copy(local_src, newFullPath)
    elif transfer == "link":
        # Try hard link and if that fails use a symlink
        with transaction.undoWith(f"link to {local_src}", os.remove, newFullPath):
            try:
                os.link(local_src, newFullPath)
            except OSError:
                # Read through existing symlinks
                os.symlink(local_src, newFullPath)
    elif transfer == "hardlink":
        with transaction.undoWith(f"hardlink to {local_src}", os.remove, newFullPath):
            os.link(local_src, newFullPath)
    elif transfer == "symlink":
        # Read through existing symlinks
        with transaction.undoWith(f"symlink to {local_src}", os.remove, newFullPath):
            os.symlink(local_src, newFullPath)
    elif transfer == "relsymlink":
        # This is a standard symlink but using a relative path
        # Need the directory name to give to relative root
        # A full file path confuses it into an extra ../
        newFullPathRoot = os.path.dirname(newFullPath)
        relPath = os.path.relpath(local_src, newFullPathRoot)
        with transaction.undoWith(f"relsymlink to {local_src}", os.remove, newFullPath):
            os.symlink(relPath, newFullPath)
    else:
        raise NotImplementedError("Transfer type '{}' not supported.".format(transfer))

    # This was an explicit move requested from a remote resource
    # try to remove that resource. We check is_temporary because
    # the local file would have been moved by shutil.move already.
    if requested_transfer == "move" and is_temporary:
        # Transactions do not work here
        src.remove()

    if is_temporary and os.path.exists(local_src):
        # This should never happen since we have moved it above
        os.remove(local_src)
@staticmethod
def _fixupPathUri(parsed: urllib.parse.ParseResult, root: Optional[str] = None,
                  forceAbsolute: bool = False,
                  forceDirectory: bool = False) -> Tuple[urllib.parse.ParseResult, bool]:
    """Fix up relative paths in URI instances.

    Parameters
    ----------
    parsed : `~urllib.parse.ParseResult`
        The result from parsing a URI using `urllib.parse`.
    root : `str`, optional
        Path to use as root when converting relative to absolute.
        If `None`, it will be the current working directory. This
        is a local file system path, not a URI. It is only used if
        a file-scheme is used incorrectly with a relative path.
    forceAbsolute : `bool`, ignored
        Has no effect for this subclass. ``file`` URIs are always
        absolute.
    forceDirectory : `bool`, optional
        If `True` forces the URI to end with a separator, otherwise given
        URI is interpreted as is.

    Returns
    -------
    modified : `~urllib.parse.ParseResult`
        Update result if a URI is being handled.
    dirLike : `bool`
        `True` if given parsed URI has a trailing separator or
        forceDirectory is True. Otherwise `False`.

    Notes
    -----
    Relative paths are explicitly not supported by RFC8089 but `urllib`
    does accept URIs of the form ``file:relative/path.ext``. They need
    to be turned into absolute paths before they can be used. This is
    always done regardless of the ``forceAbsolute`` parameter.

    The warning about unexpected ``params``/``query`` components is only
    issued on the relative-path branch; absolute paths return early.
    """
    # assume we are not dealing with a directory like URI
    dirLike = False

    # file URI implies POSIX path separators so split as POSIX,
    # then join as os, and convert to abspath. Do not handle
    # home directories since "file" scheme is explicitly documented
    # to not do tilde expansion.
    sep = posixpath.sep

    # For local file system we can explicitly check to see if this
    # really is a directory. The URI might point to a location that
    # does not exist yet but all that matters is that if it is a
    # directory we make use of that fact. No need to do the check if
    # we are already being told.
    # The path component of a URI is percent-encoded, so it has to be
    # decoded before it can be compared with the file system; otherwise
    # directories whose names need quoting are never recognised.
    if not forceDirectory and posixpath.isdir(urllib.parse.unquote(parsed.path)):
        forceDirectory = True

    # For an absolute path all we need to do is check if we need
    # to force the directory separator
    if posixpath.isabs(parsed.path):
        if forceDirectory:
            if not parsed.path.endswith(sep):
                parsed = parsed._replace(path=parsed.path+sep)
            dirLike = True
        return copy.copy(parsed), dirLike

    # Relative path so must fix it to be compliant with the standard
    if root is None:
        root = os.path.abspath(os.path.curdir)

    newPath = posixpath.normpath(posixpath.join(os2posix(root), parsed.path))

    # normpath strips trailing "/" so put it back if necessary.
    # Acknowledge that trailing separator exists.
    if forceDirectory or (parsed.path.endswith(sep) and not newPath.endswith(sep)):
        # normpath leaves a bare root ("/") untouched, so only append
        # when the separator is really missing to avoid creating "//".
        if not newPath.endswith(sep):
            newPath += sep
        dirLike = True

    # ParseResult is a NamedTuple so _replace is standard API
    parsed = parsed._replace(path=newPath)

    if parsed.params or parsed.query:
        log.warning("Additional items unexpectedly encountered in file URI: %s", parsed.geturl())

    return parsed, dirLike
class ButlerS3URI(ButlerURI):
    """S3 URI"""

    @property
    def client(self) -> boto3.client:
        """Client object to address remote resource."""
        # Defer import for circular dependencies
        from .s3utils import getS3Client
        return getS3Client()

    def exists(self) -> bool:
        """Check whether the resource exists, as reported by
        `s3CheckFileExists`.
        """
        # s3utils itself imports ButlerURI so defer this import
        from .s3utils import s3CheckFileExists
        exists, _ = s3CheckFileExists(self, client=self.client)
        return exists

    def remove(self) -> None:
        """Remove the resource."""

        # https://github.com/boto/boto3/issues/507 - there is no
        # way of knowing if the file was actually deleted except
        # for checking all the keys again, response is HTTP 204 OK
        # response all the time
        # The S3 client operation is called ``delete_object``; the client
        # has no ``delete`` method.
        self.client.delete_object(Bucket=self.netloc, Key=self.relativeToPathRoot)

    def read(self, size: int = -1) -> bytes:
        """Read the contents of the resource.

        Parameters
        ----------
        size : `int`, optional
            If positive, only the first ``size`` bytes are requested using
            an HTTP ``Range`` header. Otherwise the full object is read.

        Returns
        -------
        data : `bytes`
            The bytes read from the object.

        Raises
        ------
        FileNotFoundError
            Raised if the client reports that the key or bucket is missing.
        """
        args = {}
        if size > 0:
            args["Range"] = f"bytes=0-{size-1}"
        client = self.client
        try:
            response = client.get_object(Bucket=self.netloc,
                                         Key=self.relativeToPathRoot,
                                         **args)
        except (client.exceptions.NoSuchKey, client.exceptions.NoSuchBucket) as err:
            raise FileNotFoundError(f"No such resource: {self}") from err
        stream = response["Body"]
        try:
            return stream.read()
        finally:
            # Always release the underlying connection, even if the read
            # fails part way through.
            stream.close()

    def write(self, data: bytes, overwrite: bool = True) -> None:
        """Write the supplied bytes to the resource.

        Parameters
        ----------
        data : `bytes`
            Bytes to store as the object body.
        overwrite : `bool`, optional
            If `False` and the resource already exists the write is
            refused.

        Raises
        ------
        FileExistsError
            Raised if ``overwrite`` is `False` and the resource exists.
        """
        if not overwrite and self.exists():
            raise FileExistsError(f"Remote resource {self} exists and overwrite has been disabled")
        self.client.put_object(Bucket=self.netloc, Key=self.relativeToPathRoot,
                               Body=data)

    def mkdir(self) -> None:
        """Create an empty key representing this "directory".

        Raises
        ------
        ValueError
            Raised if the bucket does not exist or if this URI is not
            directory-like.
        """
        # Defer import for circular dependencies
        from .s3utils import bucketExists
        if not bucketExists(self.netloc):
            raise ValueError(f"Bucket {self.netloc} does not exist for {self}!")

        if not self.dirLike:
            raise ValueError(f"Can not create a 'directory' for file-like URI {self}")

        # don't create S3 key when root is at the top-level of a Bucket
        if self.path != "/":
            self.client.put_object(Bucket=self.netloc, Key=self.relativeToPathRoot)

    def as_local(self) -> Tuple[str, bool]:
        """Download object from S3 and place in temporary directory.

        Returns
        -------
        path : `str`
            Path to local temporary file.
        temporary : `bool`
            Always returns `True`. This is always a temporary file.
        """
        # delete=False: the caller owns the file and must remove it.
        with tempfile.NamedTemporaryFile(suffix=self.getExtension(), delete=False) as tmpFile:
            self.client.download_fileobj(self.netloc, self.relativeToPathRoot, tmpFile)
        return tmpFile.name, True

    def transfer_from(self, src: ButlerURI, transfer: str = "copy",
                      overwrite: bool = False,
                      transaction: Optional[Union[DatastoreTransaction, NoTransaction]] = None) -> None:
        """Transfer the current resource to an S3 bucket.

        Parameters
        ----------
        src : `ButlerURI`
            Source URI.
        transfer : `str`
            Mode to use for transferring the resource. Must be one of
            ``self.transferModes``. ``auto`` is replaced by
            ``self.transferDefault``; ``move`` removes the source once the
            data have been transferred.
        overwrite : `bool`, optional
            Allow an existing file to be overwritten. Defaults to `False`.
        transaction : `DatastoreTransaction`, optional
            Currently unused.

        Raises
        ------
        ValueError
            Raised if the transfer mode is not supported.
        FileExistsError
            Raised if the destination exists and ``overwrite`` is `False`.
        """
        # Fail early to prevent delays if remote resources are requested
        if transfer not in self.transferModes:
            raise ValueError(f"Transfer mode '{transfer}' not supported by URI scheme {self.scheme}")

        # The existence checks are remote calls so only pay for them when
        # the message will actually be emitted.
        if log.isEnabledFor(logging.DEBUG):
            log.debug("Transferring %s [exists: %s] -> %s [exists: %s] (transfer=%s)",
                      src, src.exists(), self, self.exists(), transfer)

        if not overwrite and self.exists():
            raise FileExistsError(f"Destination path '{self}' already exists.")

        if transfer == "auto":
            transfer = self.transferDefault

        if isinstance(src, type(self)):
            # Looks like an S3 remote uri so we can use direct copy
            # note that boto3.resource.meta.copy is cleverer than the low
            # level copy_object
            copy_source = {
                "Bucket": src.netloc,
                "Key": src.relativeToPathRoot,
            }
            self.client.copy_object(CopySource=copy_source, Bucket=self.netloc, Key=self.relativeToPathRoot)
        else:
            # Use local file and upload it
            local_src, is_temporary = src.as_local()

            # resource.meta.upload_file seems like the right thing
            # but we have a low level client
            try:
                with open(local_src, "rb") as fh:
                    self.client.put_object(Bucket=self.netloc,
                                           Key=self.relativeToPathRoot, Body=fh)
            finally:
                # Do not leave the downloaded copy behind if the upload
                # fails.
                if is_temporary:
                    os.remove(local_src)

        # This was an explicit move requested from a remote resource
        # try to remove that resource
        if transfer == "move":
            # Transactions do not work here
            src.remove()
class ButlerPackageResourceURI(ButlerURI):
    """URI that addresses a resource bundled inside a Python package.

    The form is ``resource://lsst.daf.butler/configs/file.yaml``: the
    network location names the Python package and the path names the
    resource within it.
    """

    def exists(self) -> bool:
        """Report whether `pkg_resources` can find the resource."""
        package = self.netloc
        resource = self.relativeToPathRoot
        return pkg_resources.resource_exists(package, resource)

    def read(self, size: int = -1) -> bytes:
        """Return up to ``size`` bytes from the resource.

        ``size`` is passed straight to the stream's ``read`` method.
        """
        stream = pkg_resources.resource_stream(self.netloc, self.relativeToPathRoot)
        with stream as fh:
            content = fh.read(size)
        return content
class ButlerHttpURI(ButlerURI):
    """General HTTP(S) resource."""

    @property
    def session(self) -> requests.Session:
        """Client object to address remote resource."""
        from .webdavutils import getHttpSession, isWebdavEndpoint
        if isWebdavEndpoint(self):
            log.debug("%s looks like a Webdav endpoint.", self.geturl())
            return getHttpSession()

        log.debug("%s looks like a standard HTTP endpoint.", self.geturl())
        return requests.Session()

    def exists(self) -> bool:
        """Check that a remote HTTP resource exists."""
        log.debug("Checking if resource exists: %s", self.geturl())
        r = self.session.head(self.geturl())

        return r.status_code == 200

    def mkdir(self) -> None:
        """For a dir-like URI, create the directory resource if it does not
        already exist.

        Raises
        ------
        ValueError
            Raised if this URI is not directory-like or if the ``MKCOL``
            request does not return status 201.
        """
        if not self.dirLike:
            raise ValueError(f"Can not create a 'directory' for file-like URI {self}")

        if not self.exists():
            log.debug("Creating new directory: %s", self.geturl())
            r = self.session.request("MKCOL", self.geturl())
            if r.status_code != 201:
                raise ValueError(f"Can not create directory {self}, status code: {r.status_code}")

    def remove(self) -> None:
        """Remove the resource.

        Raises
        ------
        FileNotFoundError
            Raised if the ``DELETE`` request does not return 200, 202 or
            204.
        """
        log.debug("Removing resource: %s", self.geturl())
        r = self.session.delete(self.geturl())
        if r.status_code not in [200, 202, 204]:
            raise FileNotFoundError(f"Unable to delete resource {self}; status code: {r.status_code}")

    def as_local(self) -> Tuple[str, bool]:
        """Download object over HTTP and place in temporary directory.

        Returns
        -------
        path : `str`
            Path to local temporary file.
        temporary : `bool`
            Always returns `True`. This is always a temporary file.

        Raises
        ------
        FileNotFoundError
            Raised if the ``GET`` request does not return status 200.
        """
        log.debug("Downloading remote resource as local file: %s", self.geturl())
        r = self.session.get(self.geturl(), stream=True)
        if r.status_code != 200:
            raise FileNotFoundError(f"Unable to download resource {self}; status code: {r.status_code}")
        # delete=False: the caller owns the file and must remove it.
        with tempfile.NamedTemporaryFile(suffix=self.getExtension(), delete=False) as tmpFile:
            # iter_content defaults to one byte per chunk; ask for sensible
            # blocks so that large files do not take forever to download.
            for chunk in r.iter_content(chunk_size=65536):
                tmpFile.write(chunk)
        return tmpFile.name, True

    def read(self, size: int = -1) -> bytes:
        """Open the resource and return the contents in bytes.

        Parameters
        ----------
        size : `int`, optional
            The number of bytes to read. Negative or omitted indicates
            that all data should be read.

        Returns
        -------
        data : `bytes`
            The bytes read. Empty if the resource has no content.

        Raises
        ------
        FileNotFoundError
            Raised if the ``GET`` request does not return status 200.
        """
        log.debug("Reading from remote resource: %s", self.geturl())
        stream = size > 0
        r = self.session.get(self.geturl(), stream=stream)
        if r.status_code != 200:
            raise FileNotFoundError(f"Unable to read resource {self}; status code: {r.status_code}")
        if not stream:
            return r.content
        try:
            # An empty body yields no chunks at all; return empty bytes
            # rather than leaking StopIteration to the caller.
            return next(r.iter_content(chunk_size=size), b"")
        finally:
            # The remainder of the body is not wanted so release the
            # connection explicitly.
            r.close()

    def write(self, data: bytes, overwrite: bool = True) -> None:
        """Write the supplied bytes to the new resource.

        Parameters
        ----------
        data : `bytes`
            The bytes to write to the resource. The entire contents of the
            resource will be replaced.
        overwrite : `bool`, optional
            If `True` the resource will be overwritten if it exists. Otherwise
            the write will fail.

        Raises
        ------
        FileExistsError
            Raised if ``overwrite`` is `False` and the resource exists.
        """
        log.debug("Writing to remote resource: %s", self.geturl())
        if not overwrite and self.exists():
            raise FileExistsError(f"Remote resource {self} exists and overwrite has been disabled")
        self.session.put(self.geturl(), data=data)

    def transfer_from(self, src: ButlerURI, transfer: str = "copy",
                      overwrite: bool = False,
                      transaction: Optional[Union[DatastoreTransaction, NoTransaction]] = None) -> None:
        """Transfer the current resource to a Webdav repository.

        Parameters
        ----------
        src : `ButlerURI`
            Source URI.
        transfer : `str`
            Mode to use for transferring the resource. Must be one of
            ``self.transferModes``. ``auto`` is replaced by
            ``self.transferDefault``.
        overwrite : `bool`, optional
            Allow an existing file to be overwritten. Defaults to `False`.
        transaction : `DatastoreTransaction`, optional
            Currently unused.

        Raises
        ------
        ValueError
            Raised if the transfer mode is not supported.
        FileExistsError
            Raised if the destination exists and ``overwrite`` is `False`.

        Notes
        -----
        When the source is another HTTP URI a ``MOVE`` or ``COPY`` request
        is issued. For any other source the data are uploaded from a local
        copy and the source is left in place even for ``move``.
        NOTE(review): confirm whether ``move`` from a non-HTTP source should
        also remove the source once the upload status is being checked.
        """
        # Fail early to prevent delays if remote resources are requested
        if transfer not in self.transferModes:
            raise ValueError(f"Transfer mode {transfer} not supported by URI scheme {self.scheme}")

        # The existence checks are remote calls so only pay for them when
        # the message will actually be emitted.
        if log.isEnabledFor(logging.DEBUG):
            log.debug("Transferring %s [exists: %s] -> %s [exists: %s] (transfer=%s)",
                      src, src.exists(), self, self.exists(), transfer)

        # Honor the overwrite flag rather than refusing unconditionally.
        if not overwrite and self.exists():
            raise FileExistsError(f"Destination path {self} already exists.")

        if transfer == "auto":
            transfer = self.transferDefault

        if isinstance(src, type(self)):
            method = "MOVE" if transfer == "move" else "COPY"
            self.session.request(method, src.geturl(), headers={"Destination": self.geturl()})
        else:
            # Use local file and upload it
            local_src, is_temporary = src.as_local()
            try:
                with open(local_src, "rb") as f:
                    self.session.post(self.geturl(), files={"file": f})
            finally:
                # Do not leave the downloaded copy behind if the upload
                # fails.
                if is_temporary:
                    os.remove(local_src)
class ButlerInMemoryURI(ButlerURI):
    """URI for the internal in-memory datastore (`mem://`).

    It serves only as a marker that the dataset lives in memory.
    """

    def exists(self) -> bool:
        """Test for existence; this implementation always returns `True`."""
        return True

    def as_local(self) -> Tuple[str, bool]:
        """Refuse to provide a local file.

        Raises
        ------
        RuntimeError
            Always raised.
        """
        message = f"Do not know how to retrieve data for URI '{self}'"
        raise RuntimeError(message)
class ButlerSchemelessURI(ButlerFileURI):
    """Local file system URI that carries no scheme."""

    _pathLib = PurePath
    _pathModule = os.path
    quotePaths = False

    @property
    def ospath(self) -> str:
        """Path component of the URI localized to current OS.

        For a scheme-less URI this is simply ``self.path``.
        """
        return self.path

    def isabs(self) -> bool:
        """Indicate whether the resource is fully specified.

        URIs that have a scheme are always absolute; a scheme-less one is
        absolute only if its OS path is.

        Returns
        -------
        isabs : `bool`
            `True` if the file is absolute, `False` otherwise.
        """
        localPath = self.ospath
        return os.path.isabs(localPath)

    def _force_to_file(self) -> ButlerFileURI:
        """Return a new URI using the ``file`` scheme for this location.

        The path is converted to POSIX form and URI-quoted.

        Returns
        -------
        file : `ButlerFileURI`
            A copy of the URI using file scheme.

        Raises
        ------
        RuntimeError
            Raised if this URI is a relative path, which can not be turned
            into an absolute file URI without further context.
        """
        if not self.isabs():
            raise RuntimeError(f"Internal error: Can not force {self} to absolute file URI")
        quotedPath = urllib.parse.quote(os2posix(self.path))
        fileUri = self._uri._replace(scheme="file", path=quotedPath)
        # mypy really wants a ButlerFileURI to be returned here
        return ButlerURI(fileUri, forceDirectory=self.dirLike)  # type: ignore

    @staticmethod
    def _fixupPathUri(parsed: urllib.parse.ParseResult, root: Optional[str] = None,
                      forceAbsolute: bool = False,
                      forceDirectory: bool = False) -> Tuple[urllib.parse.ParseResult, bool]:
        """Fix up relative paths for local file system.

        Parameters
        ----------
        parsed : `~urllib.parse.ParseResult`
            The result from parsing a URI using `urllib.parse`.
        root : `str`, optional
            Path to use as root when converting relative to absolute.
            If `None`, it will be the current working directory. This
            is a local file system path, not a URI.
        forceAbsolute : `bool`, optional
            If `True`, a relative path is joined to ``root`` and
            normalized; the result stays in OS path form and the scheme is
            left unchanged. If `False` a relative path stays relative and
            is only normalized.
        forceDirectory : `bool`, optional
            If `True` forces the URI to end with a separator, otherwise given
            URI is interpreted as is.

        Returns
        -------
        modified : `~urllib.parse.ParseResult`
            Update result if a URI is being handled.
        dirLike : `bool`
            `True` if the path is treated as a directory: it had a trailing
            separator, ``forceDirectory`` was set, it names an existing
            directory, or it was empty and left relative. Otherwise
            `False`.

        Notes
        -----
        A path that is already absolute (after tilde expansion) is given
        the ``file`` scheme, converted to POSIX form and quoted.

        Scheme-less paths are normalized.
        """
        if root is None:
            root = os.path.abspath(os.path.curdir)

        # This is a local OS file path which can support tilde expansion.
        # It was quoted in the constructor so unquote it here.
        expandedPath = os.path.expanduser(urllib.parse.unquote(parsed.path))

        # An absolute path is promoted to a file URI further down.
        becomesFileUri = os.path.isabs(expandedPath)

        if becomesFileUri or not forceAbsolute:
            # Absolute paths and relative paths that stay relative only
            # need normalizing. Kept in OS form for now to simplify the
            # logic below.
            newPath = os.path.normpath(expandedPath)
        else:
            # Anchor the relative path on the root. This stays in OS path
            # form and the scheme is not changed to file.
            newPath = os.path.normpath(os.path.join(root, expandedPath))

        # Normalizing an empty path returns "." so a relative path that is
        # left relative and is empty counts as a directory.
        dirLike = not becomesFileUri and not forceAbsolute and expandedPath == ""

        # normpath strips trailing "/" which makes it hard to keep
        # track of directory vs file when calling replaceFile.

        # On the local file system we can look to see whether the path
        # really is a directory. It may not exist yet, but if it does and is
        # a directory that fact is used. Skip the check if we have already
        # been told.
        forceDirectory = forceDirectory or os.path.isdir(newPath)

        # Add the trailing separator only if explicitly required or if it
        # was stripped by normpath. Acknowledge that trailing separator
        # exists.
        hasSep = newPath.endswith(os.sep)
        endsOnSep = expandedPath.endswith(os.sep) and not hasSep
        if forceDirectory or endsOnSep or dirLike:
            dirLike = True
            if not hasSep:
                newPath += os.sep

        replacements = {"path": newPath}
        if becomesFileUri:
            # This is now meant to be a URI path so force to posix
            # and quote
            replacements["scheme"] = "file"
            replacements["path"] = urllib.parse.quote(os2posix(newPath))

        # ParseResult is a NamedTuple so _replace is standard API
        parsed = parsed._replace(**replacements)

        if any((parsed.params, parsed.fragment, parsed.query)):
            log.warning("Additional items unexpectedly encountered in schemeless URI: %s", parsed.geturl())

        return parsed, dirLike