Coverage for python/lsst/resources/_resourcePath.py: 21%
410 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-04-11 02:04 -0700
« prev ^ index » next coverage.py v6.5.0, created at 2023-04-11 02:04 -0700
1# This file is part of lsst-resources.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# Use of this source code is governed by a 3-clause BSD-style
10# license that can be found in the LICENSE file.
12from __future__ import annotations
14import concurrent.futures
15import contextlib
16import copy
17import io
18import locale
19import logging
20import os
21import posixpath
22import re
23import shutil
24import tempfile
25import urllib.parse
26from pathlib import Path, PurePath, PurePosixPath
27from random import Random
29__all__ = ("ResourcePath", "ResourcePathExpression")
31from typing import (
32 TYPE_CHECKING,
33 Any,
34 Dict,
35 Iterable,
36 Iterator,
37 List,
38 Literal,
39 Optional,
40 Tuple,
41 Type,
42 Union,
43 overload,
44)
46from ._resourceHandles._baseResourceHandle import ResourceHandleProtocol
48if TYPE_CHECKING:
49 from .utils import TransactionProtocol
log = logging.getLogger(__name__)

# Regex for looking for URI escapes: a percent sign followed by exactly two
# characters drawn from 0-9 and upper-case A-F. Lower-case hex escapes
# (e.g. ``%2f``) are not matched by this pattern.
ESCAPES_RE = re.compile(r"%[A-F0-9]{2}")

# Precomputed escaped hash (the percent-encoded form of "#").
ESCAPED_HASH = urllib.parse.quote("#")

# Maximum number of worker threads for parallelized operations.
# If greater than 10, be aware that this number has to be consistent
# with connection pool sizing (for example in urllib3).
MAX_WORKERS = 10


ResourcePathExpression = Union[str, urllib.parse.ParseResult, "ResourcePath", Path]
"""Type-annotation alias for objects that can be coerced to ResourcePath.
"""
class ResourcePath:
    """Convenience wrapper around URI parsers.

    Provides access to URI components and can convert file
    paths into absolute path URIs. Scheme-less URIs are treated as if
    they are local file system paths and are converted to absolute URIs.

    A specialist subclass is created for each supported URI scheme.

    Parameters
    ----------
    uri : `str`, `Path`, `urllib.parse.ParseResult`, or `ResourcePath`.
        URI in string form. Can be scheme-less if referring to a relative
        path or an absolute path on the local file system.
    root : `str` or `ResourcePath`, optional
        When fixing up a relative path in a ``file`` scheme or if scheme-less,
        use this as the root. Must be absolute. If `None` the current
        working directory will be used. Can be any supported URI scheme.
        Not used if ``forceAbsolute`` is `False`.
    forceAbsolute : `bool`, optional
        If `True`, scheme-less relative URI will be converted to an absolute
        path using a ``file`` scheme. If `False` scheme-less URI will remain
        scheme-less and will not be updated to ``file`` or absolute path unless
        it is already an absolute path, in which case it will be updated to
        a ``file`` scheme.
    forceDirectory : `bool`, optional
        If `True` forces the URI to end with a separator, otherwise given URI
        is interpreted as is.
    isTemporary : `bool`, optional
        If `True` indicates that this URI points to a temporary resource.
        The default is `False`, unless ``uri`` is already a `ResourcePath`
        instance and ``uri.isTemporary is True``.

    Notes
    -----
    A non-standard URI of the form ``file:dir/file.txt`` is always converted
    to an absolute ``file`` URI.
    """

    _pathLib: Type[PurePath] = PurePosixPath
    """Path library to use for this scheme."""

    _pathModule = posixpath
    """Path module to use for this scheme."""

    transferModes: Tuple[str, ...] = ("copy", "auto", "move")
    """Transfer modes supported by this implementation.

    Move is special in that it is generally a copy followed by an unlink.
    Whether that unlink works depends critically on whether the source URI
    implements unlink. If it does not the move will be reported as a failure.
    """

    transferDefault: str = "copy"
    """Default mode to use for transferring if ``auto`` is specified."""

    quotePaths = True
    """True if path-like elements modifying a URI should be quoted.

    All non-schemeless URIs have to internally use quoted paths. Therefore
    if a new file name is given (e.g. to updatedFile or join) a decision must
    be made whether to quote it to be consistent.
    """

    isLocal = False
    """If `True` this URI refers to a local file."""

    # This is not an ABC with abstract methods because the __new__ being
    # a factory confuses mypy such that it assumes that every constructor
    # returns a ResourcePath and then determines that all the abstract methods
    # are still abstract. If they are not marked abstract but just raise
    # mypy is fine with it.

    # mypy is confused without these. All three are assigned directly on the
    # instance at the end of __new__.
    _uri: urllib.parse.ParseResult
    isTemporary: bool
    dirLike: bool
    def __new__(
        cls,
        uri: ResourcePathExpression,
        root: Optional[Union[str, ResourcePath]] = None,
        forceAbsolute: bool = True,
        forceDirectory: bool = False,
        isTemporary: Optional[bool] = None,
    ) -> ResourcePath:
        """Create and return new specialist ResourcePath subclass.

        This is a factory: the class of the returned object is selected
        from the URI scheme rather than from ``cls`` (unless ``cls`` is a
        subclass and ``uri`` is a `urllib.parse.ParseResult`).

        Parameters
        ----------
        uri : `str`, `os.PathLike`, `urllib.parse.ParseResult`, `ResourcePath`
            The URI to wrap. `os.PathLike` objects are converted with `str`.
        root : `str` or `ResourcePath`, optional
            Root used to resolve a relative scheme-less ``uri``. A string
            root is converted to a directory-like absolute `ResourcePath`.
        forceAbsolute : `bool`, optional
            Forwarded to the subclass ``_fixupPathUri`` hook.
        forceDirectory : `bool`, optional
            If `True` the resulting URI is treated as directory-like.
        isTemporary : `bool`, optional
            Whether the resource is temporary. `None` is stored as `False`
            on a newly created instance.

        Returns
        -------
        new : `ResourcePath`
            Instance of the scheme-specific subclass. If ``uri`` is already
            a `ResourcePath` that agrees with the requested options it is
            returned unchanged.

        Raises
        ------
        RuntimeError
            Raised if ``uri`` is a `ResourcePath` and ``forceDirectory`` or
            ``isTemporary`` conflict with its existing state.
        ValueError
            Raised if ``uri`` is of an unsupported type, or if ``root`` has
            to be joined with a relative path but is not directory-like.
        NotImplementedError
            Raised if the URI scheme is not one of the supported schemes.
        """
        parsed: urllib.parse.ParseResult
        dirLike: bool = False
        subclass: Optional[Type[ResourcePath]] = None

        # Force root to be a ResourcePath -- this simplifies downstream
        # code.
        if root is None:
            root_uri = None
        elif isinstance(root, str):
            root_uri = ResourcePath(root, forceDirectory=True, forceAbsolute=True)
        else:
            root_uri = root

        if isinstance(uri, os.PathLike):
            uri = str(uri)

        # Record if we need to post process the URI components
        # or if the instance is already fully configured
        if isinstance(uri, str):
            # Since local file names can have special characters in them
            # we need to quote them for the parser but we can unquote
            # later. Assume that all other URI schemes are quoted.
            # Since sometimes people write file:/a/b and not file:///a/b
            # we should not quote in the explicit case of file:
            if "://" not in uri and not uri.startswith("file:"):
                if ESCAPES_RE.search(uri):
                    # Something that looks like a %XX escape is already
                    # present, so the string is left as given.
                    log.warning("Possible double encoding of %s", uri)
                else:
                    uri = urllib.parse.quote(uri)
                    # Special case hash since we must support fragments
                    # even in schemeless URIs -- although try to only replace
                    # them in file part and not directory part
                    if ESCAPED_HASH in uri:
                        dirpos = uri.rfind("/")
                        # Do replacement after this /
                        uri = uri[: dirpos + 1] + uri[dirpos + 1 :].replace(ESCAPED_HASH, "#")

            parsed = urllib.parse.urlparse(uri)
        elif isinstance(uri, urllib.parse.ParseResult):
            parsed = copy.copy(uri)
            # If we are being instantiated with a subclass, rather than
            # ResourcePath, ensure that that subclass is used directly.
            # This could lead to inconsistencies if this constructor
            # is used externally outside of the ResourcePath.replace() method.
            #   S3ResourcePath(urllib.parse.urlparse("file://a/b.txt"))
            # will be a problem.
            # This is needed to prevent a schemeless absolute URI become
            # a file URI unexpectedly when calling updatedFile or
            # updatedExtension
            if cls is not ResourcePath:
                parsed, dirLike = cls._fixDirectorySep(parsed, forceDirectory)
                subclass = cls

        elif isinstance(uri, ResourcePath):
            # Since ResourcePath is immutable we can return the argument
            # unchanged if it already agrees with forceDirectory, isTemporary,
            # and forceAbsolute.
            # We invoke __new__ again with str(self) to add a scheme for
            # forceAbsolute, but for the others that seems more likely to paper
            # over logic errors than do something useful, so we just raise.
            if forceDirectory and not uri.dirLike:
                raise RuntimeError(
                    f"{uri} is already a file-like ResourcePath; cannot force it to directory."
                )
            if isTemporary is not None and isTemporary is not uri.isTemporary:
                raise RuntimeError(
                    f"{uri} is already a {'temporary' if uri.isTemporary else 'permanent'} "
                    f"ResourcePath; cannot make it {'temporary' if isTemporary else 'permanent'}."
                )
            if forceAbsolute and not uri.scheme:
                return ResourcePath(
                    str(uri),
                    root=root,
                    forceAbsolute=True,
                    forceDirectory=uri.dirLike,
                    isTemporary=uri.isTemporary,
                )
            return uri
        else:
            raise ValueError(
                f"Supplied URI must be string, Path, ResourcePath, or ParseResult but got '{uri!r}'"
            )

        if subclass is None:
            # Work out the subclass from the URI scheme
            if not parsed.scheme:
                # Root may be specified as a ResourcePath that overrides
                # the schemeless determination.
                if (
                    root_uri is not None
                    and root_uri.scheme != "file"  # file scheme has different code path
                    and not parsed.path.startswith("/")  # Not already absolute path
                ):
                    if not root_uri.dirLike:
                        raise ValueError(
                            f"Root URI ({root}) was not a directory so can not be joined with"
                            f" path {parsed.path!r}"
                        )
                    # If root is temporary or this schemeless is temporary we
                    # assume this URI is temporary.
                    isTemporary = isTemporary or root_uri.isTemporary
                    joined = root_uri.join(
                        parsed.path, forceDirectory=forceDirectory, isTemporary=isTemporary
                    )

                    # Rather than returning this new ResourcePath directly we
                    # instead extract the path and the scheme and adjust the
                    # URI we were given -- we need to do this to preserve
                    # fragments since join() will drop them.
                    parsed = parsed._replace(scheme=joined.scheme, path=joined.path, netloc=joined.netloc)
                    subclass = type(joined)

                    # Clear the root parameter to indicate that it has
                    # been applied already.
                    root_uri = None
                else:
                    from .schemeless import SchemelessResourcePath

                    subclass = SchemelessResourcePath
            elif parsed.scheme == "file":
                from .file import FileResourcePath

                subclass = FileResourcePath
            elif parsed.scheme == "s3":
                from .s3 import S3ResourcePath

                subclass = S3ResourcePath
            elif parsed.scheme.startswith("http"):
                # Matches any scheme beginning with "http" (so also "https").
                from .http import HttpResourcePath

                subclass = HttpResourcePath
            elif parsed.scheme == "gs":
                from .gs import GSResourcePath

                subclass = GSResourcePath
            elif parsed.scheme == "resource":
                # Rules for scheme names disallow pkg_resource
                from .packageresource import PackageResourcePath

                subclass = PackageResourcePath
            elif parsed.scheme == "mem":
                # in-memory datastore object
                from .mem import InMemoryResourcePath

                subclass = InMemoryResourcePath
            else:
                raise NotImplementedError(
                    f"No URI support for scheme: '{parsed.scheme}' in {parsed.geturl()}"
                )

            parsed, dirLike = subclass._fixupPathUri(
                parsed, root=root_uri, forceAbsolute=forceAbsolute, forceDirectory=forceDirectory
            )

            # It is possible for the class to change from schemeless
            # to file so handle that
            if parsed.scheme == "file":
                from .file import FileResourcePath

                subclass = FileResourcePath

        # Now create an instance of the correct subclass and set the
        # attributes directly
        self = object.__new__(subclass)
        self._uri = parsed
        self.dirLike = dirLike
        if isTemporary is None:
            isTemporary = False
        self.isTemporary = isTemporary
        return self
329 @property
330 def scheme(self) -> str:
331 """Return the URI scheme.
333 Notes
334 -----
335 (``://`` is not part of the scheme).
336 """
337 return self._uri.scheme
339 @property
340 def netloc(self) -> str:
341 """Return the URI network location."""
342 return self._uri.netloc
344 @property
345 def path(self) -> str:
346 """Return the path component of the URI."""
347 return self._uri.path
349 @property
350 def unquoted_path(self) -> str:
351 """Return path component of the URI with any URI quoting reversed."""
352 return urllib.parse.unquote(self._uri.path)
354 @property
355 def ospath(self) -> str:
356 """Return the path component of the URI localized to current OS."""
357 raise AttributeError(f"Non-file URI ({self}) has no local OS path.")
359 @property
360 def relativeToPathRoot(self) -> str:
361 """Return path relative to network location.
363 Effectively, this is the path property with posix separator stripped
364 from the left hand side of the path.
366 Always unquotes.
367 """
368 p = self._pathLib(self.path)
369 relToRoot = str(p.relative_to(p.root))
370 if self.dirLike and not relToRoot.endswith("/"):
371 relToRoot += "/"
372 return urllib.parse.unquote(relToRoot)
374 @property
375 def is_root(self) -> bool:
376 """Return whether this URI points to the root of the network location.
378 This means that the path components refers to the top level.
379 """
380 relpath = self.relativeToPathRoot
381 if relpath == "./":
382 return True
383 return False
385 @property
386 def fragment(self) -> str:
387 """Return the fragment component of the URI."""
388 return self._uri.fragment
390 @property
391 def params(self) -> str:
392 """Return any parameters included in the URI."""
393 return self._uri.params
395 @property
396 def query(self) -> str:
397 """Return any query strings included in the URI."""
398 return self._uri.query
400 def geturl(self) -> str:
401 """Return the URI in string form.
403 Returns
404 -------
405 url : `str`
406 String form of URI.
407 """
408 return self._uri.geturl()
410 def root_uri(self) -> ResourcePath:
411 """Return the base root URI.
413 Returns
414 -------
415 uri : `ResourcePath`
416 root URI.
417 """
418 return self.replace(path="", forceDirectory=True)
420 def split(self) -> Tuple[ResourcePath, str]:
421 """Split URI into head and tail.
423 Returns
424 -------
425 head: `ResourcePath`
426 Everything leading up to tail, expanded and normalized as per
427 ResourcePath rules.
428 tail : `str`
429 Last `self.path` component. Tail will be empty if path ends on a
430 separator. Tail will never contain separators. It will be
431 unquoted.
433 Notes
434 -----
435 Equivalent to `os.path.split()` where head preserves the URI
436 components.
437 """
438 head, tail = self._pathModule.split(self.path)
439 headuri = self._uri._replace(path=head)
441 # The file part should never include quoted metacharacters
442 tail = urllib.parse.unquote(tail)
444 # Schemeless is special in that it can be a relative path
445 # We need to ensure that it stays that way. All other URIs will
446 # be absolute already.
447 forceAbsolute = self._pathModule.isabs(self.path)
448 return ResourcePath(headuri, forceDirectory=True, forceAbsolute=forceAbsolute), tail
450 def basename(self) -> str:
451 """Return the base name, last element of path, of the URI.
453 Returns
454 -------
455 tail : `str`
456 Last part of the path attribute. Trail will be empty if path ends
457 on a separator.
459 Notes
460 -----
461 If URI ends on a slash returns an empty string. This is the second
462 element returned by `split()`.
464 Equivalent of `os.path.basename()``.
465 """
466 return self.split()[1]
468 def dirname(self) -> ResourcePath:
469 """Return the directory component of the path as a new `ResourcePath`.
471 Returns
472 -------
473 head : `ResourcePath`
474 Everything except the tail of path attribute, expanded and
475 normalized as per ResourcePath rules.
477 Notes
478 -----
479 Equivalent of `os.path.dirname()`.
480 """
481 return self.split()[0]
483 def parent(self) -> ResourcePath:
484 """Return a `ResourcePath` of the parent directory.
486 Returns
487 -------
488 head : `ResourcePath`
489 Everything except the tail of path attribute, expanded and
490 normalized as per `ResourcePath` rules.
492 Notes
493 -----
494 For a file-like URI this will be the same as calling `dirname()`.
495 """
496 # When self is file-like, return self.dirname()
497 if not self.dirLike:
498 return self.dirname()
499 # When self is dir-like, return its parent directory,
500 # regardless of the presence of a trailing separator
501 originalPath = self._pathLib(self.path)
502 parentPath = originalPath.parent
503 return self.replace(path=str(parentPath), forceDirectory=True)
505 def replace(self, forceDirectory: bool = False, isTemporary: bool = False, **kwargs: Any) -> ResourcePath:
506 """Return new `ResourcePath` with specified components replaced.
508 Parameters
509 ----------
510 forceDirectory : `bool`, optional
511 Parameter passed to ResourcePath constructor to force this
512 new URI to be dir-like.
513 isTemporary : `bool`, optional
514 Indicate that the resulting URI is temporary resource.
515 **kwargs
516 Components of a `urllib.parse.ParseResult` that should be
517 modified for the newly-created `ResourcePath`.
519 Returns
520 -------
521 new : `ResourcePath`
522 New `ResourcePath` object with updated values.
524 Notes
525 -----
526 Does not, for now, allow a change in URI scheme.
527 """
528 # Disallow a change in scheme
529 if "scheme" in kwargs:
530 raise ValueError(f"Can not use replace() method to change URI scheme for {self}")
531 return self.__class__(
532 self._uri._replace(**kwargs), forceDirectory=forceDirectory, isTemporary=isTemporary
533 )
535 def updatedFile(self, newfile: str) -> ResourcePath:
536 """Return new URI with an updated final component of the path.
538 Parameters
539 ----------
540 newfile : `str`
541 File name with no path component.
543 Returns
544 -------
545 updated : `ResourcePath`
547 Notes
548 -----
549 Forces the ResourcePath.dirLike attribute to be false. The new file
550 path will be quoted if necessary.
551 """
552 if self.quotePaths:
553 newfile = urllib.parse.quote(newfile)
554 dir, _ = self._pathModule.split(self.path)
555 newpath = self._pathModule.join(dir, newfile)
557 updated = self.replace(path=newpath)
558 updated.dirLike = False
559 return updated
561 def updatedExtension(self, ext: Optional[str]) -> ResourcePath:
562 """Return a new `ResourcePath` with updated file extension.
564 All file extensions are replaced.
566 Parameters
567 ----------
568 ext : `str` or `None`
569 New extension. If an empty string is given any extension will
570 be removed. If `None` is given there will be no change.
572 Returns
573 -------
574 updated : `ResourcePath`
575 URI with the specified extension. Can return itself if
576 no extension was specified.
577 """
578 if ext is None:
579 return self
581 # Get the extension
582 current = self.getExtension()
584 # Nothing to do if the extension already matches
585 if current == ext:
586 return self
588 # Remove the current extension from the path
589 # .fits.gz counts as one extension do not use os.path.splitext
590 path = self.path
591 if current:
592 path = path[: -len(current)]
594 # Ensure that we have a leading "." on file extension (and we do not
595 # try to modify the empty string)
596 if ext and not ext.startswith("."):
597 ext = "." + ext
599 return self.replace(path=path + ext)
601 def getExtension(self) -> str:
602 """Return the file extension(s) associated with this URI path.
604 Returns
605 -------
606 ext : `str`
607 The file extension (including the ``.``). Can be empty string
608 if there is no file extension. Usually returns only the last
609 file extension unless there is a special extension modifier
610 indicating file compression, in which case the combined
611 extension (e.g. ``.fits.gz``) will be returned.
612 """
613 special = {".gz", ".bz2", ".xz", ".fz"}
615 # Get the file part of the path so as not to be confused by
616 # "." in directory names.
617 basename = self.basename()
618 extensions = self._pathLib(basename).suffixes
620 if not extensions:
621 return ""
623 ext = extensions.pop()
625 # Multiple extensions, decide whether to include the final two
626 if extensions and ext in special:
627 ext = f"{extensions[-1]}{ext}"
629 return ext
631 def join(
632 self, path: Union[str, ResourcePath], isTemporary: Optional[bool] = None, forceDirectory: bool = False
633 ) -> ResourcePath:
634 """Return new `ResourcePath` with additional path components.
636 Parameters
637 ----------
638 path : `str`, `ResourcePath`
639 Additional file components to append to the current URI. Assumed
640 to include a file at the end. Will be quoted depending on the
641 associated URI scheme. If the path looks like a URI with a scheme
642 referring to an absolute location, it will be returned
643 directly (matching the behavior of `os.path.join()`). It can
644 also be a `ResourcePath`.
645 isTemporary : `bool`, optional
646 Indicate that the resulting URI represents a temporary resource.
647 Default is ``self.isTemporary``.
648 forceDirectory : `bool`, optional
649 If `True` forces the URI to end with a separator, otherwise given
650 URI is interpreted as is.
652 Returns
653 -------
654 new : `ResourcePath`
655 New URI with any file at the end replaced with the new path
656 components.
658 Notes
659 -----
660 Schemeless URIs assume local path separator but all other URIs assume
661 POSIX separator if the supplied path has directory structure. It
662 may be this never becomes a problem but datastore templates assume
663 POSIX separator is being used.
665 If an absolute `ResourcePath` is given for ``path`` is is assumed that
666 this should be returned directly. Giving a ``path`` of an absolute
667 scheme-less URI is not allowed for safety reasons as it may indicate
668 a mistake in the calling code.
670 Raises
671 ------
672 ValueError
673 Raised if the ``path`` is an absolute scheme-less URI. In that
674 situation it is unclear whether the intent is to return a
675 ``file`` URI or it was a mistake and a relative scheme-less URI
676 was meant.
677 RuntimeError
678 Raised if this attempts to join a temporary URI to a non-temporary
679 URI.
680 """
681 if isTemporary is None:
682 isTemporary = self.isTemporary
683 elif not isTemporary and self.isTemporary:
684 raise RuntimeError("Cannot join temporary URI to non-temporary URI.")
685 # If we have a full URI in path we will use it directly
686 # but without forcing to absolute so that we can trap the
687 # expected option of relative path.
688 path_uri = ResourcePath(
689 path, forceAbsolute=False, forceDirectory=forceDirectory, isTemporary=isTemporary
690 )
691 if path_uri.scheme:
692 # Check for scheme so can distinguish explicit URIs from
693 # absolute scheme-less URIs.
694 return path_uri
696 if path_uri.isabs():
697 # Absolute scheme-less path.
698 raise ValueError(f"Can not join absolute scheme-less {path_uri!r} to another URI.")
700 # If this was originally a ResourcePath extract the unquoted path from
701 # it. Otherwise we use the string we were given to allow "#" to appear
702 # in the filename if given as a plain string.
703 if not isinstance(path, str):
704 path = path_uri.unquoted_path
706 new = self.dirname() # By definition a directory URI
708 # new should be asked about quoting, not self, since dirname can
709 # change the URI scheme for schemeless -> file
710 if new.quotePaths:
711 path = urllib.parse.quote(path)
713 newpath = self._pathModule.normpath(self._pathModule.join(new.path, path))
715 # normpath can strip trailing / so we force directory if the supplied
716 # path ended with a /
717 return new.replace(
718 path=newpath,
719 forceDirectory=(forceDirectory or path.endswith(self._pathModule.sep)),
720 isTemporary=isTemporary,
721 )
723 def relative_to(self, other: ResourcePath) -> Optional[str]:
724 """Return the relative path from this URI to the other URI.
726 Parameters
727 ----------
728 other : `ResourcePath`
729 URI to use to calculate the relative path. Must be a parent
730 of this URI.
732 Returns
733 -------
734 subpath : `str`
735 The sub path of this URI relative to the supplied other URI.
736 Returns `None` if there is no parent child relationship.
737 Scheme and netloc must match.
738 """
739 # Scheme-less absolute other is treated as if it's a file scheme.
740 # Scheme-less relative other can only return non-None if self
741 # is also scheme-less relative and that is handled specifically
742 # in a subclass.
743 if not other.scheme and other.isabs():
744 other = other.abspath()
746 # Scheme-less self is handled elsewhere.
747 if self.scheme != other.scheme:
748 return None
749 if self.netloc != other.netloc:
750 # Special case for localhost vs empty string.
751 # There can be many variants of localhost.
752 local_netlocs = {"", "localhost", "localhost.localdomain", "127.0.0.1"}
753 if not {self.netloc, other.netloc}.issubset(local_netlocs):
754 return None
756 enclosed_path = self._pathLib(self.relativeToPathRoot)
757 parent_path = other.relativeToPathRoot
758 subpath: Optional[str]
759 try:
760 subpath = str(enclosed_path.relative_to(parent_path))
761 except ValueError:
762 subpath = None
763 else:
764 subpath = urllib.parse.unquote(subpath)
765 return subpath
767 def exists(self) -> bool:
768 """Indicate that the resource is available.
770 Returns
771 -------
772 exists : `bool`
773 `True` if the resource exists.
774 """
775 raise NotImplementedError()
777 @classmethod
778 def mexists(cls, uris: Iterable[ResourcePath]) -> Dict[ResourcePath, bool]:
779 """Check for existence of multiple URIs at once.
781 Parameters
782 ----------
783 uris : iterable of `ResourcePath`
784 The URIs to test.
786 Returns
787 -------
788 existence : `dict` of [`ResourcePath`, `bool`]
789 Mapping of original URI to boolean indicating existence.
790 """
791 exists_executor = concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)
792 future_exists = {exists_executor.submit(uri.exists): uri for uri in uris}
794 results: Dict[ResourcePath, bool] = {}
795 for future in concurrent.futures.as_completed(future_exists):
796 uri = future_exists[future]
797 try:
798 exists = future.result()
799 except Exception:
800 exists = False
801 results[uri] = exists
802 return results
804 def remove(self) -> None:
805 """Remove the resource."""
806 raise NotImplementedError()
    def isabs(self) -> bool:
        """Indicate that the resource is fully specified.

        For non-schemeless URIs this is always true. This base
        implementation returns `True` unconditionally.

        Returns
        -------
        isabs : `bool`
            `True` in all cases except schemeless URI.
        """
        return True
    def abspath(self) -> ResourcePath:
        """Return URI using an absolute path.

        This base implementation returns ``self`` unchanged.

        Returns
        -------
        abs : `ResourcePath`
            Absolute URI. For non-schemeless URIs this always returns itself.
            Schemeless URIs are upgraded to file URIs.
        """
        return self
831 def _as_local(self) -> Tuple[str, bool]:
832 """Return the location of the (possibly remote) resource as local file.
834 This is a helper function for `as_local` context manager.
836 Returns
837 -------
838 path : `str`
839 If this is a remote resource, it will be a copy of the resource
840 on the local file system, probably in a temporary directory.
841 For a local resource this should be the actual path to the
842 resource.
843 is_temporary : `bool`
844 Indicates if the local path is a temporary file or not.
845 """
846 raise NotImplementedError()
848 @contextlib.contextmanager
849 def as_local(self) -> Iterator[ResourcePath]:
850 """Return the location of the (possibly remote) resource as local file.
852 Yields
853 ------
854 local : `ResourcePath`
855 If this is a remote resource, it will be a copy of the resource
856 on the local file system, probably in a temporary directory.
857 For a local resource this should be the actual path to the
858 resource.
860 Notes
861 -----
862 The context manager will automatically delete any local temporary
863 file.
865 Examples
866 --------
867 Should be used as a context manager:
869 .. code-block:: py
871 with uri.as_local() as local:
872 ospath = local.ospath
873 """
874 if self.dirLike:
875 raise IsADirectoryError(f"Directory-like URI {self} cannot be fetched as local.")
876 local_src, is_temporary = self._as_local()
877 local_uri = ResourcePath(local_src, isTemporary=is_temporary)
879 try:
880 yield local_uri
881 finally:
882 # The caller might have relocated the temporary file.
883 # Do not ever delete if the temporary matches self
884 # (since it may have been that a temporary file was made local
885 # but already was local).
886 if self != local_uri and is_temporary and local_uri.exists():
887 local_uri.remove()
889 @classmethod
890 @contextlib.contextmanager
891 def temporary_uri(
892 cls, prefix: Optional[ResourcePath] = None, suffix: Optional[str] = None
893 ) -> Iterator[ResourcePath]:
894 """Create a temporary file-like URI.
896 Parameters
897 ----------
898 prefix : `ResourcePath`, optional
899 Prefix to use. Without this the path will be formed as a local
900 file URI in a temporary directory. Ensuring that the prefix
901 location exists is the responsibility of the caller.
902 suffix : `str`, optional
903 A file suffix to be used. The ``.`` should be included in this
904 suffix.
906 Yields
907 ------
908 uri : `ResourcePath`
909 The temporary URI. Will be removed when the context is completed.
910 """
911 use_tempdir = False
912 if prefix is None:
913 prefix = ResourcePath(tempfile.mkdtemp(), forceDirectory=True, isTemporary=True)
914 # Record that we need to delete this directory. Can not rely
915 # on isTemporary flag since an external prefix may have that
916 # set as well.
917 use_tempdir = True
919 # Need to create a randomized file name. For consistency do not
920 # use mkstemp for local and something else for remote. Additionally
921 # this method does not create the file to prevent name clashes.
922 characters = "abcdefghijklmnopqrstuvwxyz0123456789_"
923 rng = Random()
924 tempname = "".join(rng.choice(characters) for _ in range(16))
925 if suffix:
926 tempname += suffix
927 temporary_uri = prefix.join(tempname, isTemporary=True)
928 if temporary_uri.dirLike:
929 # If we had a safe way to clean up a remote temporary directory, we
930 # could support this.
931 raise NotImplementedError("temporary_uri cannot be used to create a temporary directory.")
932 try:
933 yield temporary_uri
934 finally:
935 if use_tempdir:
936 shutil.rmtree(prefix.ospath, ignore_errors=True)
937 else:
938 try:
939 # It's okay if this does not work because the user removed
940 # the file.
941 temporary_uri.remove()
942 except FileNotFoundError:
943 pass
945 def read(self, size: int = -1) -> bytes:
946 """Open the resource and return the contents in bytes.
948 Parameters
949 ----------
950 size : `int`, optional
951 The number of bytes to read. Negative or omitted indicates
952 that all data should be read.
953 """
954 raise NotImplementedError()
956 def write(self, data: bytes, overwrite: bool = True) -> None:
957 """Write the supplied bytes to the new resource.
959 Parameters
960 ----------
961 data : `bytes`
962 The bytes to write to the resource. The entire contents of the
963 resource will be replaced.
964 overwrite : `bool`, optional
965 If `True` the resource will be overwritten if it exists. Otherwise
966 the write will fail.
967 """
968 raise NotImplementedError()
970 def mkdir(self) -> None:
971 """For a dir-like URI, create the directory resource if needed."""
972 raise NotImplementedError()
974 def isdir(self) -> bool:
975 """Return True if this URI looks like a directory, else False."""
976 return self.dirLike
978 def size(self) -> int:
979 """For non-dir-like URI, return the size of the resource.
981 Returns
982 -------
983 sz : `int`
984 The size in bytes of the resource associated with this URI.
985 Returns 0 if dir-like.
986 """
987 raise NotImplementedError()
989 def __str__(self) -> str:
990 """Convert the URI to its native string form."""
991 return self.geturl()
993 def __repr__(self) -> str:
994 """Return string representation suitable for evaluation."""
995 return f'ResourcePath("{self.geturl()}")'
997 def __eq__(self, other: Any) -> bool:
998 """Compare supplied object with this `ResourcePath`."""
999 if not isinstance(other, ResourcePath):
1000 return NotImplemented
1001 return self.geturl() == other.geturl()
1003 def __hash__(self) -> int:
1004 """Return hash of this object."""
1005 return hash(str(self))
1007 def __lt__(self, other: ResourcePath) -> bool:
1008 return self.geturl() < other.geturl()
1010 def __le__(self, other: ResourcePath) -> bool:
1011 return self.geturl() <= other.geturl()
1013 def __gt__(self, other: ResourcePath) -> bool:
1014 return self.geturl() > other.geturl()
1016 def __ge__(self, other: ResourcePath) -> bool:
1017 return self.geturl() >= other.geturl()
1019 def __copy__(self) -> ResourcePath:
1020 """Copy constructor.
1022 Object is immutable so copy can return itself.
1023 """
1024 # Implement here because the __new__ method confuses things
1025 return self
1027 def __deepcopy__(self, memo: Any) -> ResourcePath:
1028 """Deepcopy the object.
1030 Object is immutable so copy can return itself.
1031 """
1032 # Implement here because the __new__ method confuses things
1033 return self
1035 def __getnewargs__(self) -> Tuple:
1036 """Support pickling."""
1037 return (str(self),)
1039 @classmethod
1040 def _fixDirectorySep(
1041 cls, parsed: urllib.parse.ParseResult, forceDirectory: bool = False
1042 ) -> Tuple[urllib.parse.ParseResult, bool]:
1043 """Ensure that a path separator is present on directory paths.
1045 Parameters
1046 ----------
1047 parsed : `~urllib.parse.ParseResult`
1048 The result from parsing a URI using `urllib.parse`.
1049 forceDirectory : `bool`, optional
1050 If `True` forces the URI to end with a separator, otherwise given
1051 URI is interpreted as is. Specifying that the URI is conceptually
1052 equivalent to a directory can break some ambiguities when
1053 interpreting the last element of a path.
1055 Returns
1056 -------
1057 modified : `~urllib.parse.ParseResult`
1058 Update result if a URI is being handled.
1059 dirLike : `bool`
1060 `True` if given parsed URI has a trailing separator or
1061 forceDirectory is True. Otherwise `False`.
1062 """
1063 # assume we are not dealing with a directory like URI
1064 dirLike = False
1066 # Directory separator
1067 sep = cls._pathModule.sep
1069 # URI is dir-like if explicitly stated or if it ends on a separator
1070 endsOnSep = parsed.path.endswith(sep)
1071 if forceDirectory or endsOnSep:
1072 dirLike = True
1073 # only add the separator if it's not already there
1074 if not endsOnSep:
1075 parsed = parsed._replace(path=parsed.path + sep)
1077 return parsed, dirLike
1079 @classmethod
1080 def _fixupPathUri(
1081 cls,
1082 parsed: urllib.parse.ParseResult,
1083 root: Optional[ResourcePath] = None,
1084 forceAbsolute: bool = False,
1085 forceDirectory: bool = False,
1086 ) -> Tuple[urllib.parse.ParseResult, bool]:
1087 """Correct any issues with the supplied URI.
1089 Parameters
1090 ----------
1091 parsed : `~urllib.parse.ParseResult`
1092 The result from parsing a URI using `urllib.parse`.
1093 root : `ResourcePath`, ignored
1094 Not used by the this implementation since all URIs are
1095 absolute except for those representing the local file system.
1096 forceAbsolute : `bool`, ignored.
1097 Not used by this implementation. URIs are generally always
1098 absolute.
1099 forceDirectory : `bool`, optional
1100 If `True` forces the URI to end with a separator, otherwise given
1101 URI is interpreted as is. Specifying that the URI is conceptually
1102 equivalent to a directory can break some ambiguities when
1103 interpreting the last element of a path.
1105 Returns
1106 -------
1107 modified : `~urllib.parse.ParseResult`
1108 Update result if a URI is being handled.
1109 dirLike : `bool`
1110 `True` if given parsed URI has a trailing separator or
1111 forceDirectory is True. Otherwise `False`.
1113 Notes
1114 -----
1115 Relative paths are explicitly not supported by RFC8089 but `urllib`
1116 does accept URIs of the form ``file:relative/path.ext``. They need
1117 to be turned into absolute paths before they can be used. This is
1118 always done regardless of the ``forceAbsolute`` parameter.
1120 AWS S3 differentiates between keys with trailing POSIX separators (i.e
1121 `/dir` and `/dir/`) whereas POSIX does not neccessarily.
1123 Scheme-less paths are normalized.
1124 """
1125 return cls._fixDirectorySep(parsed, forceDirectory)
def transfer_from(
    self,
    src: ResourcePath,
    transfer: str,
    overwrite: bool = False,
    transaction: Optional[TransactionProtocol] = None,
) -> None:
    """Transfer a resource from another URI to this one.

    Parameters
    ----------
    src : `ResourcePath`
        Source URI.
    transfer : `str`
        Mode to use for transferring the resource. Generically there are
        many standard options: copy, link, symlink, hardlink, relsymlink.
        Not all URIs support all modes.
    overwrite : `bool`, optional
        Allow an existing file to be overwritten. Defaults to `False`.
    transaction : `~lsst.resources.utils.TransactionProtocol`, optional
        A transaction object that can (depending on implementation)
        rollback transfers on error. Not guaranteed to be implemented.

    Raises
    ------
    NotImplementedError
        Always raised here; the message names this URI's scheme.

    Notes
    -----
    Conceptually this is hard to scale as the number of URI schemes
    grows. The destination URI is more important than the source URI
    since that is where all the transfer modes are relevant (with the
    complication that "move" deletes the source).

    Local file to local file is the fundamental use case but every
    other scheme has to support "copy" to local file (with implicit
    support for "move") and copy from local file.
    All the "link" options tend to be specific to local file systems.

    "move" is a "copy" where the remote resource is deleted at the end.
    Whether this works depends on the source URI rather than the
    destination URI. Reverting a move on transaction rollback is
    expected to be problematic if a remote resource was involved.
    """
    message = f"No transfer modes supported by URI scheme {self.scheme}"
    raise NotImplementedError(message)
def walk(
    self, file_filter: Optional[Union[str, re.Pattern]] = None
) -> Iterator[Union[List, Tuple[ResourcePath, List[str], List[str]]]]:
    """Walk the directory tree, reporting matching files and directories.

    Parameters
    ----------
    file_filter : `str` or `re.Pattern`, optional
        Regex to filter out files from the list before it is returned.

    Yields
    ------
    dirpath : `ResourcePath`
        Current directory being examined.
    dirnames : `list` of `str`
        Names of subdirectories within dirpath.
    filenames : `list` of `str`
        Names of all the files within dirpath.

    Raises
    ------
    NotImplementedError
        Always raised here, at call time; this implementation cannot
        walk a directory tree.
    """
    raise NotImplementedError()
@overload
@classmethod
def findFileResources(
    cls,
    candidates: Iterable[ResourcePathExpression],
    file_filter: Optional[Union[str, re.Pattern]],
    grouped: Literal[True],
) -> Iterator[Iterator[ResourcePath]]:
    ...

@overload
@classmethod
def findFileResources(
    cls,
    candidates: Iterable[ResourcePathExpression],
    *,
    grouped: Literal[True],
) -> Iterator[Iterator[ResourcePath]]:
    ...

@overload
@classmethod
def findFileResources(
    cls,
    candidates: Iterable[ResourcePathExpression],
    file_filter: Optional[Union[str, re.Pattern]] = None,
    grouped: Literal[False] = False,
) -> Iterator[ResourcePath]:
    ...

@classmethod
def findFileResources(
    cls,
    candidates: Iterable[ResourcePathExpression],
    file_filter: Optional[Union[str, re.Pattern]] = None,
    grouped: bool = False,
) -> Iterator[Union[ResourcePath, Iterator[ResourcePath]]]:
    """Get all the files from a list of values.

    Parameters
    ----------
    candidates : iterable [`str` or `ResourcePath`]
        The files to return and directories in which to look for files to
        return.
    file_filter : `str` or `re.Pattern`, optional
        The regex to use when searching for files within directories.
        By default returns all the found files.
    grouped : `bool`, optional
        If `True` the results will be grouped by directory and each
        yielded value will be an iterator over URIs. If `False` each
        URI will be returned separately.

    Yields
    ------
    found_file: `ResourcePath`
        The passed-in URIs and URIs found in passed-in directories.
        If grouping is enabled, each of the yielded values will be an
        iterator yielding members of the group. Files given explicitly
        will be returned as a single group at the end.

    Notes
    -----
    If a value is a file it is yielded immediately without checking that it
    exists. If a value is a directory, all the files in the directory
    (recursively) that match the regex will be yielded in turn.

    Each group is fully built before it is yielded, so groups stay
    correct even if the caller collects several of them before iterating
    over any.
    """
    fileRegex = None if file_filter is None else re.compile(file_filter)

    # Explicitly given files; only accumulated when grouping.
    singles = []

    # Find all the files of interest
    for location in candidates:
        uri = ResourcePath(location)

        if not uri.isdir():
            if grouped:
                singles.append(uri)
            else:
                yield uri
            continue

        for found in uri.walk(fileRegex):
            if not found:
                # This means the uri does not exist and by
                # convention we ignore it
                continue
            root, _dirs, files = found
            if not files:
                continue
            if grouped:
                # Build the group eagerly. A generator expression would
                # look up ``root`` only when consumed, by which time this
                # loop may have rebound it to a different directory.
                yield iter([root.join(name) for name in files])
            else:
                for name in files:
                    yield root.join(name)

    # Finally, return any explicitly given files in one group
    if grouped and singles:
        yield iter(singles)
@contextlib.contextmanager
def open(
    self,
    mode: str = "r",
    *,
    encoding: Optional[str] = None,
    prefer_file_temporary: bool = False,
) -> Iterator[ResourceHandleProtocol]:
    """Return a context manager that wraps an object that behaves like an
    open file at the location of the URI.

    Parameters
    ----------
    mode : `str`
        String indicating the mode in which to open the file. Values are
        the same as those accepted by `builtins.open`, though intrinsically
        read-only URI types may only support read modes, and
        `io.IOBase.seekable` is not guaranteed to be `True` on the returned
        object.
    encoding : `str`, optional
        Unicode encoding for text IO; ignored for binary IO. Defaults to
        ``locale.getpreferredencoding(False)``, just as `builtins.open`
        does.
    prefer_file_temporary : `bool`, optional
        If `True`, for implementations that require transfers from a remote
        system to temporary local storage and/or back, use a temporary file
        instead of an in-memory buffer; this is generally slower, but it
        may be necessary to avoid excessive memory usage by large files.
        Ignored by implementations that do not require a temporary.

    Returns
    -------
    cm : `contextlib.ContextManager`
        A context manager that wraps a file-like object.

    Raises
    ------
    IsADirectoryError
        Raised if this URI is directory-like.
    FileExistsError
        Raised if ``mode`` contains ``"x"`` and the resource already
        exists.

    Notes
    -----
    The default implementation of this method uses a local temporary buffer
    (in-memory or file, depending on ``prefer_file_temporary``) with calls
    to `read`, `write`, `as_local`, and `transfer_from` as necessary to
    read and write from/to remote systems. Remote writes thus occur only
    when the context manager is exited. `ResourcePath` implementations
    that can return a more efficient native buffer should do so whenever
    possible (as is guaranteed for local files). `ResourcePath`
    implementations for which `as_local` does not return a temporary are
    required to reimplement `open`, though they may delegate to `super`
    when `prefer_file_temporary` is `False`.
    """
    if self.dirLike:
        raise IsADirectoryError(f"Directory-like URI {self} cannot be opened.")
    if "x" in mode and self.exists():
        raise FileExistsError(f"File at {self} already exists.")
    if prefer_file_temporary:
        if "r" in mode or "a" in mode:
            # Existing content is needed, so fetch a local copy.
            local_cm = self.as_local()
        else:
            # Pure write: start from a fresh temporary location.
            local_cm = self.temporary_uri(suffix=self.getExtension())
        # The builtin open raises ValueError when given an encoding in
        # binary mode, but this method documents the encoding as ignored
        # for binary IO, so drop it here.
        file_encoding = None if "b" in mode else encoding
        with local_cm as local_uri:
            assert local_uri.isTemporary, (
                "ResourcePath implementations for which as_local is not "
                "a temporary must reimplement `open`."
            )
            # io.open is the builtin open; spelling it this way keeps the
            # call unambiguous given that this method is also named open.
            with io.open(local_uri.ospath, mode=mode, encoding=file_encoding) as file_buffer:
                if "a" in mode:
                    file_buffer.seek(0, io.SEEK_END)
                yield file_buffer
            # Push the local file back only if the mode allowed writing.
            if "r" not in mode or "+" in mode:
                self.transfer_from(local_uri, transfer="copy", overwrite=("x" not in mode))
    else:
        with self._openImpl(mode, encoding=encoding) as handle:
            yield handle
1359 @contextlib.contextmanager
1360 def _openImpl(
1361 self, mode: str = "r", *, encoding: Optional[str] = None
1362 ) -> Iterator[ResourceHandleProtocol]:
1363 """Implement opening of a resource handle.
1365 This private method may be overridden by specific `ResourcePath`
1366 implementations to provide a customized handle like interface.
1368 Parameters
1369 ----------
1370 mode : `str`
1371 The mode the handle should be opened with
1372 encoding : `str`, optional
1373 The byte encoding of any binary text
1375 Yields
1376 ------
1377 handle : `BaseResourceHandle`
1378 A handle that conforms to the `BaseResourcehandle interface
1380 Notes
1381 -----
1382 The base implementation of a file handle reads in a files entire
1383 contents into a buffer for manipulation, and then writes it back out
1384 upon close. Subclasses of this class may offer more fine grained
1385 control.
1386 """
1387 if "r" in mode or "a" in mode:
1388 in_bytes = self.read()
1389 else:
1390 in_bytes = b""
1391 if "b" in mode:
1392 bytes_buffer = io.BytesIO(in_bytes)
1393 if "a" in mode:
1394 bytes_buffer.seek(0, io.SEEK_END)
1395 yield bytes_buffer
1396 out_bytes = bytes_buffer.getvalue()
1397 else:
1398 if encoding is None:
1399 encoding = locale.getpreferredencoding(False)
1400 str_buffer = io.StringIO(in_bytes.decode(encoding))
1401 if "a" in mode:
1402 str_buffer.seek(0, io.SEEK_END)
1403 yield str_buffer
1404 out_bytes = str_buffer.getvalue().encode(encoding)
1405 if "r" not in mode or "+" in mode:
1406 self.write(out_bytes, overwrite=("x" not in mode))