Coverage for python/lsst/resources/_resourcePath.py: 21%
410 statements
« prev ^ index » next coverage.py v7.2.3, created at 2023-04-20 03:07 -0700
« prev ^ index » next coverage.py v7.2.3, created at 2023-04-20 03:07 -0700
1# This file is part of lsst-resources.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# Use of this source code is governed by a 3-clause BSD-style
10# license that can be found in the LICENSE file.
12from __future__ import annotations
14import concurrent.futures
15import contextlib
16import copy
17import io
18import locale
19import logging
20import os
21import posixpath
22import re
23import shutil
24import tempfile
25import urllib.parse
26from pathlib import Path, PurePath, PurePosixPath
27from random import Random
29__all__ = ("ResourcePath", "ResourcePathExpression")
31from typing import (
32 TYPE_CHECKING,
33 Any,
34 Dict,
35 Iterable,
36 Iterator,
37 List,
38 Literal,
39 Optional,
40 Tuple,
41 Type,
42 Union,
43 overload,
44)
46from ._resourceHandles._baseResourceHandle import ResourceHandleProtocol
48if TYPE_CHECKING:
49 from .utils import TransactionProtocol
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Regex for looking for URI escapes.
# Matches a percent sign followed by exactly two characters drawn from
# 0-9 and upper-case A-F; lower-case hex digits are not matched.
ESCAPES_RE = re.compile(r"%[A-F0-9]{2}")

# Precomputed escaped hash (the percent-quoted form of "#").
ESCAPED_HASH = urllib.parse.quote("#")

# Maximum number of worker threads for parallelized operations.
# If greater than 10, be aware that this number has to be consistent
# with connection pool sizing (for example in urllib3).
MAX_WORKERS = 10
66class ResourcePath:
67 """Convenience wrapper around URI parsers.
69 Provides access to URI components and can convert file
70 paths into absolute path URIs. Scheme-less URIs are treated as if
71 they are local file system paths and are converted to absolute URIs.
73 A specialist subclass is created for each supported URI scheme.
75 Parameters
76 ----------
77 uri : `str`, `Path`, `urllib.parse.ParseResult`, or `ResourcePath`.
78 URI in string form. Can be scheme-less if referring to a relative
79 path or an absolute path on the local file system.
80 root : `str` or `ResourcePath`, optional
81 When fixing up a relative path in a ``file`` scheme or if scheme-less,
82 use this as the root. Must be absolute. If `None` the current
83 working directory will be used. Can be any supported URI scheme.
84 Not used if ``forceAbsolute`` is `False`.
85 forceAbsolute : `bool`, optional
86 If `True`, scheme-less relative URI will be converted to an absolute
87 path using a ``file`` scheme. If `False` scheme-less URI will remain
88 scheme-less and will not be updated to ``file`` or absolute path unless
89 it is already an absolute path, in which case it will be updated to
90 a ``file`` scheme.
91 forceDirectory: `bool`, optional
92 If `True` forces the URI to end with a separator, otherwise given URI
93 is interpreted as is.
94 isTemporary : `bool`, optional
95 If `True` indicates that this URI points to a temporary resource.
96 The default is `False`, unless ``uri`` is already a `ResourcePath`
97 instance and ``uri.isTemporary is True``.
99 Notes
100 -----
101 A non-standard URI of the form ``file:dir/file.txt`` is always converted
102 to an absolute ``file`` URI.
103 """
105 _pathLib: Type[PurePath] = PurePosixPath
106 """Path library to use for this scheme."""
108 _pathModule = posixpath
109 """Path module to use for this scheme."""
111 transferModes: Tuple[str, ...] = ("copy", "auto", "move")
112 """Transfer modes supported by this implementation.
114 Move is special in that it is generally a copy followed by an unlink.
115 Whether that unlink works depends critically on whether the source URI
116 implements unlink. If it does not the move will be reported as a failure.
117 """
119 transferDefault: str = "copy"
120 """Default mode to use for transferring if ``auto`` is specified."""
122 quotePaths = True
123 """True if path-like elements modifying a URI should be quoted.
125 All non-schemeless URIs have to internally use quoted paths. Therefore
126 if a new file name is given (e.g. to updatedFile or join) a decision must
127 be made whether to quote it to be consistent.
128 """
130 isLocal = False
131 """If `True` this URI refers to a local file."""
133 # This is not an ABC with abstract methods because the __new__ being
134 # a factory confuses mypy such that it assumes that every constructor
135 # returns a ResourcePath and then determines that all the abstract methods
136 # are still abstract. If they are not marked abstract but just raise
137 # mypy is fine with it.
139 # mypy is confused without these
140 _uri: urllib.parse.ParseResult
141 isTemporary: bool
142 dirLike: bool
    def __new__(
        cls,
        uri: ResourcePathExpression,
        root: Optional[Union[str, ResourcePath]] = None,
        forceAbsolute: bool = True,
        forceDirectory: bool = False,
        isTemporary: Optional[bool] = None,
    ) -> ResourcePath:
        """Create and return new specialist ResourcePath subclass.

        Acts as a factory: the class of the returned object is chosen from
        the URI scheme unless the constructor is invoked on a subclass
        with an already-parsed `urllib.parse.ParseResult`.

        Parameters
        ----------
        uri : `str`, `os.PathLike`, `urllib.parse.ParseResult`, `ResourcePath`
            The URI to wrap.
        root : `str` or `ResourcePath`, optional
            Root used when resolving a relative scheme-less URI. A string
            is converted to a directory-like absolute `ResourcePath`.
        forceAbsolute : `bool`, optional
            Forwarded to the subclass ``_fixupPathUri`` implementation.
        forceDirectory : `bool`, optional
            Request that the resulting URI be directory-like.
        isTemporary : `bool`, optional
            Whether the URI refers to a temporary resource. `None` is
            stored as `False` on a newly created instance.

        Returns
        -------
        new : `ResourcePath`
            Instance of the selected subclass, or ``uri`` itself when it
            is already a compatible `ResourcePath`.

        Raises
        ------
        RuntimeError
            Raised if ``uri`` is a `ResourcePath` that is not dir-like
            while ``forceDirectory`` is `True`, or whose ``isTemporary``
            disagrees with an explicitly supplied ``isTemporary``.
        ValueError
            Raised if ``uri`` is of an unsupported type, or if a
            non-``file`` ``root`` that is not directory-like is needed to
            resolve a relative scheme-less path.
        NotImplementedError
            Raised if the URI scheme is not one of the supported schemes.
        """
        parsed: urllib.parse.ParseResult
        dirLike: bool = False
        subclass: Optional[Type[ResourcePath]] = None

        # Force root to be a ResourcePath -- this simplifies downstream
        # code.
        if root is None:
            root_uri = None
        elif isinstance(root, str):
            root_uri = ResourcePath(root, forceDirectory=True, forceAbsolute=True)
        else:
            root_uri = root

        # Path-like objects are handled through their string form.
        if isinstance(uri, os.PathLike):
            uri = str(uri)

        # Record if we need to post process the URI components
        # or if the instance is already fully configured
        if isinstance(uri, str):
            # Since local file names can have special characters in them
            # we need to quote them for the parser but we can unquote
            # later. Assume that all other URI schemes are quoted.
            # Since sometimes people write file:/a/b and not file:///a/b
            # we should not quote in the explicit case of file:
            if "://" not in uri and not uri.startswith("file:"):
                if ESCAPES_RE.search(uri):
                    # Looks like it is already quoted, so leave it alone
                    # rather than quoting a second time.
                    log.warning("Possible double encoding of %s", uri)
                else:
                    uri = urllib.parse.quote(uri)
                    # Special case hash since we must support fragments
                    # even in schemeless URIs -- although try to only replace
                    # them in file part and not directory part
                    if ESCAPED_HASH in uri:
                        dirpos = uri.rfind("/")
                        # Do replacement after this /
                        uri = uri[: dirpos + 1] + uri[dirpos + 1 :].replace(ESCAPED_HASH, "#")

            parsed = urllib.parse.urlparse(uri)
        elif isinstance(uri, urllib.parse.ParseResult):
            parsed = copy.copy(uri)
            # If we are being instantiated with a subclass, rather than
            # ResourcePath, ensure that that subclass is used directly.
            # This could lead to inconsistencies if this constructor
            # is used externally outside of the ResourcePath.replace() method.
            #   S3ResourcePath(urllib.parse.urlparse("file://a/b.txt"))
            # will be a problem.
            # This is needed to prevent a schemeless absolute URI become
            # a file URI unexpectedly when calling updatedFile or
            # updatedExtension
            if cls is not ResourcePath:
                parsed, dirLike = cls._fixDirectorySep(parsed, forceDirectory)
                subclass = cls

        elif isinstance(uri, ResourcePath):
            # Since ResourcePath is immutable we can return the argument
            # unchanged if it already agrees with forceDirectory, isTemporary,
            # and forceAbsolute.
            # We invoke __new__ again with str(self) to add a scheme for
            # forceAbsolute, but for the others that seems more likely to paper
            # over logic errors than do something useful, so we just raise.
            if forceDirectory and not uri.dirLike:
                raise RuntimeError(
                    f"{uri} is already a file-like ResourcePath; cannot force it to directory."
                )
            if isTemporary is not None and isTemporary is not uri.isTemporary:
                raise RuntimeError(
                    f"{uri} is already a {'temporary' if uri.isTemporary else 'permanent'} "
                    f"ResourcePath; cannot make it {'temporary' if isTemporary else 'permanent'}."
                )
            if forceAbsolute and not uri.scheme:
                return ResourcePath(
                    str(uri),
                    root=root,
                    forceAbsolute=True,
                    forceDirectory=uri.dirLike,
                    isTemporary=uri.isTemporary,
                )
            return uri
        else:
            raise ValueError(
                f"Supplied URI must be string, Path, ResourcePath, or ParseResult but got '{uri!r}'"
            )

        # ``subclass`` is only set at this point when a subclass was called
        # directly with a ParseResult; in that case the scheme lookup and
        # path fix-up below are skipped.
        if subclass is None:
            # Work out the subclass from the URI scheme
            if not parsed.scheme:
                # Root may be specified as a ResourcePath that overrides
                # the schemeless determination.
                if (
                    root_uri is not None
                    and root_uri.scheme != "file"  # file scheme has different code path
                    and not parsed.path.startswith("/")  # Not already absolute path
                ):
                    if not root_uri.dirLike:
                        raise ValueError(
                            f"Root URI ({root}) was not a directory so can not be joined with"
                            f" path {parsed.path!r}"
                        )
                    # If root is temporary or this schemeless is temporary we
                    # assume this URI is temporary.
                    isTemporary = isTemporary or root_uri.isTemporary
                    joined = root_uri.join(
                        parsed.path, forceDirectory=forceDirectory, isTemporary=isTemporary
                    )

                    # Rather than returning this new ResourcePath directly we
                    # instead extract the path and the scheme and adjust the
                    # URI we were given -- we need to do this to preserve
                    # fragments since join() will drop them.
                    parsed = parsed._replace(scheme=joined.scheme, path=joined.path, netloc=joined.netloc)
                    subclass = type(joined)

                    # Clear the root parameter to indicate that it has
                    # been applied already.
                    root_uri = None
                else:
                    from .schemeless import SchemelessResourcePath

                    subclass = SchemelessResourcePath
            elif parsed.scheme == "file":
                from .file import FileResourcePath

                subclass = FileResourcePath
            elif parsed.scheme == "s3":
                from .s3 import S3ResourcePath

                subclass = S3ResourcePath
            elif parsed.scheme.startswith("http"):
                # Prefix match, so this covers any scheme beginning "http".
                from .http import HttpResourcePath

                subclass = HttpResourcePath
            elif parsed.scheme == "gs":
                from .gs import GSResourcePath

                subclass = GSResourcePath
            elif parsed.scheme == "resource":
                # Rules for scheme names disallow pkg_resource
                from .packageresource import PackageResourcePath

                subclass = PackageResourcePath
            elif parsed.scheme == "mem":
                # in-memory datastore object
                from .mem import InMemoryResourcePath

                subclass = InMemoryResourcePath
            else:
                raise NotImplementedError(
                    f"No URI support for scheme: '{parsed.scheme}' in {parsed.geturl()}"
                )

            # Let the selected subclass normalize the parsed URI and decide
            # whether it is directory-like.
            parsed, dirLike = subclass._fixupPathUri(
                parsed, root=root_uri, forceAbsolute=forceAbsolute, forceDirectory=forceDirectory
            )

            # It is possible for the class to change from schemeless
            # to file so handle that
            if parsed.scheme == "file":
                from .file import FileResourcePath

                subclass = FileResourcePath

        # Now create an instance of the correct subclass and set the
        # attributes directly
        self = object.__new__(subclass)
        self._uri = parsed
        self.dirLike = dirLike
        if isTemporary is None:
            isTemporary = False
        self.isTemporary = isTemporary
        return self
324 @property
325 def scheme(self) -> str:
326 """Return the URI scheme.
328 Notes
329 -----
330 (``://`` is not part of the scheme).
331 """
332 return self._uri.scheme
334 @property
335 def netloc(self) -> str:
336 """Return the URI network location."""
337 return self._uri.netloc
339 @property
340 def path(self) -> str:
341 """Return the path component of the URI."""
342 return self._uri.path
344 @property
345 def unquoted_path(self) -> str:
346 """Return path component of the URI with any URI quoting reversed."""
347 return urllib.parse.unquote(self._uri.path)
349 @property
350 def ospath(self) -> str:
351 """Return the path component of the URI localized to current OS."""
352 raise AttributeError(f"Non-file URI ({self}) has no local OS path.")
354 @property
355 def relativeToPathRoot(self) -> str:
356 """Return path relative to network location.
358 Effectively, this is the path property with posix separator stripped
359 from the left hand side of the path.
361 Always unquotes.
362 """
363 p = self._pathLib(self.path)
364 relToRoot = str(p.relative_to(p.root))
365 if self.dirLike and not relToRoot.endswith("/"):
366 relToRoot += "/"
367 return urllib.parse.unquote(relToRoot)
369 @property
370 def is_root(self) -> bool:
371 """Return whether this URI points to the root of the network location.
373 This means that the path components refers to the top level.
374 """
375 relpath = self.relativeToPathRoot
376 if relpath == "./":
377 return True
378 return False
380 @property
381 def fragment(self) -> str:
382 """Return the fragment component of the URI."""
383 return self._uri.fragment
385 @property
386 def params(self) -> str:
387 """Return any parameters included in the URI."""
388 return self._uri.params
390 @property
391 def query(self) -> str:
392 """Return any query strings included in the URI."""
393 return self._uri.query
395 def geturl(self) -> str:
396 """Return the URI in string form.
398 Returns
399 -------
400 url : `str`
401 String form of URI.
402 """
403 return self._uri.geturl()
405 def root_uri(self) -> ResourcePath:
406 """Return the base root URI.
408 Returns
409 -------
410 uri : `ResourcePath`
411 root URI.
412 """
413 return self.replace(path="", forceDirectory=True)
415 def split(self) -> Tuple[ResourcePath, str]:
416 """Split URI into head and tail.
418 Returns
419 -------
420 head: `ResourcePath`
421 Everything leading up to tail, expanded and normalized as per
422 ResourcePath rules.
423 tail : `str`
424 Last `self.path` component. Tail will be empty if path ends on a
425 separator. Tail will never contain separators. It will be
426 unquoted.
428 Notes
429 -----
430 Equivalent to `os.path.split()` where head preserves the URI
431 components.
432 """
433 head, tail = self._pathModule.split(self.path)
434 headuri = self._uri._replace(path=head)
436 # The file part should never include quoted metacharacters
437 tail = urllib.parse.unquote(tail)
439 # Schemeless is special in that it can be a relative path
440 # We need to ensure that it stays that way. All other URIs will
441 # be absolute already.
442 forceAbsolute = self._pathModule.isabs(self.path)
443 return ResourcePath(headuri, forceDirectory=True, forceAbsolute=forceAbsolute), tail
445 def basename(self) -> str:
446 """Return the base name, last element of path, of the URI.
448 Returns
449 -------
450 tail : `str`
451 Last part of the path attribute. Trail will be empty if path ends
452 on a separator.
454 Notes
455 -----
456 If URI ends on a slash returns an empty string. This is the second
457 element returned by `split()`.
459 Equivalent of `os.path.basename()``.
460 """
461 return self.split()[1]
463 def dirname(self) -> ResourcePath:
464 """Return the directory component of the path as a new `ResourcePath`.
466 Returns
467 -------
468 head : `ResourcePath`
469 Everything except the tail of path attribute, expanded and
470 normalized as per ResourcePath rules.
472 Notes
473 -----
474 Equivalent of `os.path.dirname()`.
475 """
476 return self.split()[0]
478 def parent(self) -> ResourcePath:
479 """Return a `ResourcePath` of the parent directory.
481 Returns
482 -------
483 head : `ResourcePath`
484 Everything except the tail of path attribute, expanded and
485 normalized as per `ResourcePath` rules.
487 Notes
488 -----
489 For a file-like URI this will be the same as calling `dirname()`.
490 """
491 # When self is file-like, return self.dirname()
492 if not self.dirLike:
493 return self.dirname()
494 # When self is dir-like, return its parent directory,
495 # regardless of the presence of a trailing separator
496 originalPath = self._pathLib(self.path)
497 parentPath = originalPath.parent
498 return self.replace(path=str(parentPath), forceDirectory=True)
500 def replace(self, forceDirectory: bool = False, isTemporary: bool = False, **kwargs: Any) -> ResourcePath:
501 """Return new `ResourcePath` with specified components replaced.
503 Parameters
504 ----------
505 forceDirectory : `bool`, optional
506 Parameter passed to ResourcePath constructor to force this
507 new URI to be dir-like.
508 isTemporary : `bool`, optional
509 Indicate that the resulting URI is temporary resource.
510 **kwargs
511 Components of a `urllib.parse.ParseResult` that should be
512 modified for the newly-created `ResourcePath`.
514 Returns
515 -------
516 new : `ResourcePath`
517 New `ResourcePath` object with updated values.
519 Notes
520 -----
521 Does not, for now, allow a change in URI scheme.
522 """
523 # Disallow a change in scheme
524 if "scheme" in kwargs:
525 raise ValueError(f"Can not use replace() method to change URI scheme for {self}")
526 return self.__class__(
527 self._uri._replace(**kwargs), forceDirectory=forceDirectory, isTemporary=isTemporary
528 )
530 def updatedFile(self, newfile: str) -> ResourcePath:
531 """Return new URI with an updated final component of the path.
533 Parameters
534 ----------
535 newfile : `str`
536 File name with no path component.
538 Returns
539 -------
540 updated : `ResourcePath`
542 Notes
543 -----
544 Forces the ResourcePath.dirLike attribute to be false. The new file
545 path will be quoted if necessary.
546 """
547 if self.quotePaths:
548 newfile = urllib.parse.quote(newfile)
549 dir, _ = self._pathModule.split(self.path)
550 newpath = self._pathModule.join(dir, newfile)
552 updated = self.replace(path=newpath)
553 updated.dirLike = False
554 return updated
556 def updatedExtension(self, ext: Optional[str]) -> ResourcePath:
557 """Return a new `ResourcePath` with updated file extension.
559 All file extensions are replaced.
561 Parameters
562 ----------
563 ext : `str` or `None`
564 New extension. If an empty string is given any extension will
565 be removed. If `None` is given there will be no change.
567 Returns
568 -------
569 updated : `ResourcePath`
570 URI with the specified extension. Can return itself if
571 no extension was specified.
572 """
573 if ext is None:
574 return self
576 # Get the extension
577 current = self.getExtension()
579 # Nothing to do if the extension already matches
580 if current == ext:
581 return self
583 # Remove the current extension from the path
584 # .fits.gz counts as one extension do not use os.path.splitext
585 path = self.path
586 if current:
587 path = path[: -len(current)]
589 # Ensure that we have a leading "." on file extension (and we do not
590 # try to modify the empty string)
591 if ext and not ext.startswith("."):
592 ext = "." + ext
594 return self.replace(path=path + ext)
596 def getExtension(self) -> str:
597 """Return the file extension(s) associated with this URI path.
599 Returns
600 -------
601 ext : `str`
602 The file extension (including the ``.``). Can be empty string
603 if there is no file extension. Usually returns only the last
604 file extension unless there is a special extension modifier
605 indicating file compression, in which case the combined
606 extension (e.g. ``.fits.gz``) will be returned.
607 """
608 special = {".gz", ".bz2", ".xz", ".fz"}
610 # Get the file part of the path so as not to be confused by
611 # "." in directory names.
612 basename = self.basename()
613 extensions = self._pathLib(basename).suffixes
615 if not extensions:
616 return ""
618 ext = extensions.pop()
620 # Multiple extensions, decide whether to include the final two
621 if extensions and ext in special:
622 ext = f"{extensions[-1]}{ext}"
624 return ext
    def join(
        self, path: Union[str, ResourcePath], isTemporary: Optional[bool] = None, forceDirectory: bool = False
    ) -> ResourcePath:
        """Return new `ResourcePath` with additional path components.

        Parameters
        ----------
        path : `str`, `ResourcePath`
            Additional file components to append to the current URI. Assumed
            to include a file at the end. Will be quoted depending on the
            associated URI scheme. If the path looks like a URI with a scheme
            referring to an absolute location, it will be returned
            directly (matching the behavior of `os.path.join()`). It can
            also be a `ResourcePath`.
        isTemporary : `bool`, optional
            Indicate that the resulting URI represents a temporary resource.
            Default is ``self.isTemporary``.
        forceDirectory : `bool`, optional
            If `True` forces the URI to end with a separator, otherwise given
            URI is interpreted as is.

        Returns
        -------
        new : `ResourcePath`
            New URI with any file at the end replaced with the new path
            components.

        Notes
        -----
        Schemeless URIs assume local path separator but all other URIs assume
        POSIX separator if the supplied path has directory structure. It
        may be this never becomes a problem but datastore templates assume
        POSIX separator is being used.

        If an absolute `ResourcePath` is given for ``path`` it is assumed that
        this should be returned directly. Giving a ``path`` of an absolute
        scheme-less URI is not allowed for safety reasons as it may indicate
        a mistake in the calling code.

        Raises
        ------
        ValueError
            Raised if the ``path`` is an absolute scheme-less URI. In that
            situation it is unclear whether the intent is to return a
            ``file`` URI or it was a mistake and a relative scheme-less URI
            was meant.
        RuntimeError
            Raised if this attempts to join a temporary URI to a non-temporary
            URI.
        """
        # Inherit the temporary flag unless told otherwise; an explicit
        # False is rejected when this URI is itself temporary.
        if isTemporary is None:
            isTemporary = self.isTemporary
        elif not isTemporary and self.isTemporary:
            raise RuntimeError("Cannot join temporary URI to non-temporary URI.")
        # If we have a full URI in path we will use it directly
        # but without forcing to absolute so that we can trap the
        # expected option of relative path.
        path_uri = ResourcePath(
            path, forceAbsolute=False, forceDirectory=forceDirectory, isTemporary=isTemporary
        )
        if path_uri.scheme:
            # Check for scheme so can distinguish explicit URIs from
            # absolute scheme-less URIs.
            return path_uri

        if path_uri.isabs():
            # Absolute scheme-less path.
            raise ValueError(f"Can not join absolute scheme-less {path_uri!r} to another URI.")

        # If this was originally a ResourcePath extract the unquoted path from
        # it. Otherwise we use the string we were given to allow "#" to appear
        # in the filename if given as a plain string.
        if not isinstance(path, str):
            path = path_uri.unquoted_path

        new = self.dirname()  # By definition a directory URI

        # new should be asked about quoting, not self, since dirname can
        # change the URI scheme for schemeless -> file
        if new.quotePaths:
            path = urllib.parse.quote(path)

        # Append to the directory part and collapse any redundant elements.
        newpath = self._pathModule.normpath(self._pathModule.join(new.path, path))

        # normpath can strip trailing / so we force directory if the supplied
        # path ended with a /
        return new.replace(
            path=newpath,
            forceDirectory=(forceDirectory or path.endswith(self._pathModule.sep)),
            isTemporary=isTemporary,
        )
718 def relative_to(self, other: ResourcePath) -> Optional[str]:
719 """Return the relative path from this URI to the other URI.
721 Parameters
722 ----------
723 other : `ResourcePath`
724 URI to use to calculate the relative path. Must be a parent
725 of this URI.
727 Returns
728 -------
729 subpath : `str`
730 The sub path of this URI relative to the supplied other URI.
731 Returns `None` if there is no parent child relationship.
732 Scheme and netloc must match.
733 """
734 # Scheme-less absolute other is treated as if it's a file scheme.
735 # Scheme-less relative other can only return non-None if self
736 # is also scheme-less relative and that is handled specifically
737 # in a subclass.
738 if not other.scheme and other.isabs():
739 other = other.abspath()
741 # Scheme-less self is handled elsewhere.
742 if self.scheme != other.scheme:
743 return None
744 if self.netloc != other.netloc:
745 # Special case for localhost vs empty string.
746 # There can be many variants of localhost.
747 local_netlocs = {"", "localhost", "localhost.localdomain", "127.0.0.1"}
748 if not {self.netloc, other.netloc}.issubset(local_netlocs):
749 return None
751 enclosed_path = self._pathLib(self.relativeToPathRoot)
752 parent_path = other.relativeToPathRoot
753 subpath: Optional[str]
754 try:
755 subpath = str(enclosed_path.relative_to(parent_path))
756 except ValueError:
757 subpath = None
758 else:
759 subpath = urllib.parse.unquote(subpath)
760 return subpath
762 def exists(self) -> bool:
763 """Indicate that the resource is available.
765 Returns
766 -------
767 exists : `bool`
768 `True` if the resource exists.
769 """
770 raise NotImplementedError()
772 @classmethod
773 def mexists(cls, uris: Iterable[ResourcePath]) -> Dict[ResourcePath, bool]:
774 """Check for existence of multiple URIs at once.
776 Parameters
777 ----------
778 uris : iterable of `ResourcePath`
779 The URIs to test.
781 Returns
782 -------
783 existence : `dict` of [`ResourcePath`, `bool`]
784 Mapping of original URI to boolean indicating existence.
785 """
786 exists_executor = concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)
787 future_exists = {exists_executor.submit(uri.exists): uri for uri in uris}
789 results: Dict[ResourcePath, bool] = {}
790 for future in concurrent.futures.as_completed(future_exists):
791 uri = future_exists[future]
792 try:
793 exists = future.result()
794 except Exception:
795 exists = False
796 results[uri] = exists
797 return results
799 def remove(self) -> None:
800 """Remove the resource."""
801 raise NotImplementedError()
803 def isabs(self) -> bool:
804 """Indicate that the resource is fully specified.
806 For non-schemeless URIs this is always true.
808 Returns
809 -------
810 isabs : `bool`
811 `True` in all cases except schemeless URI.
812 """
813 return True
815 def abspath(self) -> ResourcePath:
816 """Return URI using an absolute path.
818 Returns
819 -------
820 abs : `ResourcePath`
821 Absolute URI. For non-schemeless URIs this always returns itself.
822 Schemeless URIs are upgraded to file URIs.
823 """
824 return self
826 def _as_local(self) -> Tuple[str, bool]:
827 """Return the location of the (possibly remote) resource as local file.
829 This is a helper function for `as_local` context manager.
831 Returns
832 -------
833 path : `str`
834 If this is a remote resource, it will be a copy of the resource
835 on the local file system, probably in a temporary directory.
836 For a local resource this should be the actual path to the
837 resource.
838 is_temporary : `bool`
839 Indicates if the local path is a temporary file or not.
840 """
841 raise NotImplementedError()
843 @contextlib.contextmanager
844 def as_local(self) -> Iterator[ResourcePath]:
845 """Return the location of the (possibly remote) resource as local file.
847 Yields
848 ------
849 local : `ResourcePath`
850 If this is a remote resource, it will be a copy of the resource
851 on the local file system, probably in a temporary directory.
852 For a local resource this should be the actual path to the
853 resource.
855 Notes
856 -----
857 The context manager will automatically delete any local temporary
858 file.
860 Examples
861 --------
862 Should be used as a context manager:
864 .. code-block:: py
866 with uri.as_local() as local:
867 ospath = local.ospath
868 """
869 if self.dirLike:
870 raise IsADirectoryError(f"Directory-like URI {self} cannot be fetched as local.")
871 local_src, is_temporary = self._as_local()
872 local_uri = ResourcePath(local_src, isTemporary=is_temporary)
874 try:
875 yield local_uri
876 finally:
877 # The caller might have relocated the temporary file.
878 # Do not ever delete if the temporary matches self
879 # (since it may have been that a temporary file was made local
880 # but already was local).
881 if self != local_uri and is_temporary and local_uri.exists():
882 local_uri.remove()
884 @classmethod
885 @contextlib.contextmanager
886 def temporary_uri(
887 cls, prefix: Optional[ResourcePath] = None, suffix: Optional[str] = None
888 ) -> Iterator[ResourcePath]:
889 """Create a temporary file-like URI.
891 Parameters
892 ----------
893 prefix : `ResourcePath`, optional
894 Prefix to use. Without this the path will be formed as a local
895 file URI in a temporary directory. Ensuring that the prefix
896 location exists is the responsibility of the caller.
897 suffix : `str`, optional
898 A file suffix to be used. The ``.`` should be included in this
899 suffix.
901 Yields
902 ------
903 uri : `ResourcePath`
904 The temporary URI. Will be removed when the context is completed.
905 """
906 use_tempdir = False
907 if prefix is None:
908 prefix = ResourcePath(tempfile.mkdtemp(), forceDirectory=True, isTemporary=True)
909 # Record that we need to delete this directory. Can not rely
910 # on isTemporary flag since an external prefix may have that
911 # set as well.
912 use_tempdir = True
914 # Need to create a randomized file name. For consistency do not
915 # use mkstemp for local and something else for remote. Additionally
916 # this method does not create the file to prevent name clashes.
917 characters = "abcdefghijklmnopqrstuvwxyz0123456789_"
918 rng = Random()
919 tempname = "".join(rng.choice(characters) for _ in range(16))
920 if suffix:
921 tempname += suffix
922 temporary_uri = prefix.join(tempname, isTemporary=True)
923 if temporary_uri.dirLike:
924 # If we had a safe way to clean up a remote temporary directory, we
925 # could support this.
926 raise NotImplementedError("temporary_uri cannot be used to create a temporary directory.")
927 try:
928 yield temporary_uri
929 finally:
930 if use_tempdir:
931 shutil.rmtree(prefix.ospath, ignore_errors=True)
932 else:
933 try:
934 # It's okay if this does not work because the user removed
935 # the file.
936 temporary_uri.remove()
937 except FileNotFoundError:
938 pass
940 def read(self, size: int = -1) -> bytes:
941 """Open the resource and return the contents in bytes.
943 Parameters
944 ----------
945 size : `int`, optional
946 The number of bytes to read. Negative or omitted indicates
947 that all data should be read.
948 """
949 raise NotImplementedError()
951 def write(self, data: bytes, overwrite: bool = True) -> None:
952 """Write the supplied bytes to the new resource.
954 Parameters
955 ----------
956 data : `bytes`
957 The bytes to write to the resource. The entire contents of the
958 resource will be replaced.
959 overwrite : `bool`, optional
960 If `True` the resource will be overwritten if it exists. Otherwise
961 the write will fail.
962 """
963 raise NotImplementedError()
965 def mkdir(self) -> None:
966 """For a dir-like URI, create the directory resource if needed."""
967 raise NotImplementedError()
969 def isdir(self) -> bool:
970 """Return True if this URI looks like a directory, else False."""
971 return self.dirLike
973 def size(self) -> int:
974 """For non-dir-like URI, return the size of the resource.
976 Returns
977 -------
978 sz : `int`
979 The size in bytes of the resource associated with this URI.
980 Returns 0 if dir-like.
981 """
982 raise NotImplementedError()
984 def __str__(self) -> str:
985 """Convert the URI to its native string form."""
986 return self.geturl()
988 def __repr__(self) -> str:
989 """Return string representation suitable for evaluation."""
990 return f'ResourcePath("{self.geturl()}")'
992 def __eq__(self, other: Any) -> bool:
993 """Compare supplied object with this `ResourcePath`."""
994 if not isinstance(other, ResourcePath):
995 return NotImplemented
996 return self.geturl() == other.geturl()
998 def __hash__(self) -> int:
999 """Return hash of this object."""
1000 return hash(str(self))
1002 def __lt__(self, other: ResourcePath) -> bool:
1003 return self.geturl() < other.geturl()
1005 def __le__(self, other: ResourcePath) -> bool:
1006 return self.geturl() <= other.geturl()
def __gt__(self, other: ResourcePath) -> bool:
    """Return whether this URI string sorts after that of ``other``.

    Parameters
    ----------
    other : `ResourcePath`
        Object to compare against.

    Returns
    -------
    result : `bool`
        Result of comparing the two ``geturl()`` strings. `NotImplemented`
        is returned when ``other`` has no ``geturl`` attribute so that
        Python can try the reflected operation or raise `TypeError`.
    """
    other_geturl = getattr(other, "geturl", None)
    if other_geturl is None:
        # Not URI-like: defer to Python rather than fail with AttributeError.
        return NotImplemented
    return self.geturl() > other_geturl()
def __ge__(self, other: ResourcePath) -> bool:
    """Return whether this URI string sorts after or equal to ``other``.

    Parameters
    ----------
    other : `ResourcePath`
        Object to compare against.

    Returns
    -------
    result : `bool`
        Result of comparing the two ``geturl()`` strings. `NotImplemented`
        is returned when ``other`` has no ``geturl`` attribute so that
        Python can try the reflected operation or raise `TypeError`.
    """
    other_geturl = getattr(other, "geturl", None)
    if other_geturl is None:
        # Not URI-like: defer to Python rather than fail with AttributeError.
        return NotImplemented
    return self.geturl() >= other_geturl()
def __copy__(self) -> ResourcePath:
    """Support `copy.copy`.

    Instances are immutable, so the "copy" is simply this same object.
    """
    # Defined explicitly because the __new__ method confuses things.
    return self
def __deepcopy__(self, memo: Any) -> ResourcePath:
    """Support `copy.deepcopy`.

    Instances are immutable, so the "copy" is simply this same object;
    ``memo`` is not consulted.
    """
    # Defined explicitly because the __new__ method confuses things.
    return self
def __getnewargs__(self) -> Tuple:
    """Return the arguments used to rebuild this object when unpickling.

    Returns
    -------
    args : `tuple`
        One-element tuple holding the string form of this URI.
    """
    as_text = str(self)
    return (as_text,)
@classmethod
def _fixDirectorySep(
    cls, parsed: urllib.parse.ParseResult, forceDirectory: bool = False
) -> Tuple[urllib.parse.ParseResult, bool]:
    """Make sure directory-like paths end with the path separator.

    Parameters
    ----------
    parsed : `~urllib.parse.ParseResult`
        Result of parsing a URI with `urllib.parse`.
    forceDirectory : `bool`, optional
        If `True` the URI is treated as a directory and a trailing
        separator is appended when missing. Otherwise the URI is taken
        as given. Declaring a URI to be a directory removes the ambiguity
        in how the final path element should be interpreted.

    Returns
    -------
    modified : `~urllib.parse.ParseResult`
        The parse result, with a separator appended to the path if one
        had to be added; otherwise the object that was passed in.
    dirLike : `bool`
        `True` if the path already ended with the separator or
        ``forceDirectory`` was set, else `False`.
    """
    # Separator comes from the path module configured on the class.
    separator = cls._pathModule.sep

    if parsed.path.endswith(separator):
        # Already dir-like; nothing to modify.
        return parsed, True

    if forceDirectory:
        # Caller says this is a directory: append the missing separator.
        return parsed._replace(path=parsed.path + separator), True

    # Neither forced nor terminated by a separator: treat as file-like.
    return parsed, False
@classmethod
def _fixupPathUri(
    cls,
    parsed: urllib.parse.ParseResult,
    root: Optional[ResourcePath] = None,
    forceAbsolute: bool = False,
    forceDirectory: bool = False,
) -> Tuple[urllib.parse.ParseResult, bool]:
    """Correct any issues with the supplied URI.

    Parameters
    ----------
    parsed : `~urllib.parse.ParseResult`
        Result of parsing a URI with `urllib.parse`.
    root : `ResourcePath`, ignored
        Not used by this implementation since all URIs are absolute
        except for those representing the local file system.
    forceAbsolute : `bool`, ignored
        Not used by this implementation. URIs are generally always
        absolute.
    forceDirectory : `bool`, optional
        If `True` the URI is forced to end with a separator, otherwise
        the URI is interpreted as given. Declaring a URI to be a
        directory removes the ambiguity in how the final path element
        should be interpreted.

    Returns
    -------
    modified : `~urllib.parse.ParseResult`
        Updated parse result if a URI is being handled.
    dirLike : `bool`
        `True` if the parsed URI has a trailing separator or
        ``forceDirectory`` is `True`. Otherwise `False`.

    Notes
    -----
    Relative paths are explicitly not supported by RFC8089 but `urllib`
    does accept URIs of the form ``file:relative/path.ext``. They need
    to be turned into absolute paths before they can be used. This is
    always done regardless of the ``forceAbsolute`` parameter.

    AWS S3 differentiates between keys with trailing POSIX separators
    (i.e. ``/dir`` and ``/dir/``) whereas POSIX does not necessarily.

    Scheme-less paths are normalized.
    """
    # The base implementation only needs the directory-separator fix.
    fixed = cls._fixDirectorySep(parsed, forceDirectory)
    return fixed
def transfer_from(
    self,
    src: ResourcePath,
    transfer: str,
    overwrite: bool = False,
    transaction: Optional[TransactionProtocol] = None,
) -> None:
    """Transfer the resource at another URI to this URI.

    Parameters
    ----------
    src : `ResourcePath`
        Source URI.
    transfer : `str`
        Mode to use for transferring the resource. Generically there are
        many standard options: copy, link, symlink, hardlink, relsymlink.
        Not all URIs support all modes.
    overwrite : `bool`, optional
        Allow an existing file to be overwritten. Defaults to `False`.
    transaction : `~lsst.resources.utils.TransactionProtocol`, optional
        A transaction object that can (depending on implementation)
        rollback transfers on error. Not guaranteed to be implemented.

    Raises
    ------
    NotImplementedError
        Always raised by this base implementation, naming the scheme of
        this URI.

    Notes
    -----
    Conceptually this is hard to scale as the number of URI schemes
    grows. The destination URI is more important than the source URI
    since that is where all the transfer modes are relevant (with the
    complication that "move" deletes the source).

    Local file to local file is the fundamental use case but every
    other scheme has to support "copy" to local file (with implicit
    support for "move") and copy from local file.
    All the "link" options tend to be specific to local file systems.

    "move" is a "copy" where the remote resource is deleted at the end.
    Whether this works depends on the source URI rather than the
    destination URI. Reverting a move on transaction rollback is
    expected to be problematic if a remote resource was involved.
    """
    message = f"No transfer modes supported by URI scheme {self.scheme}"
    raise NotImplementedError(message)
def walk(
    self, file_filter: Optional[Union[str, re.Pattern]] = None
) -> Iterator[Union[List, Tuple[ResourcePath, List[str], List[str]]]]:
    """Traverse the directory tree, reporting matching files and directories.

    Parameters
    ----------
    file_filter : `str` or `re.Pattern`, optional
        Regex to filter out files from the list before it is returned.

    Yields
    ------
    dirpath : `ResourcePath`
        Current directory being examined.
    dirnames : `list` of `str`
        Names of subdirectories within dirpath.
    filenames : `list` of `str`
        Names of all the files within dirpath.

    Raises
    ------
    NotImplementedError
        Always raised by this base implementation.
    """
    raise NotImplementedError
@overload
@classmethod
def findFileResources(
    cls,
    candidates: Iterable[ResourcePathExpression],
    file_filter: Optional[Union[str, re.Pattern]],
    grouped: Literal[True],
) -> Iterator[Iterator[ResourcePath]]:
    ...

@overload
@classmethod
def findFileResources(
    cls,
    candidates: Iterable[ResourcePathExpression],
    *,
    grouped: Literal[True],
) -> Iterator[Iterator[ResourcePath]]:
    ...

@overload
@classmethod
def findFileResources(
    cls,
    candidates: Iterable[ResourcePathExpression],
    file_filter: Optional[Union[str, re.Pattern]] = None,
    grouped: Literal[False] = False,
) -> Iterator[ResourcePath]:
    ...

@classmethod
def findFileResources(
    cls,
    candidates: Iterable[ResourcePathExpression],
    file_filter: Optional[Union[str, re.Pattern]] = None,
    grouped: bool = False,
) -> Iterator[Union[ResourcePath, Iterator[ResourcePath]]]:
    """Get all the files from a list of values.

    Parameters
    ----------
    candidates : iterable [`str` or `ResourcePath`]
        The files to return and directories in which to look for files to
        return.
    file_filter : `str` or `re.Pattern`, optional
        The regex to use when searching for files within directories.
        By default returns all the found files.
    grouped : `bool`, optional
        If `True` the results will be grouped by directory and each
        yielded value will be an iterator over URIs. If `False` each
        URI will be returned separately.

    Yields
    ------
    found_file: `ResourcePath`
        The passed-in URIs and URIs found in passed-in directories.
        If grouping is enabled, each of the yielded values will be an
        iterator yielding members of the group. Files given explicitly
        will be returned as a single group at the end.

    Notes
    -----
    If a value is a file it is yielded immediately without checking that it
    exists. If a value is a directory, all the files in the directory
    (recursively) that match the regex will be yielded in turn.

    Each group iterator is bound to its own directory when it is created,
    so groups may be consumed in any order, including after the outer
    iterator has been advanced or exhausted.
    """
    fileRegex = None if file_filter is None else re.compile(file_filter)

    # Explicitly given files; only accumulated when grouping.
    singles = []

    # Find all the files of interest
    for location in candidates:
        uri = ResourcePath(location)
        if not uri.isdir():
            if grouped:
                singles.append(uri)
            else:
                yield uri
            continue

        for found in uri.walk(fileRegex):
            if not found:
                # This means the uri does not exist and by
                # convention we ignore it
                continue
            root, _dirs, files = found
            if not files:
                continue
            if grouped:
                # map() captures ``root.join`` immediately. A generator
                # expression would look ``root`` up lazily and see the
                # value from a later iteration if the caller had already
                # advanced this outer iterator.
                yield map(root.join, files)
            else:
                yield from map(root.join, files)

    # Finally, return any explicitly given files in one group
    if grouped and singles:
        yield iter(singles)
@contextlib.contextmanager
def open(
    self,
    mode: str = "r",
    *,
    encoding: Optional[str] = None,
    prefer_file_temporary: bool = False,
) -> Iterator[ResourceHandleProtocol]:
    """Return a context manager that wraps an object that behaves like an
    open file at the location of the URI.

    Parameters
    ----------
    mode : `str`
        String indicating the mode in which to open the file. Values are
        the same as those accepted by `builtins.open`, though intrinsically
        read-only URI types may only support read modes, and
        `io.IOBase.seekable` is not guaranteed to be `True` on the returned
        object.
    encoding : `str`, optional
        Unicode encoding for text IO; ignored for binary IO. Defaults to
        ``locale.getpreferredencoding(False)``, just as `builtins.open`
        does.
    prefer_file_temporary : `bool`, optional
        If `True`, for implementations that require transfers from a remote
        system to temporary local storage and/or back, use a temporary file
        instead of an in-memory buffer; this is generally slower, but it
        may be necessary to avoid excessive memory usage by large files.
        Ignored by implementations that do not require a temporary.

    Returns
    -------
    cm : `contextlib.ContextManager`
        A context manager that wraps a file-like object.

    Raises
    ------
    IsADirectoryError
        Raised if this URI is directory-like.
    FileExistsError
        Raised if ``mode`` contains ``"x"`` and the resource already exists.

    Notes
    -----
    The default implementation of this method uses a local temporary buffer
    (in-memory or file, depending on ``prefer_file_temporary``) with calls
    to `read`, `write`, `as_local`, and `transfer_from` as necessary to
    read and write from/to remote systems. Remote writes thus occur only
    when the context manager is exited. `ResourcePath` implementations
    that can return a more efficient native buffer should do so whenever
    possible (as is guaranteed for local files). `ResourcePath`
    implementations for which `as_local` does not return a temporary are
    required to reimplement `open`, though they may delegate to `super`
    when `prefer_file_temporary` is `False`.
    """
    if self.dirLike:
        raise IsADirectoryError(f"Directory-like URI {self} cannot be opened.")
    if "x" in mode and self.exists():
        raise FileExistsError(f"File at {self} already exists.")

    if not prefer_file_temporary:
        # In-memory path: the handle implementation does all the work.
        with self._openImpl(mode, encoding=encoding) as handle:
            yield handle
        return

    # File-backed path. Read and append modes need the current content
    # locally; other modes start from an empty temporary file.
    needs_existing_content = "r" in mode or "a" in mode
    if needs_existing_content:
        local_cm = self.as_local()
    else:
        local_cm = self.temporary_uri(suffix=self.getExtension())

    with local_cm as local_uri:
        assert local_uri.isTemporary, (
            "ResourcePath implementations for which as_local is not "
            "a temporary must reimplement `open`."
        )
        with open(local_uri.ospath, mode=mode, encoding=encoding) as file_buffer:
            if "a" in mode:
                file_buffer.seek(0, io.SEEK_END)
            yield file_buffer
        # Copy the local file back unless the mode was purely read-only.
        read_only = "r" in mode and "+" not in mode
        if not read_only:
            self.transfer_from(local_uri, transfer="copy", overwrite=("x" not in mode))
@contextlib.contextmanager
def _openImpl(
    self, mode: str = "r", *, encoding: Optional[str] = None
) -> Iterator[ResourceHandleProtocol]:
    """Implement opening of a resource handle.

    This private method may be overridden by specific `ResourcePath`
    implementations to provide a customized handle-like interface.

    Parameters
    ----------
    mode : `str`
        The mode the handle should be opened with.
    encoding : `str`, optional
        Encoding used to convert between bytes and text in text modes.
        If `None`, ``locale.getpreferredencoding(False)`` is used.

    Yields
    ------
    handle : `BaseResourceHandle`
        A handle that conforms to the `BaseResourceHandle` interface.

    Notes
    -----
    The base implementation of a file handle reads a file's entire
    contents into a buffer for manipulation, and then writes it back out
    upon close. Subclasses of this class may offer more fine grained
    control.
    """
    appending = "a" in mode

    # Read and append modes start from the current content of the resource.
    initial = self.read() if ("r" in mode or appending) else b""

    if "b" not in mode:
        # Text mode: decode on the way in, encode on the way out.
        if encoding is None:
            encoding = locale.getpreferredencoding(False)
        text_buffer = io.StringIO(initial.decode(encoding))
        if appending:
            text_buffer.seek(0, io.SEEK_END)
        yield text_buffer
        payload = text_buffer.getvalue().encode(encoding)
    else:
        binary_buffer = io.BytesIO(initial)
        if appending:
            binary_buffer.seek(0, io.SEEK_END)
        yield binary_buffer
        payload = binary_buffer.getvalue()

    # Anything other than a pure read mode is written back to the resource.
    read_only = "r" in mode and "+" not in mode
    if not read_only:
        self.write(payload, overwrite=("x" not in mode))
# Union of the input forms accepted wherever a ResourcePath is expected:
# a plain string, a parsed URL from urllib.parse, an existing ResourcePath,
# or a pathlib.Path.
ResourcePathExpression = Union[str, urllib.parse.ParseResult, ResourcePath, Path]
"""Type-annotation alias for objects that can be coerced to ResourcePath.
"""