Coverage for python/lsst/resources/_resourcePath.py: 28%
425 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-13 09:44 +0000
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-13 09:44 +0000
1# This file is part of lsst-resources.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# Use of this source code is governed by a 3-clause BSD-style
10# license that can be found in the LICENSE file.
12from __future__ import annotations
14import concurrent.futures
15import contextlib
16import copy
17import io
18import locale
19import logging
20import os
21import posixpath
22import re
23import shutil
24import tempfile
25import urllib.parse
26from pathlib import Path, PurePath, PurePosixPath
27from random import Random
29__all__ = ("ResourcePath", "ResourcePathExpression")
31from collections.abc import Iterable, Iterator
32from typing import TYPE_CHECKING, Any, Literal, overload
34from ._resourceHandles._baseResourceHandle import ResourceHandleProtocol
36if TYPE_CHECKING:
37 from .utils import TransactionProtocol
# Module-level logger named after this module.
log = logging.getLogger(__name__)

# Regex for looking for URI escapes: a "%" followed by exactly two hex
# digits. Only upper-case hex digits (A-F) are matched.
ESCAPES_RE = re.compile(r"%[A-F0-9]{2}")

# Precomputed escaped hash (the URL-quoted form of "#").
ESCAPED_HASH = urllib.parse.quote("#")

# Maximum number of worker threads for parallelized operations.
# If greater than 10, be aware that this number has to be consistent
# with connection pool sizing (for example in urllib3).
MAX_WORKERS = 10
54class ResourcePath:
55 """Convenience wrapper around URI parsers.
57 Provides access to URI components and can convert file
58 paths into absolute path URIs. Scheme-less URIs are treated as if
59 they are local file system paths and are converted to absolute URIs.
61 A specialist subclass is created for each supported URI scheme.
63 Parameters
64 ----------
65 uri : `str`, `pathlib.Path`, `urllib.parse.ParseResult`, or `ResourcePath`.
66 URI in string form. Can be scheme-less if referring to a relative
67 path or an absolute path on the local file system.
68 root : `str` or `ResourcePath`, optional
69 When fixing up a relative path in a ``file`` scheme or if scheme-less,
70 use this as the root. Must be absolute. If `None` the current
71 working directory will be used. Can be any supported URI scheme.
72 Not used if ``forceAbsolute`` is `False`.
73 forceAbsolute : `bool`, optional
74 If `True`, scheme-less relative URI will be converted to an absolute
75 path using a ``file`` scheme. If `False` scheme-less URI will remain
76 scheme-less and will not be updated to ``file`` or absolute path unless
77 it is already an absolute path, in which case it will be updated to
78 a ``file`` scheme.
79 forceDirectory : `bool`, optional
80 If `True` forces the URI to end with a separator, otherwise given URI
81 is interpreted as is.
82 isTemporary : `bool`, optional
83 If `True` indicates that this URI points to a temporary resource.
84 The default is `False`, unless ``uri`` is already a `ResourcePath`
85 instance and ``uri.isTemporary is True``.
87 Notes
88 -----
89 A non-standard URI of the form ``file:dir/file.txt`` is always converted
90 to an absolute ``file`` URI.
91 """
93 _pathLib: type[PurePath] = PurePosixPath
94 """Path library to use for this scheme."""
96 _pathModule = posixpath
97 """Path module to use for this scheme."""
99 transferModes: tuple[str, ...] = ("copy", "auto", "move")
100 """Transfer modes supported by this implementation.
102 Move is special in that it is generally a copy followed by an unlink.
103 Whether that unlink works depends critically on whether the source URI
104 implements unlink. If it does not the move will be reported as a failure.
105 """
107 transferDefault: str = "copy"
108 """Default mode to use for transferring if ``auto`` is specified."""
110 quotePaths = True
111 """True if path-like elements modifying a URI should be quoted.
113 All non-schemeless URIs have to internally use quoted paths. Therefore
114 if a new file name is given (e.g. to updatedFile or join) a decision must
115 be made whether to quote it to be consistent.
116 """
118 isLocal = False
119 """If `True` this URI refers to a local file."""
121 # This is not an ABC with abstract methods because the __new__ being
122 # a factory confuses mypy such that it assumes that every constructor
123 # returns a ResourcePath and then determines that all the abstract methods
124 # are still abstract. If they are not marked abstract but just raise
125 # mypy is fine with it.
127 # mypy is confused without these
128 _uri: urllib.parse.ParseResult
129 isTemporary: bool
130 dirLike: bool
def __new__(
    cls,
    uri: ResourcePathExpression,
    root: str | ResourcePath | None = None,
    forceAbsolute: bool = True,
    forceDirectory: bool = False,
    isTemporary: bool | None = None,
) -> ResourcePath:
    """Create and return new specialist ResourcePath subclass.

    This acts as a factory: the concrete subclass is selected from the
    URI scheme (or taken from ``cls`` when a subclass is called directly
    with a `urllib.parse.ParseResult`), and the instance attributes
    ``_uri``, ``dirLike`` and ``isTemporary`` are set directly on the new
    object.

    Parameters
    ----------
    uri : `str`, `os.PathLike`, `urllib.parse.ParseResult`, or `ResourcePath`
        The URI to wrap. Path-like objects are converted with `str`.
    root : `str` or `ResourcePath`, optional
        Root used when resolving a relative scheme-less URI. A string
        root is converted to a directory-like `ResourcePath`.
    forceAbsolute : `bool`, optional
        Forwarded to the subclass ``_fixupPathUri`` implementation.
    forceDirectory : `bool`, optional
        If `True` the resulting URI is forced to be directory-like.
    isTemporary : `bool`, optional
        Whether the URI refers to a temporary resource. `None` is
        stored as `False` on a newly-created instance.

    Returns
    -------
    new : `ResourcePath`
        Instance of the scheme-specific subclass. If ``uri`` is already
        a `ResourcePath` consistent with the requested options it is
        returned unchanged.

    Raises
    ------
    ValueError
        Raised if ``uri`` has an unsupported type, or if a non-``file``
        ``root`` that is not directory-like is combined with a relative
        scheme-less path.
    RuntimeError
        Raised if ``uri`` is a `ResourcePath` that conflicts with the
        requested ``forceDirectory`` or ``isTemporary``.
    NotImplementedError
        Raised if the URI scheme is not recognized.
    """
    parsed: urllib.parse.ParseResult
    dirLike: bool = False
    subclass: type[ResourcePath] | None = None

    # Force root to be a ResourcePath -- this simplifies downstream
    # code.
    if root is None:
        root_uri = None
    elif isinstance(root, str):
        root_uri = ResourcePath(root, forceDirectory=True, forceAbsolute=True)
    else:
        root_uri = root

    # Path-like objects (e.g. pathlib.Path) are handled as plain strings.
    if isinstance(uri, os.PathLike):
        uri = str(uri)

    # Record if we need to post process the URI components
    # or if the instance is already fully configured
    if isinstance(uri, str):
        # Since local file names can have special characters in them
        # we need to quote them for the parser but we can unquote
        # later. Assume that all other URI schemes are quoted.
        # Since sometimes people write file:/a/b and not file:///a/b
        # we should not quote in the explicit case of file:
        if "://" not in uri and not uri.startswith("file:"):
            if ESCAPES_RE.search(uri):
                # Looks quoted already; leave as is rather than quote twice.
                log.warning("Possible double encoding of %s", uri)
            else:
                # Fragments are generally not encoded so we must search
                # for the fragment boundary ourselves. This is making
                # an assumption that the filename does not include a "#"
                # and also that there is no "/" in the fragment itself.
                to_encode = uri
                fragment = ""
                if "#" in uri:
                    dirpos = uri.rfind("/")
                    trailing = uri[dirpos + 1 :]
                    hashpos = trailing.rfind("#")
                    if hashpos != -1:
                        fragment = trailing[hashpos:]
                        # dirpos + 1 + hashpos is the absolute index of
                        # the "#" within the full string.
                        to_encode = uri[: dirpos + hashpos + 1]

                uri = urllib.parse.quote(to_encode) + fragment

        parsed = urllib.parse.urlparse(uri)
    elif isinstance(uri, urllib.parse.ParseResult):
        parsed = copy.copy(uri)
        # If we are being instantiated with a subclass, rather than
        # ResourcePath, ensure that that subclass is used directly.
        # This could lead to inconsistencies if this constructor
        # is used externally outside of the ResourcePath.replace() method.
        #   S3ResourcePath(urllib.parse.urlparse("file://a/b.txt"))
        # will be a problem.
        # This is needed to prevent a schemeless absolute URI become
        # a file URI unexpectedly when calling updatedFile or
        # updatedExtension
        if cls is not ResourcePath:
            parsed, dirLike = cls._fixDirectorySep(parsed, forceDirectory)
            subclass = cls

    elif isinstance(uri, ResourcePath):
        # Since ResourcePath is immutable we can return the argument
        # unchanged if it already agrees with forceDirectory, isTemporary,
        # and forceAbsolute.
        # We invoke __new__ again with str(self) to add a scheme for
        # forceAbsolute, but for the others that seems more likely to paper
        # over logic errors than do something useful, so we just raise.
        if forceDirectory and not uri.dirLike:
            raise RuntimeError(
                f"{uri} is already a file-like ResourcePath; cannot force it to directory."
            )
        if isTemporary is not None and isTemporary is not uri.isTemporary:
            raise RuntimeError(
                f"{uri} is already a {'temporary' if uri.isTemporary else 'permanent'} "
                f"ResourcePath; cannot make it {'temporary' if isTemporary else 'permanent'}."
            )
        if forceAbsolute and not uri.scheme:
            return ResourcePath(
                str(uri),
                root=root,
                forceAbsolute=True,
                forceDirectory=uri.dirLike,
                isTemporary=uri.isTemporary,
            )
        return uri
    else:
        raise ValueError(
            f"Supplied URI must be string, Path, ResourcePath, or ParseResult but got '{uri!r}'"
        )

    if subclass is None:
        # Work out the subclass from the URI scheme
        if not parsed.scheme:
            # Root may be specified as a ResourcePath that overrides
            # the schemeless determination.
            if (
                root_uri is not None
                and root_uri.scheme != "file"  # file scheme has different code path
                and not parsed.path.startswith("/")  # Not already absolute path
            ):
                if not root_uri.dirLike:
                    raise ValueError(
                        f"Root URI ({root}) was not a directory so can not be joined with"
                        f" path {parsed.path!r}"
                    )
                # If root is temporary or this schemeless is temporary we
                # assume this URI is temporary.
                isTemporary = isTemporary or root_uri.isTemporary
                joined = root_uri.join(
                    parsed.path, forceDirectory=forceDirectory, isTemporary=isTemporary
                )

                # Rather than returning this new ResourcePath directly we
                # instead extract the path and the scheme and adjust the
                # URI we were given -- we need to do this to preserve
                # fragments since join() will drop them.
                parsed = parsed._replace(scheme=joined.scheme, path=joined.path, netloc=joined.netloc)
                subclass = type(joined)

                # Clear the root parameter to indicate that it has
                # been applied already.
                root_uri = None
            else:
                from .schemeless import SchemelessResourcePath

                subclass = SchemelessResourcePath
        elif parsed.scheme == "file":
            from .file import FileResourcePath

            subclass = FileResourcePath
        elif parsed.scheme == "s3":
            from .s3 import S3ResourcePath

            subclass = S3ResourcePath
        elif parsed.scheme.startswith("http"):
            from .http import HttpResourcePath

            subclass = HttpResourcePath
        elif parsed.scheme == "gs":
            from .gs import GSResourcePath

            subclass = GSResourcePath
        elif parsed.scheme == "resource":
            # Rules for scheme names disallow pkg_resource
            from .packageresource import PackageResourcePath

            subclass = PackageResourcePath
        elif parsed.scheme == "mem":
            # in-memory datastore object
            from .mem import InMemoryResourcePath

            subclass = InMemoryResourcePath
        else:
            raise NotImplementedError(
                f"No URI support for scheme: '{parsed.scheme}' in {parsed.geturl()}"
            )

        # Let the selected subclass normalize the parsed URI and report
        # whether the result is directory-like.
        parsed, dirLike = subclass._fixupPathUri(
            parsed, root=root_uri, forceAbsolute=forceAbsolute, forceDirectory=forceDirectory
        )

        # It is possible for the class to change from schemeless
        # to file so handle that
        if parsed.scheme == "file":
            from .file import FileResourcePath

            subclass = FileResourcePath

    # Now create an instance of the correct subclass and set the
    # attributes directly
    self = object.__new__(subclass)
    self._uri = parsed
    self.dirLike = dirLike
    if isTemporary is None:
        isTemporary = False
    self.isTemporary = isTemporary
    return self
@property
def scheme(self) -> str:
    """Scheme component of the wrapped URI.

    Notes
    -----
    The ``://`` separator is not part of the returned value.
    """
    parsed = self._uri
    return parsed.scheme
@property
def netloc(self) -> str:
    """Network location component of the wrapped URI."""
    parsed = self._uri
    return parsed.netloc
@property
def path(self) -> str:
    """Path component of the wrapped URI, exactly as stored."""
    parsed = self._uri
    return parsed.path
@property
def unquoted_path(self) -> str:
    """Path component of the URI with URI quoting reversed."""
    quoted = self._uri.path
    return urllib.parse.unquote(quoted)
@property
def ospath(self) -> str:
    """Path component of the URI localized to the current OS.

    This base implementation always raises `AttributeError`.
    """
    message = f"Non-file URI ({self}) has no local OS path."
    raise AttributeError(message)
@property
def relativeToPathRoot(self) -> str:
    """Path relative to the root of the network location.

    The leading root separator is removed from the path. A dir-like
    URI always gets a trailing ``/``. The result is always unquoted.
    """
    pure = self._pathLib(self.path)
    relative = str(pure.relative_to(pure.root))
    needs_sep = self.dirLike and not relative.endswith("/")
    return urllib.parse.unquote(relative + "/" if needs_sep else relative)
@property
def is_root(self) -> bool:
    """Whether this URI points to the root of the network location.

    True exactly when the path relative to the root is ``./``.
    """
    return self.relativeToPathRoot == "./"
@property
def fragment(self) -> str:
    """Fragment component of the wrapped URI."""
    parsed = self._uri
    return parsed.fragment
@property
def params(self) -> str:
    """Parameters component of the wrapped URI."""
    parsed = self._uri
    return parsed.params
@property
def query(self) -> str:
    """Query-string component of the wrapped URI."""
    parsed = self._uri
    return parsed.query
def geturl(self) -> str:
    """Return the URI as a string.

    Returns
    -------
    url : `str`
        String form of the URI.
    """
    parsed = self._uri
    return parsed.geturl()
def root_uri(self) -> ResourcePath:
    """Return the base root URI.

    Returns
    -------
    uri : `ResourcePath`
        Directory-like URI obtained by replacing the path with an
        empty string.
    """
    return self.replace(forceDirectory=True, path="")
def split(self) -> tuple[ResourcePath, str]:
    """Split the URI into a head URI and a tail string.

    Returns
    -------
    head : `ResourcePath`
        Directory-like URI for everything before the tail; the other
        URI components are preserved.
    tail : `str`
        Final path component, unquoted. Empty if the path ends with a
        separator. Never contains a separator.

    Notes
    -----
    Analogous to `os.path.split`, applied to the path of the URI.
    """
    pathmod = self._pathModule
    head_path, last = pathmod.split(self.path)
    head_parsed = self._uri._replace(path=head_path)

    # The file part should never include quoted metacharacters.
    last = urllib.parse.unquote(last)

    # A schemeless URI may be relative and has to stay that way, so only
    # force absolute when the path already is absolute.
    keep_absolute = pathmod.isabs(self.path)
    head_uri = ResourcePath(head_parsed, forceDirectory=True, forceAbsolute=keep_absolute)
    return head_uri, last
def basename(self) -> str:
    """Return the last element of the URI path.

    Returns
    -------
    tail : `str`
        Second element returned by `split()`. Empty if the path ends
        with a separator.

    Notes
    -----
    Equivalent of `os.path.basename`.
    """
    _, tail = self.split()
    return tail
def dirname(self) -> ResourcePath:
    """Return the directory part of the path as a new `ResourcePath`.

    Returns
    -------
    head : `ResourcePath`
        First element returned by `split()`.

    Notes
    -----
    Equivalent of `os.path.dirname`.
    """
    head, _ = self.split()
    return head
def parent(self) -> ResourcePath:
    """Return a `ResourcePath` for the parent directory.

    Returns
    -------
    head : `ResourcePath`
        For a file-like URI, the result of `dirname()`. For a dir-like
        URI, a directory-like URI one level up.
    """
    if self.dirLike:
        # pathlib ignores any trailing separator when finding the parent.
        up = self._pathLib(self.path).parent
        return self.replace(path=str(up), forceDirectory=True)
    return self.dirname()
def replace(self, forceDirectory: bool = False, isTemporary: bool = False, **kwargs: Any) -> ResourcePath:
    """Return a new `ResourcePath` with some URI components replaced.

    Parameters
    ----------
    forceDirectory : `bool`, optional
        Passed to the constructor to force the new URI to be dir-like.
    isTemporary : `bool`, optional
        Passed to the constructor to mark the new URI as temporary.
    **kwargs
        Components of a `urllib.parse.ParseResult` to change.

    Returns
    -------
    new : `ResourcePath`
        New object of the same class as this one.

    Raises
    ------
    ValueError
        Raised if a change of ``scheme`` is requested.
    """
    # Changing the scheme is not supported here.
    if "scheme" in kwargs:
        raise ValueError(f"Can not use replace() method to change URI scheme for {self}")
    updated = self._uri._replace(**kwargs)
    factory = self.__class__
    return factory(updated, forceDirectory=forceDirectory, isTemporary=isTemporary)
def updatedFile(self, newfile: str) -> ResourcePath:
    """Return a new URI with the final path component replaced.

    Parameters
    ----------
    newfile : `str`
        File name with no path component.

    Returns
    -------
    updated : `ResourcePath`
        New URI whose ``dirLike`` attribute is set to `False`.

    Notes
    -----
    The new file name is quoted when ``quotePaths`` is true.
    """
    name = urllib.parse.quote(newfile) if self.quotePaths else newfile
    parent_dir = self._pathModule.split(self.path)[0]
    result = self.replace(path=self._pathModule.join(parent_dir, name))
    result.dirLike = False
    return result
def updatedExtension(self, ext: str | None) -> ResourcePath:
    """Return a `ResourcePath` with the file extension replaced.

    The whole extension reported by `getExtension` is replaced.

    Parameters
    ----------
    ext : `str` or `None`
        New extension. An empty string removes the extension. `None`
        means no change.

    Returns
    -------
    updated : `ResourcePath`
        URI with the requested extension. This object itself is
        returned if ``ext`` is `None` or equals the current extension.
    """
    if ext is None:
        return self

    existing = self.getExtension()
    if existing == ext:
        # Already has the requested extension.
        return self

    # Strip the current extension. ".fits.gz" counts as one extension,
    # so os.path.splitext is not suitable here.
    stem = self.path[: -len(existing)] if existing else self.path

    # Add the leading "." if missing, but leave the empty string alone.
    if ext and not ext.startswith("."):
        ext = "." + ext

    return self.replace(path=stem + ext)
def getExtension(self) -> str:
    """Return the file extension(s) of the URI path.

    Returns
    -------
    ext : `str`
        Extension including the leading ``.``, or an empty string if
        there is none. Normally only the last extension is returned.
        If it is a compression suffix (``.gz``, ``.bz2``, ``.xz``,
        ``.fz``) the preceding extension is included as well, e.g.
        ``.fits.gz``.
    """
    compression = {".gz", ".bz2", ".xz", ".fz"}

    # Only look at the file name so that a "." in a directory name is
    # not mistaken for an extension.
    suffixes = self._pathLib(self.basename()).suffixes
    if not suffixes:
        return ""

    last = suffixes[-1]
    if len(suffixes) > 1 and last in compression:
        return f"{suffixes[-2]}{last}"
    return last
def join(
    self, path: str | ResourcePath, isTemporary: bool | None = None, forceDirectory: bool = False
) -> ResourcePath:
    """Return a new `ResourcePath` with extra path components appended.

    Parameters
    ----------
    path : `str`, `ResourcePath`
        Path components to append to the directory of this URI. It is
        quoted if the directory URI requires quoted paths. If it is a
        URI with a scheme, it is returned directly (matching the
        behavior of `os.path.join`).
    isTemporary : `bool`, optional
        Whether the result represents a temporary resource. Defaults
        to ``self.isTemporary``.
    forceDirectory : `bool`, optional
        If `True` the result is forced to be dir-like; otherwise the
        path is interpreted as given.

    Returns
    -------
    new : `ResourcePath`
        New URI in which any file at the end of this URI is replaced
        by the supplied path components.

    Raises
    ------
    ValueError
        Raised if ``path`` is an absolute scheme-less URI. In that
        case it is unclear whether a ``file`` URI was intended or a
        relative path was meant.
    RuntimeError
        Raised if this URI is temporary and ``isTemporary`` is
        explicitly false.

    Notes
    -----
    The path module of this URI is used for joining and normalizing.
    An absolute `ResourcePath` given for ``path`` is assumed to be the
    intended result and is returned directly.
    """
    if isTemporary is None:
        isTemporary = self.isTemporary
    elif self.isTemporary and not isTemporary:
        raise RuntimeError("Cannot join temporary URI to non-temporary URI.")

    # Parse the supplied path without forcing it absolute so that the
    # expected case of a relative path can be recognized.
    candidate = ResourcePath(
        path, forceAbsolute=False, forceDirectory=forceDirectory, isTemporary=isTemporary
    )

    # A scheme marks an explicit URI, as opposed to an absolute
    # scheme-less path.
    if candidate.scheme:
        return candidate

    if candidate.isabs():
        # Absolute scheme-less path.
        raise ValueError(f"Can not join absolute scheme-less {candidate!r} to another URI.")

    # For a ResourcePath use its unquoted path. A plain string is used
    # as given so that "#" can appear in the file name.
    if not isinstance(path, str):
        path = candidate.unquoted_path

    base = self.dirname()  # By definition a directory URI

    # Ask the directory URI, not self, about quoting: dirname can turn
    # a schemeless URI into a file URI.
    if base.quotePaths:
        path = urllib.parse.quote(path)

    pathmod = self._pathModule
    combined = pathmod.normpath(pathmod.join(base.path, path))

    # normpath can strip a trailing separator, so force a directory if
    # the supplied path ended with one.
    return base.replace(
        path=combined,
        forceDirectory=(forceDirectory or path.endswith(pathmod.sep)),
        isTemporary=isTemporary,
    )
def relative_to(self, other: ResourcePath) -> str | None:
    """Return the path of this URI relative to the other URI.

    Parameters
    ----------
    other : `ResourcePath`
        URI to use to calculate the relative path. Must be a parent
        of this URI.

    Returns
    -------
    subpath : `str` or `None`
        The unquoted sub path of this URI relative to the supplied
        other URI. `None` if there is no parent-child relationship.
        Scheme and netloc must match, except that the common spellings
        of the local host are treated as equivalent.
    """
    # Scheme-less absolute other is treated as if it's a file scheme.
    # Scheme-less relative other can only return non-None if self
    # is also scheme-less relative and that is handled specifically
    # in a subclass.
    if not other.scheme and other.isabs():
        other = other.abspath()

    # Scheme-less self is handled elsewhere.
    if self.scheme != other.scheme:
        return None
    if self.netloc != other.netloc:
        # Special case for localhost vs empty string.
        # There can be many variants of localhost.
        local_netlocs = {"", "localhost", "localhost.localdomain", "127.0.0.1"}
        if not {self.netloc, other.netloc}.issubset(local_netlocs):
            return None

    # relativeToPathRoot is already unquoted. Unquoting the result a
    # second time would corrupt a name containing a literal "%xx"
    # sequence (e.g. "a%20b" would turn into "a b").
    enclosed_path = self._pathLib(self.relativeToPathRoot)
    parent_path = other.relativeToPathRoot
    try:
        return str(enclosed_path.relative_to(parent_path))
    except ValueError:
        # Not located below the other URI.
        return None
def exists(self) -> bool:
    """Report whether the resource is available.

    This base implementation always raises `NotImplementedError`.

    Returns
    -------
    exists : `bool`
        `True` if the resource exists.
    """
    raise NotImplementedError()
@classmethod
def mexists(cls, uris: Iterable[ResourcePath]) -> dict[ResourcePath, bool]:
    """Check for existence of multiple URIs at once.

    Parameters
    ----------
    uris : iterable of `ResourcePath`
        The URIs to test.

    Returns
    -------
    existence : `dict` of [`ResourcePath`, `bool`]
        Mapping of original URI to boolean indicating existence.
    """
    # Group by class so that each class can use its own ``_mexists``
    # implementation for the URIs it owns.
    by_class: dict[type, list[ResourcePath]] = {}
    for uri in uris:
        by_class.setdefault(uri.__class__, []).append(uri)

    existence: dict[ResourcePath, bool] = {}
    for uri_class, members in by_class.items():
        existence.update(uri_class._mexists(members))
    return existence
@classmethod
def _mexists(cls, uris: Iterable[ResourcePath]) -> dict[ResourcePath, bool]:
    """Check for existence of multiple URIs at once.

    Implementation helper method for `mexists`. Each URI's ``exists``
    method is called in a thread pool of at most ``MAX_WORKERS``
    threads.

    Parameters
    ----------
    uris : iterable of `ResourcePath`
        The URIs to test.

    Returns
    -------
    existence : `dict` of [`ResourcePath`, `bool`]
        Mapping of original URI to boolean indicating existence. A URI
        whose ``exists`` call raises an exception is reported as not
        existing.
    """
    results: dict[ResourcePath, bool] = {}
    # The context manager shuts the pool down on exit so that worker
    # threads are not left behind after the call returns.
    with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as exists_executor:
        future_exists = {exists_executor.submit(uri.exists): uri for uri in uris}
        for future in concurrent.futures.as_completed(future_exists):
            uri = future_exists[future]
            try:
                exists = future.result()
            except Exception:
                # Best effort: any failure to check counts as "missing".
                exists = False
            results[uri] = exists
    return results
def remove(self) -> None:
    """Remove the resource.

    This base implementation always raises `NotImplementedError`.
    """
    raise NotImplementedError()
def isabs(self) -> bool:
    """Report whether the resource is fully specified.

    Returns
    -------
    isabs : `bool`
        This base implementation always returns `True`.
    """
    return True
def abspath(self) -> ResourcePath:
    """Return the URI using an absolute path.

    Returns
    -------
    abs : `ResourcePath`
        This base implementation returns the object itself.
    """
    return self
def _as_local(self) -> tuple[str, bool]:
    """Return the location of the resource as a local file.

    Helper for the `as_local` context manager. This base
    implementation always raises `NotImplementedError`.

    Returns
    -------
    path : `str`
        Path to the resource on the local file system.
    is_temporary : `bool`
        Whether the local path is a temporary file.
    """
    raise NotImplementedError()
@contextlib.contextmanager
def as_local(self) -> Iterator[ResourcePath]:
    """Provide the resource as a local file for the duration of a context.

    Yields
    ------
    local : `ResourcePath`
        URI built from the path returned by ``_as_local``.

    Raises
    ------
    IsADirectoryError
        Raised if this URI is directory-like.

    Notes
    -----
    A temporary local file is deleted on exit from the context.

    Examples
    --------
    Should be used as a context manager:

    .. code-block:: py

       with uri.as_local() as local:
           ospath = local.ospath
    """
    if self.dirLike:
        raise IsADirectoryError(f"Directory-like URI {self} cannot be fetched as local.")

    local_src, is_temporary = self._as_local()
    local_uri = ResourcePath(local_src, isTemporary=is_temporary)

    try:
        yield local_uri
    finally:
        # Never delete when the local URI equals self. The caller may
        # also have moved the temporary file away, so check it exists.
        if self != local_uri and is_temporary:
            if local_uri.exists():
                local_uri.remove()
@classmethod
@contextlib.contextmanager
def temporary_uri(
    cls, prefix: ResourcePath | None = None, suffix: str | None = None
) -> Iterator[ResourcePath]:
    """Create a temporary file-like URI.

    Parameters
    ----------
    prefix : `ResourcePath`, optional
        Prefix to use. Without this the path will be formed as a local
        file URI in a temporary directory. Ensuring that the prefix
        location exists is the responsibility of the caller.
    suffix : `str`, optional
        A file suffix to be used. The ``.`` should be included in this
        suffix.

    Yields
    ------
    uri : `ResourcePath`
        The temporary URI. Will be removed when the context is completed.

    Raises
    ------
    NotImplementedError
        Raised if the generated URI turns out to be directory-like.
    """
    use_tempdir = False
    if prefix is None:
        prefix = ResourcePath(tempfile.mkdtemp(), forceDirectory=True, isTemporary=True)
        # Record that we need to delete this directory. Can not rely
        # on isTemporary flag since an external prefix may have that
        # set as well.
        use_tempdir = True

    try:
        # Need to create a randomized file name. For consistency do not
        # use mkstemp for local and something else for remote. Additionally
        # this method does not create the file to prevent name clashes.
        characters = "abcdefghijklmnopqrstuvwxyz0123456789_"
        rng = Random()
        tempname = "".join(rng.choice(characters) for _ in range(16))
        if suffix:
            tempname += suffix
        temporary_uri = prefix.join(tempname, isTemporary=True)
        if temporary_uri.dirLike:
            # If we had a safe way to clean up a remote temporary directory, we
            # could support this.
            raise NotImplementedError("temporary_uri cannot be used to create a temporary directory.")
    except BaseException:
        # Nothing has been yielded yet, so the cleanup below is never
        # reached. Remove the directory we created so it is not leaked.
        if use_tempdir:
            shutil.rmtree(prefix.ospath, ignore_errors=True)
        raise

    try:
        yield temporary_uri
    finally:
        if use_tempdir:
            shutil.rmtree(prefix.ospath, ignore_errors=True)
        else:
            with contextlib.suppress(FileNotFoundError):
                # It's okay if this does not work because the user removed
                # the file.
                temporary_uri.remove()
def read(self, size: int = -1) -> bytes:
    """Open the resource and return its contents as bytes.

    This base implementation always raises `NotImplementedError`.

    Parameters
    ----------
    size : `int`, optional
        Number of bytes to read. A negative or omitted value means
        that all data should be read.
    """
    raise NotImplementedError()
def write(self, data: bytes, overwrite: bool = True) -> None:
    """Write the supplied bytes to the resource.

    This base implementation always raises `NotImplementedError`.

    Parameters
    ----------
    data : `bytes`
        Bytes to write. The entire contents of the resource are
        replaced.
    overwrite : `bool`, optional
        If `True` an existing resource is overwritten. Otherwise the
        write fails.
    """
    raise NotImplementedError()
def mkdir(self) -> None:
    """Create the directory resource for a dir-like URI if needed.

    This base implementation always raises `NotImplementedError`.
    """
    raise NotImplementedError()
def isdir(self) -> bool:
    """Return the ``dirLike`` flag: True if the URI looks like a directory."""
    looks_like_dir = self.dirLike
    return looks_like_dir
def size(self) -> int:
    """Return the size of the resource for a non-dir-like URI.

    This base implementation always raises `NotImplementedError`.

    Returns
    -------
    sz : `int`
        Size in bytes of the resource associated with this URI.
        Returns 0 if dir-like.
    """
    raise NotImplementedError()
def __str__(self) -> str:
    """Return the URI in its native string form, as given by ``geturl``."""
    native_form = self.geturl()
    return native_form
def __repr__(self) -> str:
    """Return a constructor-style representation of this URI.

    The result wraps the ``geturl`` string in a ``ResourcePath("...")``
    call expression.
    """
    url = self.geturl()
    return f'ResourcePath("{url}")'
def __eq__(self, other: Any) -> bool:
    """Compare this `ResourcePath` with another object.

    Two `ResourcePath` instances are equal when their ``geturl`` strings
    are equal. For any other type `NotImplemented` is returned so that
    Python can try the reflected comparison.
    """
    if isinstance(other, ResourcePath):
        return self.geturl() == other.geturl()
    return NotImplemented
def __hash__(self) -> int:
    """Return the hash of the string form of this URI."""
    as_text = str(self)
    return hash(as_text)
def __lt__(self, other: ResourcePath) -> bool:
    """Order URIs by comparing their ``geturl`` strings (less than)."""
    mine = self.geturl()
    theirs = other.geturl()
    return mine < theirs
def __le__(self, other: ResourcePath) -> bool:
    """Order URIs by comparing their ``geturl`` strings (less or equal)."""
    mine = self.geturl()
    theirs = other.geturl()
    return mine <= theirs
def __gt__(self, other: ResourcePath) -> bool:
    """Order URIs by comparing their ``geturl`` strings (greater than)."""
    mine = self.geturl()
    theirs = other.geturl()
    return mine > theirs
def __ge__(self, other: ResourcePath) -> bool:
    """Order URIs by comparing their ``geturl`` strings (greater or equal)."""
    mine = self.geturl()
    theirs = other.geturl()
    return mine >= theirs
def __copy__(self) -> ResourcePath:
    """Support `copy.copy`.

    The object is treated as immutable, so the shallow copy is the object
    itself.
    """
    # Defined explicitly because the class's __new__ method would otherwise
    # confuse the default copy machinery.
    return self
def __deepcopy__(self, memo: Any) -> ResourcePath:
    """Support `copy.deepcopy`.

    The object is treated as immutable, so the deep copy is the object
    itself. The ``memo`` argument is not used.
    """
    # Defined explicitly because the class's __new__ method would otherwise
    # confuse the default copy machinery.
    return self
def __getnewargs__(self) -> tuple:
    """Support pickling.

    Returns
    -------
    args : `tuple`
        One-element tuple holding the string form of this URI.
    """
    as_text = str(self)
    return (as_text,)
@classmethod
def _fixDirectorySep(
    cls, parsed: urllib.parse.ParseResult, forceDirectory: bool = False
) -> tuple[urllib.parse.ParseResult, bool]:
    """Make sure directory paths end with a path separator.

    Parameters
    ----------
    parsed : `~urllib.parse.ParseResult`
        The result from parsing a URI using `urllib.parse`.
    forceDirectory : `bool`, optional
        If `True` the URI is treated as a directory and is made to end
        with a separator. Otherwise the URI is interpreted as given.
        Stating that a URI is a directory removes the ambiguity in how the
        last element of the path should be interpreted.

    Returns
    -------
    modified : `~urllib.parse.ParseResult`
        The parsed URI, with a separator appended to the path if one was
        required. Returned unchanged otherwise.
    dirLike : `bool`
        `True` if the path already ended with a separator or
        ``forceDirectory`` is `True`. Otherwise `False`.
    """
    # The separator comes from the path module configured on the class.
    separator = cls._pathModule.sep
    has_trailing_sep = parsed.path.endswith(separator)

    # Neither requested nor spelled as a directory: nothing to change.
    if not (forceDirectory or has_trailing_sep):
        return parsed, False

    # Directory-like: append the separator only if it is missing.
    if not has_trailing_sep:
        parsed = parsed._replace(path=parsed.path + separator)
    return parsed, True
@classmethod
def _fixupPathUri(
    cls,
    parsed: urllib.parse.ParseResult,
    root: ResourcePath | None = None,
    forceAbsolute: bool = False,
    forceDirectory: bool = False,
) -> tuple[urllib.parse.ParseResult, bool]:
    """Correct any issues with the supplied URI.

    Parameters
    ----------
    parsed : `~urllib.parse.ParseResult`
        The result from parsing a URI using `urllib.parse`.
    root : `ResourcePath`, ignored
        Not used by this implementation since all URIs are absolute
        except for those representing the local file system.
    forceAbsolute : `bool`, ignored
        Not used by this implementation. URIs are generally always
        absolute.
    forceDirectory : `bool`, optional
        If `True` the URI is made to end with a separator, otherwise the
        URI is interpreted as given. Stating that a URI is a directory
        removes the ambiguity in how the last element of the path should
        be interpreted.

    Returns
    -------
    modified : `~urllib.parse.ParseResult`
        Updated result if a URI is being handled.
    dirLike : `bool`
        `True` if the parsed URI has a trailing separator or
        ``forceDirectory`` is `True`. Otherwise `False`.

    Notes
    -----
    This implementation only delegates to ``_fixDirectorySep``. The
    remaining notes are general considerations for URI handling.

    Relative paths are explicitly not supported by RFC8089 but `urllib`
    does accept URIs of the form ``file:relative/path.ext``. They need
    to be turned into absolute paths before they can be used. This is
    always done regardless of the ``forceAbsolute`` parameter.

    AWS S3 differentiates between keys with trailing POSIX separators
    (i.e. ``/dir`` and ``/dir/``) whereas POSIX does not necessarily.

    Scheme-less paths are normalized.
    """
    fixed = cls._fixDirectorySep(parsed, forceDirectory)
    return fixed
def transfer_from(
    self,
    src: ResourcePath,
    transfer: str,
    overwrite: bool = False,
    transaction: TransactionProtocol | None = None,
) -> None:
    """Transfer the content of another URI to this URI.

    Parameters
    ----------
    src : `ResourcePath`
        Source URI.
    transfer : `str`
        Mode to use for transferring the resource. Generically there are
        many standard options: copy, link, symlink, hardlink, relsymlink.
        Not all URIs support all modes.
    overwrite : `bool`, optional
        Allow an existing file to be overwritten. Defaults to `False`.
    transaction : `~lsst.resources.utils.TransactionProtocol`, optional
        A transaction object that can (depending on implementation)
        roll back transfers on error. Not guaranteed to be implemented.

    Raises
    ------
    NotImplementedError
        Always raised by this base implementation, which supports no
        transfer modes.

    Notes
    -----
    Conceptually this is hard to scale as the number of URI schemes
    grows. The destination URI matters more than the source URI because
    that is where all the transfer modes are relevant (with the
    complication that "move" deletes the source).

    Local file to local file is the fundamental use case, but every
    other scheme has to support "copy" to a local file (with implicit
    support for "move") and copy from a local file. All the "link"
    options tend to be specific to local file systems.

    "move" is a "copy" where the remote resource is deleted at the end.
    Whether this works depends on the source URI rather than the
    destination URI. Reverting a move on transaction rollback is
    expected to be problematic if a remote resource was involved.
    """
    scheme = self.scheme
    raise NotImplementedError(f"No transfer modes supported by URI scheme {scheme}")
def walk(
    self, file_filter: str | re.Pattern | None = None
) -> Iterator[list | tuple[ResourcePath, list[str], list[str]]]:
    """Walk the directory tree, reporting matching files and directories.

    Parameters
    ----------
    file_filter : `str` or `re.Pattern`, optional
        Regex to filter out files from the list before it is returned.

    Yields
    ------
    dirpath : `ResourcePath`
        Current directory being examined.
    dirnames : `list` of `str`
        Names of subdirectories within dirpath.
    filenames : `list` of `str`
        Names of all the files within dirpath.

    Raises
    ------
    NotImplementedError
        Always raised by this base implementation.
    """
    raise NotImplementedError()
@overload
@classmethod
def findFileResources(
    cls,
    candidates: Iterable[ResourcePathExpression],
    file_filter: str | re.Pattern | None,
    grouped: Literal[True],
) -> Iterator[Iterator[ResourcePath]]:
    ...

@overload
@classmethod
def findFileResources(
    cls,
    candidates: Iterable[ResourcePathExpression],
    *,
    grouped: Literal[True],
) -> Iterator[Iterator[ResourcePath]]:
    ...

@overload
@classmethod
def findFileResources(
    cls,
    candidates: Iterable[ResourcePathExpression],
    file_filter: str | re.Pattern | None = None,
    grouped: Literal[False] = False,
) -> Iterator[ResourcePath]:
    ...

@classmethod
def findFileResources(
    cls,
    candidates: Iterable[ResourcePathExpression],
    file_filter: str | re.Pattern | None = None,
    grouped: bool = False,
) -> Iterator[ResourcePath | Iterator[ResourcePath]]:
    """Get all the files from a list of values.

    Parameters
    ----------
    candidates : iterable [`str` or `ResourcePath`]
        The files to return and directories in which to look for files to
        return.
    file_filter : `str` or `re.Pattern`, optional
        The regex to use when searching for files within directories.
        By default returns all the found files.
    grouped : `bool`, optional
        If `True` the results will be grouped by directory and each
        yielded value will be an iterator over URIs. If `False` each
        URI will be returned separately.

    Yields
    ------
    found_file : `ResourcePath`
        The passed-in URIs and URIs found in passed-in directories.
        If grouping is enabled, each of the yielded values will be an
        iterator yielding members of the group. Files given explicitly
        will be returned as a single group at the end.

    Notes
    -----
    If a value is a file it is yielded immediately without checking that it
    exists. If a value is a directory, all the files in the directory
    (recursively) that match the regex will be yielded in turn.

    Each group iterator is bound to its own directory when it is created,
    so groups may be collected first and consumed later in any order.
    """
    fileRegex = None if file_filter is None else re.compile(file_filter)

    # Explicitly given files are held back in grouped mode so that they can
    # be returned together as the final group.
    singles = []

    for location in candidates:
        uri = ResourcePath(location)

        if not uri.isdir():
            if grouped:
                singles.append(uri)
            else:
                yield uri
            continue

        for found in uri.walk(fileRegex):
            if not found:
                # This means the uri does not exist and by convention we
                # ignore it.
                continue
            root, _, files = found
            if not files:
                continue
            # ``map`` captures ``root.join`` now. A generator expression
            # would look ``root`` up lazily and, if the caller consumed the
            # group after this loop had advanced, would join against a
            # later directory.
            if grouped:
                yield map(root.join, files)
            else:
                yield from map(root.join, files)

    # Finally, return any explicitly given files in one group.
    if grouped and singles:
        yield iter(singles)
@contextlib.contextmanager
def open(
    self,
    mode: str = "r",
    *,
    encoding: str | None = None,
    prefer_file_temporary: bool = False,
) -> Iterator[ResourceHandleProtocol]:
    """Return a context manager that wraps an object that behaves like an
    open file at the location of the URI.

    Parameters
    ----------
    mode : `str`
        String indicating the mode in which to open the file. Values are
        the same as those accepted by `open`, though intrinsically
        read-only URI types may only support read modes, and
        `io.IOBase.seekable` is not guaranteed to be `True` on the returned
        object.
    encoding : `str`, optional
        Unicode encoding for text IO; ignored for binary IO. Defaults to
        ``locale.getpreferredencoding(False)``, just as `open` does.
    prefer_file_temporary : `bool`, optional
        If `True`, for implementations that require transfers from a remote
        system to temporary local storage and/or back, use a temporary file
        instead of an in-memory buffer; this is generally slower, but it
        may be necessary to avoid excessive memory usage by large files.
        Ignored by implementations that do not require a temporary.

    Yields
    ------
    cm : `~contextlib.AbstractContextManager`
        A context manager that wraps a `ResourceHandleProtocol` file-like
        object.

    Raises
    ------
    IsADirectoryError
        Raised if this URI is directory-like.
    FileExistsError
        Raised if the mode contains "x" and the resource already exists.

    Notes
    -----
    The default implementation of this method uses a local temporary buffer
    (in-memory or file, depending on ``prefer_file_temporary``) with calls
    to `read`, `write`, `as_local`, and `transfer_from` as necessary to
    read and write from/to remote systems. Remote writes thus occur only
    when the context manager is exited. `ResourcePath` implementations
    that can return a more efficient native buffer should do so whenever
    possible (as is guaranteed for local files). `ResourcePath`
    implementations for which `as_local` does not return a temporary are
    required to reimplement `open`, though they may delegate to `super`
    when ``prefer_file_temporary`` is `False`.
    """
    if self.dirLike:
        raise IsADirectoryError(f"Directory-like URI {self} cannot be opened.")

    exclusive = "x" in mode
    if exclusive and self.exists():
        raise FileExistsError(f"File at {self} already exists.")

    # In-memory path: the handle implementation does all the work.
    if not prefer_file_temporary:
        with self._openImpl(mode, encoding=encoding) as handle:
            yield handle
        return

    # File-backed path. Reading and appending need the current content
    # locally; other modes start from a fresh temporary file name.
    needs_existing_content = "r" in mode or "a" in mode
    if needs_existing_content:
        local_cm = self.as_local()
    else:
        local_cm = self.temporary_uri(suffix=self.getExtension())

    with local_cm as local_uri:
        assert local_uri.isTemporary, (
            "ResourcePath implementations for which as_local is not "
            "a temporary must reimplement `open`."
        )
        with open(local_uri.ospath, mode=mode, encoding=encoding) as file_buffer:
            if "a" in mode:
                file_buffer.seek(0, io.SEEK_END)
            yield file_buffer
        # Send the local file back unless the mode was read-only.
        if "r" not in mode or "+" in mode:
            self.transfer_from(local_uri, transfer="copy", overwrite=not exclusive)
@contextlib.contextmanager
def _openImpl(self, mode: str = "r", *, encoding: str | None = None) -> Iterator[ResourceHandleProtocol]:
    """Implement opening of a resource handle.

    This private method may be overridden by specific `ResourcePath`
    implementations to provide a customized handle-like interface.

    Parameters
    ----------
    mode : `str`
        The mode the handle should be opened with.
    encoding : `str`, optional
        The encoding used to convert between text and bytes in text
        modes. Defaults to ``locale.getpreferredencoding(False)``.
        Ignored in binary modes.

    Yields
    ------
    handle : `~._resourceHandles.BaseResourceHandle`
        A handle that conforms to the
        `~._resourceHandles.BaseResourceHandle` interface.

    Notes
    -----
    The base implementation reads a file's entire contents into an
    in-memory buffer for manipulation, and then writes it back out upon
    close. Subclasses of this class may offer more fine-grained control.

    As with the builtin `open`, append mode creates the resource if it
    does not exist: a `FileNotFoundError` from `read` is treated as empty
    initial content. In read modes the error still propagates.
    """
    appending = "a" in mode

    if "r" in mode:
        in_bytes = self.read()
    elif appending:
        try:
            in_bytes = self.read()
        except FileNotFoundError:
            # Nothing to append to yet; start from an empty buffer.
            in_bytes = b""
    else:
        in_bytes = b""

    if "b" in mode:
        bytes_buffer = io.BytesIO(in_bytes)
        if appending:
            bytes_buffer.seek(0, io.SEEK_END)
        yield bytes_buffer
        out_bytes = bytes_buffer.getvalue()
    else:
        if encoding is None:
            encoding = locale.getpreferredencoding(False)
        str_buffer = io.StringIO(in_bytes.decode(encoding))
        if appending:
            str_buffer.seek(0, io.SEEK_END)
        yield str_buffer
        out_bytes = str_buffer.getvalue().encode(encoding)

    # Only modes that can modify the content write the buffer back.
    if "r" not in mode or "+" in mode:
        self.write(out_bytes, overwrite=("x" not in mode))
# Union of the types accepted where a `ResourcePath` can be built from a
# value. This is a runtime union expression evaluated at import time, so it
# has to come after the ``ResourcePath`` class it refers to.
ResourcePathExpression = str | urllib.parse.ParseResult | ResourcePath | Path
"""Type-annotation alias for objects that can be coerced to ResourcePath.
"""