Coverage for python/lsst/resources/_resourcePath.py: 28%
428 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-30 11:34 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-30 11:34 +0000
1# This file is part of lsst-resources.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# Use of this source code is governed by a 3-clause BSD-style
10# license that can be found in the LICENSE file.
12from __future__ import annotations
14import concurrent.futures
15import contextlib
16import copy
17import io
18import locale
19import logging
20import os
21import posixpath
22import re
23import shutil
24import tempfile
25import urllib.parse
26from pathlib import Path, PurePath, PurePosixPath
27from random import Random
29__all__ = ("ResourcePath", "ResourcePathExpression")
31from collections.abc import Iterable, Iterator
32from typing import TYPE_CHECKING, Any, Literal, overload
34from ._resourceHandles._baseResourceHandle import ResourceHandleProtocol
35from .utils import ensure_directory_is_writeable
37if TYPE_CHECKING:
38 from .utils import TransactionProtocol
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Pattern matching a percent-escape (upper-case hex digits only); used to
# spot strings that already look URI-quoted.
ESCAPES_RE = re.compile(r"%[A-F0-9]{2}")

# The "#" character in URI-quoted form, computed once at import time.
ESCAPED_HASH = urllib.parse.quote("#")

# Maximum number of worker threads for parallelized operations.
# If this is raised above 10, it has to stay consistent with connection
# pool sizing (for example in urllib3).
MAX_WORKERS = 10
55class ResourcePath:
56 """Convenience wrapper around URI parsers.
58 Provides access to URI components and can convert file
59 paths into absolute path URIs. Scheme-less URIs are treated as if
60 they are local file system paths and are converted to absolute URIs.
62 A specialist subclass is created for each supported URI scheme.
64 Parameters
65 ----------
66 uri : `str`, `pathlib.Path`, `urllib.parse.ParseResult`, or `ResourcePath`.
67 URI in string form. Can be scheme-less if referring to a relative
68 path or an absolute path on the local file system.
69 root : `str` or `ResourcePath`, optional
70 When fixing up a relative path in a ``file`` scheme or if scheme-less,
71 use this as the root. Must be absolute. If `None` the current
72 working directory will be used. Can be any supported URI scheme.
73 Not used if ``forceAbsolute`` is `False`.
74 forceAbsolute : `bool`, optional
75 If `True`, scheme-less relative URI will be converted to an absolute
76 path using a ``file`` scheme. If `False` scheme-less URI will remain
77 scheme-less and will not be updated to ``file`` or absolute path unless
78 it is already an absolute path, in which case it will be updated to
79 a ``file`` scheme.
80 forceDirectory: `bool`, optional
81 If `True` forces the URI to end with a separator, otherwise given URI
82 is interpreted as is.
83 isTemporary : `bool`, optional
84 If `True` indicates that this URI points to a temporary resource.
85 The default is `False`, unless ``uri`` is already a `ResourcePath`
86 instance and ``uri.isTemporary is True``.
88 Notes
89 -----
90 A non-standard URI of the form ``file:dir/file.txt`` is always converted
91 to an absolute ``file`` URI.
92 """
94 _pathLib: type[PurePath] = PurePosixPath
95 """Path library to use for this scheme."""
97 _pathModule = posixpath
98 """Path module to use for this scheme."""
100 transferModes: tuple[str, ...] = ("copy", "auto", "move")
101 """Transfer modes supported by this implementation.
103 Move is special in that it is generally a copy followed by an unlink.
104 Whether that unlink works depends critically on whether the source URI
105 implements unlink. If it does not the move will be reported as a failure.
106 """
108 transferDefault: str = "copy"
109 """Default mode to use for transferring if ``auto`` is specified."""
111 quotePaths = True
112 """True if path-like elements modifying a URI should be quoted.
114 All non-schemeless URIs have to internally use quoted paths. Therefore
115 if a new file name is given (e.g. to updatedFile or join) a decision must
116 be made whether to quote it to be consistent.
117 """
119 isLocal = False
120 """If `True` this URI refers to a local file."""
122 # This is not an ABC with abstract methods because the __new__ being
123 # a factory confuses mypy such that it assumes that every constructor
124 # returns a ResourcePath and then determines that all the abstract methods
125 # are still abstract. If they are not marked abstract but just raise
126 # mypy is fine with it.
128 # mypy is confused without these
129 _uri: urllib.parse.ParseResult
130 isTemporary: bool
131 dirLike: bool
def __new__(
    cls,
    uri: ResourcePathExpression,
    root: str | ResourcePath | None = None,
    forceAbsolute: bool = True,
    forceDirectory: bool = False,
    isTemporary: bool | None = None,
) -> ResourcePath:
    """Build an instance of the scheme-specific `ResourcePath` subclass.

    Parameters
    ----------
    uri : `str`, `os.PathLike`, `urllib.parse.ParseResult`, or `ResourcePath`
        The resource to wrap.
    root : `str` or `ResourcePath`, optional
        Root used when resolving a relative, scheme-less ``uri``. A string
        is first converted to a directory-like `ResourcePath`.
    forceAbsolute : `bool`, optional
        Forwarded to the subclass path fix-up. Also causes a scheme-less
        `ResourcePath` argument to be re-created from its string form.
    forceDirectory : `bool`, optional
        Request that the result be treated as a directory.
    isTemporary : `bool`, optional
        Whether the resource is temporary. `None` resolves to `False` for
        a newly-built instance.

    Returns
    -------
    new : `ResourcePath`
        Instance of the subclass chosen from the URI scheme, or ``uri``
        itself when it is already a compatible `ResourcePath`.

    Raises
    ------
    ValueError
        Raised if ``uri`` has an unsupported type, or if ``root`` has to be
        joined with ``uri`` but is not directory-like.
    RuntimeError
        Raised if ``uri`` is a `ResourcePath` that conflicts with the
        requested ``forceDirectory`` or ``isTemporary``.
    NotImplementedError
        Raised if the URI scheme is not supported.
    """
    components: urllib.parse.ParseResult
    is_dir: bool = False
    target_cls: type[ResourcePath] | None = None

    # Downstream code only ever deals with a ResourcePath root (or None).
    base: ResourcePath | None
    if isinstance(root, str):
        base = ResourcePath(root, forceDirectory=True, forceAbsolute=True)
    else:
        base = root

    if isinstance(uri, os.PathLike):
        uri = str(uri)

    if isinstance(uri, str):
        # Local file names may contain special characters, so they are
        # quoted for the parser. Anything carrying "://" is assumed to be
        # quoted already, and "file:" is left alone because people write
        # file:/a/b as well as file:///a/b.
        looks_local = "://" not in uri and not uri.startswith("file:")
        if looks_local and ESCAPES_RE.search(uri):
            log.warning("Possible double encoding of %s", uri)
        elif looks_local:
            # Fragments are generally not encoded, so locate the fragment
            # boundary by hand: the last "#" found after the final "/".
            # This assumes the file name itself contains no "#" and the
            # fragment contains no "/".
            tail_start = uri.rfind("/") + 1
            hash_at = uri.rfind("#", tail_start)
            if hash_at != -1:
                to_quote, fragment = uri[:hash_at], uri[hash_at:]
            else:
                to_quote, fragment = uri, ""
            uri = urllib.parse.quote(to_quote) + fragment

        components = urllib.parse.urlparse(uri)
    elif isinstance(uri, urllib.parse.ParseResult):
        components = copy.copy(uri)
        # When invoked on a subclass rather than ResourcePath itself, use
        # that subclass directly. This keeps a schemeless absolute URI
        # from unexpectedly turning into a file URI in updatedFile or
        # updatedExtension, but it can be inconsistent if called
        # externally, e.g.
        # S3ResourcePath(urllib.parse.urlparse("file://a/b.txt")).
        if cls is not ResourcePath:
            components, is_dir = cls._fixDirectorySep(components, forceDirectory)
            target_cls = cls
    elif isinstance(uri, ResourcePath):
        # Instances are immutable, so the argument can be handed back
        # untouched when it already agrees with the requested flags.
        # Conflicting forceDirectory/isTemporary requests raise rather
        # than silently papering over a likely logic error.
        if forceDirectory and not uri.dirLike:
            raise RuntimeError(
                f"{uri} is already a file-like ResourcePath; cannot force it to directory."
            )
        if isTemporary is not None and isTemporary is not uri.isTemporary:
            current_kind = "temporary" if uri.isTemporary else "permanent"
            wanted_kind = "temporary" if isTemporary else "permanent"
            raise RuntimeError(
                f"{uri} is already a {current_kind} ResourcePath; cannot make it {wanted_kind}."
            )
        if not (forceAbsolute and not uri.scheme):
            return uri
        # Scheme-less but absolute requested: rebuild from the string form
        # so that a scheme gets added.
        return ResourcePath(
            str(uri),
            root=root,
            forceAbsolute=True,
            forceDirectory=uri.dirLike,
            isTemporary=uri.isTemporary,
        )
    else:
        raise ValueError(
            f"Supplied URI must be string, Path, ResourcePath, or ParseResult but got '{uri!r}'"
        )

    if target_cls is None:
        # Pick the subclass from the URI scheme.
        scheme = components.scheme
        if not scheme:
            # A non-file root overrides the schemeless determination for
            # relative paths; the file scheme has its own code path.
            join_to_root = (
                base is not None
                and base.scheme != "file"
                and not components.path.startswith("/")
            )
            if not join_to_root:
                from .schemeless import SchemelessResourcePath

                target_cls = SchemelessResourcePath
            else:
                if not base.dirLike:
                    raise ValueError(
                        f"Root URI ({root}) was not a directory so can not be joined with"
                        f" path {components.path!r}"
                    )
                # A temporary root makes the joined URI temporary too.
                isTemporary = isTemporary or base.isTemporary
                joined = base.join(
                    components.path, forceDirectory=forceDirectory, isTemporary=isTemporary
                )

                # join() drops fragments, so copy scheme/netloc/path from
                # the joined URI onto the components we were given rather
                # than returning the joined URI directly.
                components = components._replace(
                    scheme=joined.scheme, path=joined.path, netloc=joined.netloc
                )
                target_cls = type(joined)

                # The root has now been applied; do not apply it again.
                base = None
        elif scheme == "file":
            from .file import FileResourcePath

            target_cls = FileResourcePath
        elif scheme == "s3":
            from .s3 import S3ResourcePath

            target_cls = S3ResourcePath
        elif scheme.startswith("http"):
            from .http import HttpResourcePath

            target_cls = HttpResourcePath
        elif scheme == "gs":
            from .gs import GSResourcePath

            target_cls = GSResourcePath
        elif scheme == "resource":
            # Rules for scheme names disallow pkg_resource
            from .packageresource import PackageResourcePath

            target_cls = PackageResourcePath
        elif scheme == "mem":
            # in-memory datastore object
            from .mem import InMemoryResourcePath

            target_cls = InMemoryResourcePath
        else:
            raise NotImplementedError(
                f"No URI support for scheme: '{components.scheme}' in {components.geturl()}"
            )

        components, is_dir = target_cls._fixupPathUri(
            components, root=base, forceAbsolute=forceAbsolute, forceDirectory=forceDirectory
        )

        # The fix-up can turn a schemeless URI into a file URI.
        if components.scheme == "file":
            from .file import FileResourcePath

            target_cls = FileResourcePath

    # Create the instance of the chosen subclass and set its attributes
    # directly.
    instance = object.__new__(target_cls)
    instance._uri = components
    instance.dirLike = is_dir
    instance.isTemporary = False if isTemporary is None else isTemporary
    return instance
@property
def scheme(self) -> str:
    """Return the scheme component of the URI.

    Notes
    -----
    The ``://`` separator is not part of the returned value.
    """
    parsed = self._uri
    return parsed.scheme
@property
def netloc(self) -> str:
    """Return the network location component of the URI."""
    parsed = self._uri
    return parsed.netloc
@property
def path(self) -> str:
    """Return the path component of the URI, exactly as stored."""
    parsed = self._uri
    return parsed.path
@property
def unquoted_path(self) -> str:
    """Return the URI path component with percent-escapes decoded."""
    quoted = self._uri.path
    return urllib.parse.unquote(quoted)
@property
def ospath(self) -> str:
    """Return the path component of the URI localized to the current OS.

    Raises
    ------
    AttributeError
        Always raised by this base implementation.
    """
    message = f"Non-file URI ({self}) has no local OS path."
    raise AttributeError(message)
@property
def relativeToPathRoot(self) -> str:
    """Return the path relative to the network location.

    This is the ``path`` property with the root separator removed from its
    left-hand side. A directory-like URI always yields a result ending in
    ``/``.

    The returned value is always unquoted.
    """
    pure = self._pathLib(self.path)
    relative = str(pure.relative_to(pure.root))
    # The path library drops trailing separators; restore one for
    # directories.
    needs_separator = self.dirLike and not relative.endswith("/")
    return urllib.parse.unquote(relative + "/" if needs_separator else relative)
@property
def is_root(self) -> bool:
    """Return whether this URI points to the root of the network location.

    This is the case when the path relative to the root is ``./``, i.e.
    the path component refers to the top level.
    """
    return self.relativeToPathRoot == "./"
@property
def fragment(self) -> str:
    """Return the fragment component of the URI."""
    parsed = self._uri
    return parsed.fragment
@property
def params(self) -> str:
    """Return the parameters component of the URI."""
    parsed = self._uri
    return parsed.params
@property
def query(self) -> str:
    """Return the query string component of the URI."""
    parsed = self._uri
    return parsed.query
def geturl(self) -> str:
    """Return the URI as a string.

    Returns
    -------
    url : `str`
        String form of the URI.
    """
    parsed = self._uri
    return parsed.geturl()
def root_uri(self) -> ResourcePath:
    """Return the base root URI.

    Returns
    -------
    uri : `ResourcePath`
        Directory-like copy of this URI with its path replaced by the
        empty string.
    """
    return self.replace(forceDirectory=True, path="")
def split(self) -> tuple[ResourcePath, str]:
    """Split the URI into a head URI and a tail component.

    Returns
    -------
    head : `ResourcePath`
        Everything leading up to the tail, expanded and normalized as per
        `ResourcePath` rules.
    tail : `str`
        Last path component, unquoted. Empty if the path ends on a
        separator. Never contains separators.

    Notes
    -----
    Equivalent to `os.path.split`, except that the head keeps the other
    URI components.
    """
    path_module = self._pathModule
    head_path, quoted_tail = path_module.split(self.path)

    # Schemeless URIs may be relative and have to stay that way, so only
    # force the head to be absolute when this path already is. Every other
    # URI is absolute already.
    keep_absolute = path_module.isabs(self.path)
    head_uri = ResourcePath(
        self._uri._replace(path=head_path),
        forceDirectory=True,
        forceAbsolute=keep_absolute,
    )

    # The file part should never include quoted metacharacters.
    return head_uri, urllib.parse.unquote(quoted_tail)
def basename(self) -> str:
    """Return the last element of the URI path.

    Returns
    -------
    tail : `str`
        Last part of the path attribute. Empty if the path ends on a
        separator.

    Notes
    -----
    This is the second element returned by `split()` and the equivalent
    of `os.path.basename`.
    """
    _, tail = self.split()
    return tail
def dirname(self) -> ResourcePath:
    """Return the directory component of the path as a new `ResourcePath`.

    Returns
    -------
    head : `ResourcePath`
        Everything except the tail of the path attribute, expanded and
        normalized as per `ResourcePath` rules.

    Notes
    -----
    This is the first element returned by `split()` and the equivalent
    of `os.path.dirname`.
    """
    head, _ = self.split()
    return head
def parent(self) -> ResourcePath:
    """Return a `ResourcePath` for the parent directory.

    Returns
    -------
    head : `ResourcePath`
        Everything except the tail of the path attribute, expanded and
        normalized as per `ResourcePath` rules.

    Notes
    -----
    For a file-like URI this is the same as calling `dirname()`.
    """
    if self.dirLike:
        # The path library ignores any trailing separator, so its parent
        # is the enclosing directory.
        enclosing = self._pathLib(self.path).parent
        return self.replace(path=str(enclosing), forceDirectory=True)
    return self.dirname()
def replace(self, forceDirectory: bool = False, isTemporary: bool = False, **kwargs: Any) -> ResourcePath:
    """Return a new `ResourcePath` with the specified components replaced.

    Parameters
    ----------
    forceDirectory : `bool`, optional
        Passed to the constructor to force the new URI to be dir-like.
    isTemporary : `bool`, optional
        Indicate that the resulting URI is a temporary resource.
    **kwargs
        Components of a `urllib.parse.ParseResult` that should be modified
        for the newly-created `ResourcePath`.

    Returns
    -------
    new : `ResourcePath`
        New object of the same class as this one with updated values.

    Raises
    ------
    ValueError
        Raised if a change of URI scheme is requested.
    """
    if "scheme" in kwargs:
        raise ValueError(f"Can not use replace() method to change URI scheme for {self}")
    factory = self.__class__
    updated_components = self._uri._replace(**kwargs)
    return factory(updated_components, forceDirectory=forceDirectory, isTemporary=isTemporary)
def updatedFile(self, newfile: str) -> ResourcePath:
    """Return a new URI with the final path component replaced.

    Parameters
    ----------
    newfile : `str`
        File name with no path component.

    Returns
    -------
    updated : `ResourcePath`
        URI in the same directory as this one, naming ``newfile``.

    Notes
    -----
    The returned object always has ``dirLike`` set to `False`. The new
    file name is quoted when ``quotePaths`` is true for this class.
    """
    file_name = urllib.parse.quote(newfile) if self.quotePaths else newfile
    path_module = self._pathModule
    parent_dir = path_module.dirname(self.path)

    updated = self.replace(path=path_module.join(parent_dir, file_name))
    updated.dirLike = False
    return updated
def updatedExtension(self, ext: str | None) -> ResourcePath:
    """Return a new `ResourcePath` with an updated file extension.

    The whole current extension, as reported by `getExtension`, is
    replaced.

    Parameters
    ----------
    ext : `str` or `None`
        New extension. An empty string removes any extension. `None`
        means no change.

    Returns
    -------
    updated : `ResourcePath`
        URI with the specified extension. This object itself is returned
        when ``ext`` is `None` or equal to the current extension.
    """
    if ext is None:
        return self

    existing = self.getExtension()
    if existing == ext:
        return self

    # Slice off the reported extension rather than using os.path.splitext,
    # because something like .fits.gz counts as a single extension.
    stem = self.path[: -len(existing)] if existing else self.path

    # A non-empty extension always gets a leading "."; the empty string is
    # left alone.
    if ext and not ext.startswith("."):
        ext = "." + ext

    return self.replace(path=stem + ext)
def getExtension(self) -> str:
    """Return the file extension(s) associated with this URI path.

    Returns
    -------
    ext : `str`
        The file extension, including the leading ``.``, or an empty
        string if there is none. Normally only the last extension is
        returned. When that extension is one of the compression modifiers
        (``.gz``, ``.bz2``, ``.xz``, ``.fz``) and another extension
        precedes it, the two are returned combined (e.g. ``.fits.gz``).
    """
    compression = {".gz", ".bz2", ".xz", ".fz"}

    # Only look at the file part so "." in directory names is ignored.
    suffixes = self._pathLib(self.basename()).suffixes
    if not suffixes:
        return ""

    *earlier, last = suffixes
    if earlier and last in compression:
        return f"{earlier[-1]}{last}"
    return last
def join(
    self, path: str | ResourcePath, isTemporary: bool | None = None, forceDirectory: bool = False
) -> ResourcePath:
    """Return a new `ResourcePath` with additional path components.

    Parameters
    ----------
    path : `str`, `ResourcePath`
        Additional path components to append to the current URI. Assumed
        to include a file at the end. Quoted depending on the associated
        URI scheme. If this is a URI that carries a scheme it is returned
        directly (matching the behavior of `os.path.join`).
    isTemporary : `bool`, optional
        Indicate that the resulting URI represents a temporary resource.
        Default is ``self.isTemporary``.
    forceDirectory : `bool`, optional
        If `True` forces the URI to end with a separator, otherwise the
        given URI is interpreted as is.

    Returns
    -------
    new : `ResourcePath`
        New URI with any file at the end replaced with the new path
        components.

    Raises
    ------
    ValueError
        Raised if ``path`` is an absolute scheme-less URI. In that
        situation it is unclear whether the intent is to return a ``file``
        URI or whether a relative scheme-less URI was meant.
    RuntimeError
        Raised if this URI is temporary and ``isTemporary`` is explicitly
        false.

    Notes
    -----
    Schemeless URIs assume the local path separator but all other URIs
    assume the POSIX separator if the supplied path has directory
    structure. Datastore templates assume the POSIX separator is used.
    """
    if isTemporary is None:
        isTemporary = self.isTemporary
    elif self.isTemporary and not isTemporary:
        raise RuntimeError("Cannot join temporary URI to non-temporary URI.")

    # Parse the argument without forcing it absolute so that the expected
    # case of a relative path can be told apart from a full URI.
    candidate = ResourcePath(
        path, forceAbsolute=False, forceDirectory=forceDirectory, isTemporary=isTemporary
    )

    # A scheme marks an explicit URI, which is used as is.
    if candidate.scheme:
        return candidate

    # No scheme but absolute: refuse rather than guess.
    if candidate.isabs():
        raise ValueError(f"Can not join absolute scheme-less {candidate!r} to another URI.")

    # A plain string is used as given so that "#" may appear in the file
    # name; anything else contributes its unquoted path.
    relative = path if isinstance(path, str) else candidate.unquoted_path

    base_dir = self.dirname()  # By definition a directory URI

    # Ask base_dir about quoting rather than self, because dirname() can
    # change the URI scheme (schemeless -> file).
    if base_dir.quotePaths:
        relative = urllib.parse.quote(relative)

    path_module = self._pathModule
    combined = path_module.normpath(path_module.join(base_dir.path, relative))

    # normpath can strip a trailing separator, so a supplied path that
    # ended with one still forces a directory.
    wants_directory = forceDirectory or relative.endswith(path_module.sep)
    return base_dir.replace(
        path=combined,
        forceDirectory=wants_directory,
        isTemporary=isTemporary,
    )
def relative_to(self, other: ResourcePath) -> str | None:
    """Return the path of this URI relative to another URI.

    Parameters
    ----------
    other : `ResourcePath`
        URI to use to calculate the relative path. Must be a parent of
        this URI.

    Returns
    -------
    subpath : `str` or `None`
        The sub path of this URI relative to ``other``, or `None` if
        there is no parent-child relationship. Scheme and netloc must
        match, except that the local netloc variants are treated as
        interchangeable.
    """
    # A scheme-less absolute ``other`` is treated as a file URI. A
    # scheme-less relative ``other`` can only match a scheme-less relative
    # self, which is handled in a subclass.
    if not other.scheme and other.isabs():
        other = other.abspath()

    # Scheme-less self is handled elsewhere.
    if self.scheme != other.scheme:
        return None

    # There are many spellings of localhost; any pair of them matches.
    local_netlocs = {"", "localhost", "localhost.localdomain", "127.0.0.1"}
    if self.netloc != other.netloc and not (
        self.netloc in local_netlocs and other.netloc in local_netlocs
    ):
        return None

    child = self._pathLib(self.relativeToPathRoot)
    try:
        relative = child.relative_to(other.relativeToPathRoot)
    except ValueError:
        # Not located underneath ``other``.
        return None
    return urllib.parse.unquote(str(relative))
def exists(self) -> bool:
    """Indicate whether the resource is available.

    Returns
    -------
    exists : `bool`
        `True` if the resource exists.

    Raises
    ------
    NotImplementedError
        Always raised by this base implementation.
    """
    raise NotImplementedError()
@classmethod
def mexists(cls, uris: Iterable[ResourcePath]) -> dict[ResourcePath, bool]:
    """Check for the existence of multiple URIs at once.

    Parameters
    ----------
    uris : iterable of `ResourcePath`
        The URIs to test.

    Returns
    -------
    existence : `dict` of [`ResourcePath`, `bool`]
        Mapping of original URI to boolean indicating existence.
    """
    # Bucket the URIs by class so that each class can apply its own
    # specialized ``_mexists`` implementation.
    by_class: dict[type, list[ResourcePath]] = {}
    for uri in uris:
        by_class.setdefault(uri.__class__, []).append(uri)

    existence: dict[ResourcePath, bool] = {}
    for uri_class, members in by_class.items():
        existence.update(uri_class._mexists(members))

    return existence
@classmethod
def _mexists(cls, uris: Iterable[ResourcePath]) -> dict[ResourcePath, bool]:
    """Check for the existence of multiple URIs at once.

    Implementation helper method for `mexists`. Each URI's ``exists()``
    is submitted to a thread pool of at most ``MAX_WORKERS`` threads.

    Parameters
    ----------
    uris : iterable of `ResourcePath`
        The URIs to test.

    Returns
    -------
    existence : `dict` of [`ResourcePath`, `bool`]
        Mapping of original URI to boolean indicating existence. A URI
        whose ``exists()`` call raised is reported as `False`.
    """
    results: dict[ResourcePath, bool] = {}
    # Use the executor as a context manager so that its worker threads are
    # always shut down, even if something in the loop below raises.
    with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as exists_executor:
        future_exists = {exists_executor.submit(uri.exists): uri for uri in uris}

        for future in concurrent.futures.as_completed(future_exists):
            uri = future_exists[future]
            try:
                exists = future.result()
            except Exception:
                # Deliberate best effort: a failed check counts as
                # "does not exist".
                exists = False
            results[uri] = exists
    return results
def remove(self) -> None:
    """Remove the resource.

    Raises
    ------
    NotImplementedError
        Always raised by this base implementation.
    """
    raise NotImplementedError()
def isabs(self) -> bool:
    """Indicate whether the resource is fully specified.

    Returns
    -------
    isabs : `bool`
        This base implementation always returns `True`. Only schemeless
        URIs are expected to answer differently.
    """
    return True
def abspath(self) -> ResourcePath:
    """Return this URI using an absolute path.

    Returns
    -------
    abs : `ResourcePath`
        Absolute URI. This base implementation returns the object itself;
        schemeless URIs are expected to be upgraded to file URIs instead.
    """
    return self
def _as_local(self) -> tuple[str, bool]:
    """Return the location of the (possibly remote) resource as local file.

    Helper for the `as_local` context manager, to be provided by
    subclasses.

    Returns
    -------
    path : `str`
        For a remote resource, a copy of the resource on the local file
        system, probably in a temporary directory. For a local resource
        this should be the actual path to the resource.
    is_temporary : `bool`
        Indicates whether the local path is a temporary file.

    Raises
    ------
    NotImplementedError
        Always raised by this base implementation.
    """
    raise NotImplementedError()
@contextlib.contextmanager
def as_local(self) -> Iterator[ResourcePath]:
    """Return the location of the (possibly remote) resource as local file.

    Yields
    ------
    local : `ResourcePath`
        For a remote resource, a copy of the resource on the local file
        system, probably in a temporary directory. For a local resource
        this should be the actual path to the resource.

    Raises
    ------
    IsADirectoryError
        Raised if this URI is directory-like.

    Notes
    -----
    The context manager automatically deletes any local temporary file.

    Examples
    --------
    Should be used as a context manager:

    .. code-block:: py

       with uri.as_local() as local:
           ospath = local.ospath
    """
    if self.dirLike:
        raise IsADirectoryError(f"Directory-like URI {self} cannot be fetched as local.")

    local_path, made_temporary = self._as_local()
    local_copy = ResourcePath(local_path, isTemporary=made_temporary)

    try:
        yield local_copy
    finally:
        # Never delete when the local copy is this very resource (a
        # temporary file that was already local), and tolerate the caller
        # having relocated the temporary file.
        if self != local_copy:
            if made_temporary and local_copy.exists():
                local_copy.remove()
@classmethod
@contextlib.contextmanager
def temporary_uri(
    cls, prefix: ResourcePath | None = None, suffix: str | None = None
) -> Iterator[ResourcePath]:
    """Create a temporary file-like URI.

    Parameters
    ----------
    prefix : `ResourcePath`, optional
        Prefix to use. Without this the path will be formed as a local
        file URI in a temporary directory. Ensuring that the prefix
        location exists is the responsibility of the caller.
    suffix : `str`, optional
        A file suffix to be used. The ``.`` should be included in this
        suffix.

    Yields
    ------
    uri : `ResourcePath`
        The temporary URI. Will be removed when the context is completed.

    Raises
    ------
    NotImplementedError
        Raised if the generated URI turns out to be directory-like.
    """
    # Path of the directory created here, if any. The prefix's isTemporary
    # flag can not be used to decide on deletion because an external
    # prefix may have it set as well.
    tempdir: str | None = None
    if prefix is None:
        tempdir = tempfile.mkdtemp()

    try:
        if tempdir is not None:
            # If the user has set a umask that restricts the owner-write
            # bit, the directory returned from mkdtemp may not initially
            # be writeable by us.
            ensure_directory_is_writeable(tempdir)
            prefix = ResourcePath(tempdir, forceDirectory=True, isTemporary=True)

        # Need a randomized file name. For consistency do not use mkstemp
        # for local and something else for remote. The file itself is not
        # created here.
        characters = "abcdefghijklmnopqrstuvwxyz0123456789_"
        rng = Random()
        tempname = "".join(rng.choice(characters) for _ in range(16))
        if suffix:
            tempname += suffix
        temporary_uri = prefix.join(tempname, isTemporary=True)
        if temporary_uri.dirLike:
            # If we had a safe way to clean up a remote temporary
            # directory, we could support this.
            raise NotImplementedError("temporary_uri cannot be used to create a temporary directory.")
    except BaseException:
        # Nothing has been yielded yet, so the directory created above
        # would otherwise be left behind.
        if tempdir is not None:
            shutil.rmtree(tempdir, ignore_errors=True)
        raise

    try:
        yield temporary_uri
    finally:
        if tempdir is not None:
            shutil.rmtree(tempdir, ignore_errors=True)
        else:
            with contextlib.suppress(FileNotFoundError):
                # It's okay if this does not work because the user removed
                # the file.
                temporary_uri.remove()
def read(self, size: int = -1) -> bytes:
    """Open the resource and return its contents as bytes.

    Parameters
    ----------
    size : `int`, optional
        The number of bytes to read. Negative or omitted indicates that
        all data should be read.

    Raises
    ------
    NotImplementedError
        Always raised by this base implementation.
    """
    raise NotImplementedError()
def write(self, data: bytes, overwrite: bool = True) -> None:
    """Write the supplied bytes to the resource.

    Parameters
    ----------
    data : `bytes`
        The bytes to write to the resource. The entire contents of the
        resource will be replaced.
    overwrite : `bool`, optional
        If `True` the resource will be overwritten if it exists.
        Otherwise the write will fail.

    Raises
    ------
    NotImplementedError
        Always raised by this base implementation.
    """
    raise NotImplementedError()
def mkdir(self) -> None:
    """For a dir-like URI, create the directory resource if needed.

    Raises
    ------
    NotImplementedError
        Always raised by this base implementation.
    """
    raise NotImplementedError()
def isdir(self) -> bool:
    """Report whether this URI looks like a directory.

    Returns
    -------
    isdir : `bool`
        The value of the ``dirLike`` attribute of this URI.
    """
    looks_like_directory = self.dirLike
    return looks_like_directory
def size(self) -> int:
    """Return the size of the resource for a non-dir-like URI.

    Returns
    -------
    sz : `int`
        Size in bytes of the resource this URI refers to. The documented
        contract is that a dir-like URI reports 0.

    Raises
    ------
    NotImplementedError
        Always raised by this base implementation.
    """
    raise NotImplementedError()
def __str__(self) -> str:
    """Return the native string form of the URI (the ``geturl()`` value)."""
    url = self.geturl()
    return url
def __repr__(self) -> str:
    """Return a representation of the form ``ResourcePath("<url>")``."""
    url = self.geturl()
    return f'ResourcePath("{url}")'
def __eq__(self, other: Any) -> bool:
    """Compare this `ResourcePath` with another object.

    Two `ResourcePath` instances are equal when their ``geturl()`` strings
    are equal. For any other type `NotImplemented` is returned so that
    Python can try the reflected comparison.
    """
    if isinstance(other, ResourcePath):
        return self.geturl() == other.geturl()
    return NotImplemented
def __hash__(self) -> int:
    """Return the hash of the string form of this object."""
    as_string = str(self)
    return hash(as_string)
def __lt__(self, other: ResourcePath) -> bool:
    """Order by URL string: `True` if this URL sorts before the other."""
    mine, theirs = self.geturl(), other.geturl()
    return mine < theirs
def __le__(self, other: ResourcePath) -> bool:
    """Order by URL string: `True` if this URL sorts before or equals."""
    mine, theirs = self.geturl(), other.geturl()
    return mine <= theirs
def __gt__(self, other: ResourcePath) -> bool:
    """Order by URL string: `True` if this URL sorts after the other."""
    mine, theirs = self.geturl(), other.geturl()
    return mine > theirs
def __ge__(self, other: ResourcePath) -> bool:
    """Order by URL string: `True` if this URL sorts after or equals."""
    mine, theirs = self.geturl(), other.geturl()
    return mine >= theirs
def __copy__(self) -> ResourcePath:
    """Support `copy.copy`.

    The object is immutable, so the "copy" is the object itself.
    """
    # Defined explicitly because the class's __new__ method confuses the
    # default copy machinery.
    return self
def __deepcopy__(self, memo: Any) -> ResourcePath:
    """Support `copy.deepcopy`.

    The object is immutable, so the "copy" is the object itself; ``memo``
    is not consulted.
    """
    # Defined explicitly because the class's __new__ method confuses the
    # default copy machinery.
    return self
def __getnewargs__(self) -> tuple:
    """Support pickling.

    Returns
    -------
    args : `tuple`
        One-element tuple holding the string form of this object.
    """
    as_string = str(self)
    return (as_string,)
@classmethod
def _fixDirectorySep(
    cls, parsed: urllib.parse.ParseResult, forceDirectory: bool = False
) -> tuple[urllib.parse.ParseResult, bool]:
    """Make sure a directory-like path ends with the path separator.

    Parameters
    ----------
    parsed : `~urllib.parse.ParseResult`
        The result from parsing a URI using `urllib.parse`.
    forceDirectory : `bool`, optional
        If `True` the URI is treated as a directory and a trailing
        separator is appended when missing. Otherwise the URI is taken
        as given. Declaring a URI to be a directory can resolve
        ambiguities in interpreting the last element of its path.

    Returns
    -------
    modified : `~urllib.parse.ParseResult`
        The parsed URI, with a separator appended to the path if that was
        needed; otherwise the input object unchanged.
    dirLike : `bool`
        `True` if the path already ended with a separator or
        ``forceDirectory`` is `True`. Otherwise `False`.
    """
    # The separator comes from the path module of the concrete class.
    separator = cls._pathModule.sep
    has_trailing_separator = parsed.path.endswith(separator)

    # Not a directory unless stated explicitly or implied by the path.
    if not (forceDirectory or has_trailing_separator):
        return parsed, False

    # Directory-like: append the separator only when it is missing.
    if not has_trailing_separator:
        parsed = parsed._replace(path=parsed.path + separator)
    return parsed, True
@classmethod
def _fixupPathUri(
    cls,
    parsed: urllib.parse.ParseResult,
    root: ResourcePath | None = None,
    forceAbsolute: bool = False,
    forceDirectory: bool = False,
) -> tuple[urllib.parse.ParseResult, bool]:
    """Correct any issues with the supplied URI.

    Parameters
    ----------
    parsed : `~urllib.parse.ParseResult`
        The result from parsing a URI using `urllib.parse`.
    root : `ResourcePath`, ignored
        Not used by this implementation since all URIs are absolute
        except for those representing the local file system.
    forceAbsolute : `bool`, ignored
        Not used by this implementation. URIs are generally always
        absolute.
    forceDirectory : `bool`, optional
        If `True` the URI is forced to end with a separator, otherwise
        the URI is interpreted as given. Declaring a URI to be a
        directory can resolve ambiguities in interpreting the last
        element of its path.

    Returns
    -------
    modified : `~urllib.parse.ParseResult`
        Updated result if a URI is being handled.
    dirLike : `bool`
        `True` if the parsed URI has a trailing separator or
        ``forceDirectory`` is `True`. Otherwise `False`.

    Notes
    -----
    This base implementation only delegates to ``_fixDirectorySep``.

    Relative paths are explicitly not supported by RFC8089 but `urllib`
    does accept URIs of the form ``file:relative/path.ext``. They need
    to be turned into absolute paths before they can be used. This is
    always done regardless of the ``forceAbsolute`` parameter.

    AWS S3 differentiates between keys with trailing POSIX separators
    (i.e. ``/dir`` and ``/dir/``) whereas POSIX does not necessarily.

    Scheme-less paths are normalized.
    """
    # ``root`` and ``forceAbsolute`` are deliberately unused here.
    fixed = cls._fixDirectorySep(parsed, forceDirectory)
    return fixed
def transfer_from(
    self,
    src: ResourcePath,
    transfer: str,
    overwrite: bool = False,
    transaction: TransactionProtocol | None = None,
) -> None:
    """Transfer a resource from another URI to this one.

    Parameters
    ----------
    src : `ResourcePath`
        Source URI.
    transfer : `str`
        Mode to use for transferring the resource. Generically there are
        many standard options: copy, link, symlink, hardlink, relsymlink.
        Not all URIs support all modes.
    overwrite : `bool`, optional
        Allow an existing file to be overwritten. Defaults to `False`.
    transaction : `~lsst.resources.utils.TransactionProtocol`, optional
        A transaction object that can (depending on implementation)
        rollback transfers on error. Not guaranteed to be implemented.

    Raises
    ------
    NotImplementedError
        Always raised by this base implementation, which supports no
        transfer modes.

    Notes
    -----
    Conceptually this is hard to scale as the number of URI schemes
    grows. The destination URI matters more than the source URI because
    that is where all the transfer modes are relevant (with the
    complication that "move" deletes the source).

    Local file to local file is the fundamental use case, but every
    other scheme has to support "copy" to local file (with implicit
    support for "move") and copy from local file. All the "link" options
    tend to be specific to local file systems.

    "move" is a "copy" where the remote resource is deleted at the end.
    Whether this works depends on the source URI rather than the
    destination URI. Reverting a move on transaction rollback is
    expected to be problematic if a remote resource was involved.
    """
    message = f"No transfer modes supported by URI scheme {self.scheme}"
    raise NotImplementedError(message)
def walk(
    self, file_filter: str | re.Pattern | None = None
) -> Iterator[list | tuple[ResourcePath, list[str], list[str]]]:
    """Walk the directory tree, reporting matching files and directories.

    Parameters
    ----------
    file_filter : `str` or `re.Pattern`, optional
        Regex used to filter the file names before they are returned.

    Yields
    ------
    dirpath : `ResourcePath`
        Current directory being examined.
    dirnames : `list` of `str`
        Names of subdirectories within dirpath.
    filenames : `list` of `str`
        Names of all the files within dirpath.

    Raises
    ------
    NotImplementedError
        Always raised by this base implementation.
    """
    raise NotImplementedError()
@overload
@classmethod
def findFileResources(
    cls,
    candidates: Iterable[ResourcePathExpression],
    file_filter: str | re.Pattern | None,
    grouped: Literal[True],
) -> Iterator[Iterator[ResourcePath]]:
    ...

@overload
@classmethod
def findFileResources(
    cls,
    candidates: Iterable[ResourcePathExpression],
    *,
    grouped: Literal[True],
) -> Iterator[Iterator[ResourcePath]]:
    ...

@overload
@classmethod
def findFileResources(
    cls,
    candidates: Iterable[ResourcePathExpression],
    file_filter: str | re.Pattern | None = None,
    grouped: Literal[False] = False,
) -> Iterator[ResourcePath]:
    ...

@classmethod
def findFileResources(
    cls,
    candidates: Iterable[ResourcePathExpression],
    file_filter: str | re.Pattern | None = None,
    grouped: bool = False,
) -> Iterator[ResourcePath | Iterator[ResourcePath]]:
    """Get all the files from a list of values.

    Parameters
    ----------
    candidates : iterable [`str` or `ResourcePath`]
        The files to return and directories in which to look for files to
        return.
    file_filter : `str` or `re.Pattern`, optional
        The regex to use when searching for files within directories.
        By default returns all the found files.
    grouped : `bool`, optional
        If `True` the results will be grouped by directory and each
        yielded value will be an iterator over URIs. If `False` each
        URI will be returned separately.

    Yields
    ------
    found_file : `ResourcePath`
        The passed-in URIs and URIs found in passed-in directories.
        If grouping is enabled, each of the yielded values will be an
        iterator yielding members of the group. Files given explicitly
        will be returned as a single group at the end.

    Notes
    -----
    If a value is a file it is yielded immediately without checking that it
    exists. If a value is a directory, all the files in the directory
    (recursively) that match the regex will be yielded in turn.

    Each group iterator is bound to its own directory when it is yielded,
    so groups may be collected first and consumed later, in any order.
    """
    fileRegex = None if file_filter is None else re.compile(file_filter)

    # Explicitly named (non-directory) URIs; only filled when grouping.
    singles = []

    # Find all the files of interest.
    for location in candidates:
        uri = ResourcePath(location)

        if not uri.isdir():
            if grouped:
                singles.append(uri)
            else:
                yield uri
            continue

        for found in uri.walk(fileRegex):
            if not found:
                # This means the uri does not exist and by
                # convention we ignore it.
                continue
            root, _, files = found
            if not files:
                continue
            if grouped:
                # Bind ``root.join`` now. A generator expression would
                # look ``root`` up lazily and, once this loop has moved
                # on, join the names onto a later directory.
                yield map(root.join, files)
            else:
                for name in files:
                    yield root.join(name)

    # Finally, return any explicitly given files in one group.
    if grouped and singles:
        yield iter(singles)
@contextlib.contextmanager
def open(
    self,
    mode: str = "r",
    *,
    encoding: str | None = None,
    prefer_file_temporary: bool = False,
) -> Iterator[ResourceHandleProtocol]:
    """Return a context manager that wraps an object that behaves like an
    open file at the location of the URI.

    Parameters
    ----------
    mode : `str`
        String indicating the mode in which to open the file. Values are
        the same as those accepted by `open`, though intrinsically
        read-only URI types may only support read modes, and
        `io.IOBase.seekable` is not guaranteed to be `True` on the returned
        object.
    encoding : `str`, optional
        Unicode encoding for text IO; ignored for binary IO. Defaults to
        ``locale.getpreferredencoding(False)``, just as `open`
        does.
    prefer_file_temporary : `bool`, optional
        If `True`, for implementations that require transfers from a remote
        system to temporary local storage and/or back, use a temporary file
        instead of an in-memory buffer; this is generally slower, but it
        may be necessary to avoid excessive memory usage by large files.
        Ignored by implementations that do not require a temporary.

    Yields
    ------
    cm : `~contextlib.AbstractContextManager`
        A context manager that wraps a `ResourceHandleProtocol` file-like
        object.

    Raises
    ------
    IsADirectoryError
        Raised if this URI is directory-like.
    FileExistsError
        Raised if ``mode`` contains ``"x"`` and the resource already
        exists.

    Notes
    -----
    The default implementation of this method uses a local temporary buffer
    (in-memory or file, depending on ``prefer_file_temporary``) with calls
    to `read`, `write`, `as_local`, and `transfer_from` as necessary to
    read and write from/to remote systems. Remote writes thus occur only
    when the context manager is exited. `ResourcePath` implementations
    that can return a more efficient native buffer should do so whenever
    possible (as is guaranteed for local files). `ResourcePath`
    implementations for which `as_local` does not return a temporary are
    required to reimplement `open`, though they may delegate to `super`
    when ``prefer_file_temporary`` is `False`.
    """
    if self.dirLike:
        raise IsADirectoryError(f"Directory-like URI {self} cannot be opened.")
    if "x" in mode and self.exists():
        raise FileExistsError(f"File at {self} already exists.")

    if not prefer_file_temporary:
        # In-memory buffer path, handled entirely by _openImpl.
        with self._openImpl(mode, encoding=encoding) as handle:
            yield handle
        return

    # Modes that need the existing content start from a local copy of the
    # resource; pure write modes start from a fresh temporary name.
    if "r" in mode or "a" in mode:
        local_cm = self.as_local()
    else:
        local_cm = self.temporary_uri(suffix=self.getExtension())

    # The builtin open raises ValueError if an encoding is supplied in
    # binary mode, but this method documents the encoding as ignored there.
    file_encoding = None if "b" in mode else encoding

    with local_cm as local_uri:
        assert local_uri.isTemporary, (
            "ResourcePath implementations for which as_local is not "
            "a temporary must reimplement `open`."
        )
        # io.open is the builtin open; spelled this way so that it cannot
        # be confused with this method of the same name.
        with io.open(local_uri.ospath, mode=mode, encoding=file_encoding) as file_buffer:
            if "a" in mode:
                file_buffer.seek(0, io.SEEK_END)
            yield file_buffer
        # The local file is closed (and therefore flushed) at this point,
        # so it is safe to copy it back for any mode that can write.
        if "r" not in mode or "+" in mode:
            self.transfer_from(local_uri, transfer="copy", overwrite=("x" not in mode))
@contextlib.contextmanager
def _openImpl(self, mode: str = "r", *, encoding: str | None = None) -> Iterator[ResourceHandleProtocol]:
    """Implement opening of a resource handle.

    This private method may be overridden by specific `ResourcePath`
    implementations to provide a customized handle-like interface.

    Parameters
    ----------
    mode : `str`
        The mode the handle should be opened with.
    encoding : `str`, optional
        Encoding used to convert between bytes and text in text modes.
        Defaults to ``locale.getpreferredencoding(False)``; not used in
        binary modes.

    Yields
    ------
    handle : `~._resourceHandles.BaseResourceHandle`
        A handle that conforms to the
        `~._resourceHandles.BaseResourceHandle` interface. In this base
        implementation it is an `io.BytesIO` or `io.StringIO` buffer.

    Notes
    -----
    The base implementation reads the entire contents of the resource
    into an in-memory buffer for manipulation and writes the buffer back
    out when the context exits. Subclasses may offer more fine-grained
    control.
    """
    appending = "a" in mode
    # Read and append modes start from the existing content.
    initial = self.read() if "r" in mode or appending else b""

    if "b" in mode:
        binary_buffer = io.BytesIO(initial)
        if appending:
            binary_buffer.seek(0, io.SEEK_END)
        yield binary_buffer
        result = binary_buffer.getvalue()
    else:
        if encoding is None:
            encoding = locale.getpreferredencoding(False)
        text_buffer = io.StringIO(initial.decode(encoding))
        if appending:
            text_buffer.seek(0, io.SEEK_END)
        yield text_buffer
        result = text_buffer.getvalue().encode(encoding)

    # Anything other than a plain read mode writes the buffer back.
    writes_back = "r" not in mode or "+" in mode
    if writes_back:
        self.write(result, overwrite=("x" not in mode))
# Union of the types named below. The ``|`` expression is evaluated when
# this statement runs, so it has to come after the ResourcePath class
# definition.
ResourcePathExpression = str | urllib.parse.ParseResult | ResourcePath | Path
"""Type-annotation alias for objects that can be coerced to ResourcePath.
"""