Coverage for python/lsst/resources/_resourcePath.py: 28%
430 statements
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-23 10:46 +0000
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-23 10:46 +0000
1# This file is part of lsst-resources.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# Use of this source code is governed by a 3-clause BSD-style
10# license that can be found in the LICENSE file.
12from __future__ import annotations
14import concurrent.futures
15import contextlib
16import copy
17import io
18import locale
19import logging
20import os
21import posixpath
22import re
23import shutil
24import tempfile
25import urllib.parse
26from pathlib import Path, PurePath, PurePosixPath
27from random import Random
29__all__ = ("ResourcePath", "ResourcePathExpression")
31from collections.abc import Iterable, Iterator
32from typing import TYPE_CHECKING, Any, Literal, overload
34from ._resourceHandles._baseResourceHandle import ResourceHandleProtocol
35from .utils import ensure_directory_is_writeable
37if TYPE_CHECKING:
38 from .utils import TransactionProtocol
# Module-level logger named after this module.
log = logging.getLogger(__name__)

# Regex for looking for URI escapes. Only upper-case hexadecimal digits
# are matched, so a lower-case escape such as ``%2f`` is not detected.
ESCAPES_RE = re.compile(r"%[A-F0-9]{2}")

# Precomputed escaped hash
ESCAPED_HASH = urllib.parse.quote("#")

# Maximum number of worker threads for parallelized operations.
# If greater than 10, be aware that this number has to be consistent
# with connection pool sizing (for example in urllib3).
MAX_WORKERS = 10
55class ResourcePath: # numpydoc ignore=PR02
56 """Convenience wrapper around URI parsers.
58 Provides access to URI components and can convert file
59 paths into absolute path URIs. Scheme-less URIs are treated as if
60 they are local file system paths and are converted to absolute URIs.
62 A specialist subclass is created for each supported URI scheme.
64 Parameters
65 ----------
66 uri : `str`, `pathlib.Path`, `urllib.parse.ParseResult`, or `ResourcePath`
67 URI in string form. Can be scheme-less if referring to a relative
68 path or an absolute path on the local file system.
69 root : `str` or `ResourcePath`, optional
70 When fixing up a relative path in a ``file`` scheme or if scheme-less,
71 use this as the root. Must be absolute. If `None` the current
72 working directory will be used. Can be any supported URI scheme.
73 Not used if ``forceAbsolute`` is `False`.
74 forceAbsolute : `bool`, optional
75 If `True`, scheme-less relative URI will be converted to an absolute
76 path using a ``file`` scheme. If `False` scheme-less URI will remain
77 scheme-less and will not be updated to ``file`` or absolute path unless
78 it is already an absolute path, in which case it will be updated to
79 a ``file`` scheme.
80 forceDirectory : `bool`, optional
81 If `True` forces the URI to end with a separator, otherwise given URI
82 is interpreted as is.
83 isTemporary : `bool`, optional
84 If `True` indicates that this URI points to a temporary resource.
85 The default is `False`, unless ``uri`` is already a `ResourcePath`
86 instance and ``uri.isTemporary is True``.
88 Notes
89 -----
90 A non-standard URI of the form ``file:dir/file.txt`` is always converted
91 to an absolute ``file`` URI.
92 """
94 _pathLib: type[PurePath] = PurePosixPath
95 """Path library to use for this scheme."""
97 _pathModule = posixpath
98 """Path module to use for this scheme."""
100 transferModes: tuple[str, ...] = ("copy", "auto", "move")
101 """Transfer modes supported by this implementation.
103 Move is special in that it is generally a copy followed by an unlink.
104 Whether that unlink works depends critically on whether the source URI
105 implements unlink. If it does not the move will be reported as a failure.
106 """
108 transferDefault: str = "copy"
109 """Default mode to use for transferring if ``auto`` is specified."""
111 quotePaths = True
112 """True if path-like elements modifying a URI should be quoted.
114 All non-schemeless URIs have to internally use quoted paths. Therefore
115 if a new file name is given (e.g. to updatedFile or join) a decision must
116 be made whether to quote it to be consistent.
117 """
119 isLocal = False
120 """If `True` this URI refers to a local file."""
122 # This is not an ABC with abstract methods because the __new__ being
123 # a factory confuses mypy such that it assumes that every constructor
124 # returns a ResourcePath and then determines that all the abstract methods
125 # are still abstract. If they are not marked abstract but just raise
126 # mypy is fine with it.
128 # mypy is confused without these
129 _uri: urllib.parse.ParseResult
130 isTemporary: bool
131 dirLike: bool
def __new__(
    cls,
    uri: ResourcePathExpression,
    root: str | ResourcePath | None = None,
    forceAbsolute: bool = True,
    forceDirectory: bool = False,
    isTemporary: bool | None = None,
) -> ResourcePath:
    """Create and return new specialist ResourcePath subclass.

    Parameters
    ----------
    uri : `str`, `os.PathLike`, `urllib.parse.ParseResult`, or `ResourcePath`
        The URI to wrap. Path-like objects are converted with `str`.
    root : `str` or `ResourcePath`, optional
        Root used to resolve a relative scheme-less ``uri``. A string
        root is converted to a directory-like absolute `ResourcePath`.
    forceAbsolute : `bool`, optional
        Forwarded to the subclass ``_fixupPathUri`` implementation.
    forceDirectory : `bool`, optional
        Request that the result be directory-like.
    isTemporary : `bool`, optional
        Whether the result refers to a temporary resource. `None` is
        stored as `False` on a newly-created instance.

    Returns
    -------
    new : `ResourcePath`
        Instance of the subclass selected from the URI scheme. If ``uri``
        is already a `ResourcePath` compatible with the requested flags
        it is returned unchanged.

    Raises
    ------
    RuntimeError
        Raised if ``uri`` is a `ResourcePath` that is file-like while
        ``forceDirectory`` is `True`, or whose ``isTemporary`` state
        disagrees with an explicitly supplied ``isTemporary``.
    ValueError
        Raised if ``uri`` is of an unsupported type, or if a
        non-``file`` root that is not directory-like is combined with a
        relative scheme-less path.
    NotImplementedError
        Raised if the URI scheme is not recognized.
    """
    parsed: urllib.parse.ParseResult
    dirLike: bool = False
    subclass: type[ResourcePath] | None = None

    # Force root to be a ResourcePath -- this simplifies downstream
    # code.
    if root is None:
        root_uri = None
    elif isinstance(root, str):
        root_uri = ResourcePath(root, forceDirectory=True, forceAbsolute=True)
    else:
        root_uri = root

    if isinstance(uri, os.PathLike):
        uri = str(uri)

    # Record if we need to post process the URI components
    # or if the instance is already fully configured
    if isinstance(uri, str):
        # Since local file names can have special characters in them
        # we need to quote them for the parser but we can unquote
        # later. Assume that all other URI schemes are quoted.
        # Since sometimes people write file:/a/b and not file:///a/b
        # we should not quote in the explicit case of file:
        if "://" not in uri and not uri.startswith("file:"):
            if ESCAPES_RE.search(uri):
                # The string already looks percent-encoded, so it is
                # passed to the parser unchanged.
                log.warning("Possible double encoding of %s", uri)
            else:
                # Fragments are generally not encoded so we must search
                # for the fragment boundary ourselves. This is making
                # an assumption that the filename does not include a "#"
                # and also that there is no "/" in the fragment itself.
                to_encode = uri
                fragment = ""
                if "#" in uri:
                    # Only look for "#" in the final path component.
                    dirpos = uri.rfind("/")
                    trailing = uri[dirpos + 1 :]
                    hashpos = trailing.rfind("#")
                    if hashpos != -1:
                        fragment = trailing[hashpos:]
                        to_encode = uri[: dirpos + hashpos + 1]

                uri = urllib.parse.quote(to_encode) + fragment

        parsed = urllib.parse.urlparse(uri)
    elif isinstance(uri, urllib.parse.ParseResult):
        parsed = copy.copy(uri)
        # If we are being instantiated with a subclass, rather than
        # ResourcePath, ensure that that subclass is used directly.
        # This could lead to inconsistencies if this constructor
        # is used externally outside of the ResourcePath.replace() method.
        #   S3ResourcePath(urllib.parse.urlparse("file://a/b.txt"))
        # will be a problem.
        # This is needed to prevent a schemeless absolute URI become
        # a file URI unexpectedly when calling updatedFile or
        # updatedExtension
        if cls is not ResourcePath:
            parsed, dirLike = cls._fixDirectorySep(parsed, forceDirectory)
            subclass = cls

    elif isinstance(uri, ResourcePath):
        # Since ResourcePath is immutable we can return the argument
        # unchanged if it already agrees with forceDirectory, isTemporary,
        # and forceAbsolute.
        # We invoke __new__ again with str(self) to add a scheme for
        # forceAbsolute, but for the others that seems more likely to paper
        # over logic errors than do something useful, so we just raise.
        if forceDirectory and not uri.dirLike:
            raise RuntimeError(
                f"{uri} is already a file-like ResourcePath; cannot force it to directory."
            )
        if isTemporary is not None and isTemporary is not uri.isTemporary:
            raise RuntimeError(
                f"{uri} is already a {'temporary' if uri.isTemporary else 'permanent'} "
                f"ResourcePath; cannot make it {'temporary' if isTemporary else 'permanent'}."
            )
        if forceAbsolute and not uri.scheme:
            return ResourcePath(
                str(uri),
                root=root,
                forceAbsolute=True,
                forceDirectory=uri.dirLike,
                isTemporary=uri.isTemporary,
            )
        return uri
    else:
        raise ValueError(
            f"Supplied URI must be string, Path, ResourcePath, or ParseResult but got '{uri!r}'"
        )

    if subclass is None:
        # Work out the subclass from the URI scheme
        if not parsed.scheme:
            # Root may be specified as a ResourcePath that overrides
            # the schemeless determination.
            if (
                root_uri is not None
                and root_uri.scheme != "file"  # file scheme has different code path
                and not parsed.path.startswith("/")  # Not already absolute path
            ):
                if not root_uri.dirLike:
                    raise ValueError(
                        f"Root URI ({root}) was not a directory so can not be joined with"
                        f" path {parsed.path!r}"
                    )
                # If root is temporary or this schemeless is temporary we
                # assume this URI is temporary.
                isTemporary = isTemporary or root_uri.isTemporary
                joined = root_uri.join(
                    parsed.path, forceDirectory=forceDirectory, isTemporary=isTemporary
                )

                # Rather than returning this new ResourcePath directly we
                # instead extract the path and the scheme and adjust the
                # URI we were given -- we need to do this to preserve
                # fragments since join() will drop them.
                parsed = parsed._replace(scheme=joined.scheme, path=joined.path, netloc=joined.netloc)
                subclass = type(joined)

                # Clear the root parameter to indicate that it has
                # been applied already.
                root_uri = None
            else:
                from .schemeless import SchemelessResourcePath

                subclass = SchemelessResourcePath
        elif parsed.scheme == "file":
            from .file import FileResourcePath

            subclass = FileResourcePath
        elif parsed.scheme == "s3":
            from .s3 import S3ResourcePath

            subclass = S3ResourcePath
        elif parsed.scheme.startswith("http"):
            from .http import HttpResourcePath

            subclass = HttpResourcePath
        elif parsed.scheme == "gs":
            from .gs import GSResourcePath

            subclass = GSResourcePath
        elif parsed.scheme == "resource":
            # Rules for scheme names disallow pkg_resource
            from .packageresource import PackageResourcePath

            subclass = PackageResourcePath
        elif parsed.scheme == "mem":
            # in-memory datastore object
            from .mem import InMemoryResourcePath

            subclass = InMemoryResourcePath
        else:
            raise NotImplementedError(
                f"No URI support for scheme: '{parsed.scheme}' in {parsed.geturl()}"
            )

        # Let the selected subclass normalize the parsed components and
        # report whether the result is directory-like.
        parsed, dirLike = subclass._fixupPathUri(
            parsed, root=root_uri, forceAbsolute=forceAbsolute, forceDirectory=forceDirectory
        )

        # It is possible for the class to change from schemeless
        # to file so handle that
        if parsed.scheme == "file":
            from .file import FileResourcePath

            subclass = FileResourcePath

    # Now create an instance of the correct subclass and set the
    # attributes directly
    self = object.__new__(subclass)
    self._uri = parsed
    self.dirLike = dirLike
    if isTemporary is None:
        isTemporary = False
    self.isTemporary = isTemporary
    return self
@property
def scheme(self) -> str:
    """Scheme component of the URI.

    Notes
    -----
    The ``://`` separator is not part of the scheme.
    """
    parsed = self._uri
    return parsed.scheme
@property
def netloc(self) -> str:
    """Network location component of the URI."""
    parsed = self._uri
    return parsed.netloc
@property
def path(self) -> str:
    """Path component of the URI, exactly as stored."""
    parsed = self._uri
    return parsed.path
@property
def unquoted_path(self) -> str:
    """Path component of the URI with URI quoting reversed."""
    quoted = self._uri.path
    return urllib.parse.unquote(quoted)
@property
def ospath(self) -> str:
    """Path component of the URI localized to the current OS.

    This base implementation has no local path and always raises
    `AttributeError`.
    """
    message = f"Non-file URI ({self}) has no local OS path."
    raise AttributeError(message)
@property
def relativeToPathRoot(self) -> str:
    """Path relative to the network location.

    This is the ``path`` property with the root separator removed from
    the left-hand side. A directory-like URI always ends with ``/``.

    The result is always unquoted.
    """
    pure = self._pathLib(self.path)
    relative = str(pure.relative_to(pure.root))
    needs_separator = self.dirLike and not relative.endswith("/")
    if needs_separator:
        relative = relative + "/"
    return urllib.parse.unquote(relative)
@property
def is_root(self) -> bool:
    """Whether this URI points to the root of the network location.

    True when the path relative to the root is the top level (``./``).
    """
    return self.relativeToPathRoot == "./"
@property
def fragment(self) -> str:
    """Fragment component of the URI."""
    parsed = self._uri
    return parsed.fragment
@property
def params(self) -> str:
    """Parameters included in the URI, if any."""
    parsed = self._uri
    return parsed.params
@property
def query(self) -> str:
    """Query string included in the URI, if any."""
    parsed = self._uri
    return parsed.query
def geturl(self) -> str:
    """Return the string form of the URI.

    Returns
    -------
    url : `str`
        The URI as a string.
    """
    parsed = self._uri
    return parsed.geturl()
def root_uri(self) -> ResourcePath:
    """Return the base root URI.

    Returns
    -------
    uri : `ResourcePath`
        This URI with an empty path, forced to be directory-like.
    """
    root = self.replace(forceDirectory=True, path="")
    return root
def split(self) -> tuple[ResourcePath, str]:
    """Split the URI into a head URI and a tail string.

    Returns
    -------
    head : `ResourcePath`
        Everything leading up to tail, expanded and normalized as per
        ResourcePath rules.
    tail : `str`
        Last path component, unquoted. Empty if the path ends on a
        separator. Never contains separators.

    Notes
    -----
    Equivalent to `os.path.split` where head preserves the URI
    components.
    """
    path_module = self._pathModule
    head, quoted_tail = path_module.split(self.path)

    # The file part should never include quoted metacharacters
    tail = urllib.parse.unquote(quoted_tail)

    # Only a schemeless URI can be relative, and it must stay relative.
    # Every other URI is already absolute.
    keep_absolute = path_module.isabs(self.path)
    head_uri = ResourcePath(
        self._uri._replace(path=head), forceDirectory=True, forceAbsolute=keep_absolute
    )
    return head_uri, tail
def basename(self) -> str:
    """Return the base name, the last element of the path, of the URI.

    Returns
    -------
    tail : `str`
        Last part of the path attribute. Tail will be empty if the path
        ends on a separator.

    Notes
    -----
    This is the second element returned by `split()`, so a URI ending
    on a slash gives an empty string.

    Equivalent of `os.path.basename`.
    """
    _, tail = self.split()
    return tail
def dirname(self) -> ResourcePath:
    """Return the directory component of the path as a new `ResourcePath`.

    Returns
    -------
    head : `ResourcePath`
        Everything except the tail of the path attribute; the first
        element returned by `split()`.

    Notes
    -----
    Equivalent of `os.path.dirname`.
    """
    head, _ = self.split()
    return head
def parent(self) -> ResourcePath:
    """Return a `ResourcePath` of the parent directory.

    Returns
    -------
    head : `ResourcePath`
        For a file-like URI, the result of `dirname()`. For a dir-like
        URI, the directory above it, forced to be dir-like.

    Notes
    -----
    For a file-like URI this will be the same as calling `dirname()`.
    """
    if self.dirLike:
        # The path library ignores any trailing separator, so ``.parent``
        # is the enclosing directory whether or not one is present.
        enclosing = self._pathLib(self.path).parent
        return self.replace(path=str(enclosing), forceDirectory=True)
    return self.dirname()
def replace(self, forceDirectory: bool = False, isTemporary: bool = False, **kwargs: Any) -> ResourcePath:
    """Return new `ResourcePath` with specified components replaced.

    Parameters
    ----------
    forceDirectory : `bool`, optional
        Passed to the constructor to force the new URI to be dir-like.
    isTemporary : `bool`, optional
        Indicate that the resulting URI is a temporary resource.
    **kwargs
        Components of a `urllib.parse.ParseResult` that should be
        modified for the newly-created `ResourcePath`.

    Returns
    -------
    new : `ResourcePath`
        New object, of the same class as this one, with updated values.

    Raises
    ------
    ValueError
        Raised if a change of ``scheme`` is requested.

    Notes
    -----
    Does not, for now, allow a change in URI scheme.
    """
    if "scheme" in kwargs:
        # Disallow a change in scheme
        raise ValueError(f"Can not use replace() method to change URI scheme for {self}")
    modified = self._uri._replace(**kwargs)
    factory = self.__class__
    return factory(modified, forceDirectory=forceDirectory, isTemporary=isTemporary)
def updatedFile(self, newfile: str) -> ResourcePath:
    """Return new URI with an updated final component of the path.

    Parameters
    ----------
    newfile : `str`
        File name with no path component.

    Returns
    -------
    updated : `ResourcePath`
        Updated `ResourcePath` with new updated final component.

    Notes
    -----
    Forces the ResourcePath.dirLike attribute to be false. The new file
    name is quoted when ``quotePaths`` is set.
    """
    name = urllib.parse.quote(newfile) if self.quotePaths else newfile
    path_module = self._pathModule
    parent_dir, _ = path_module.split(self.path)

    result = self.replace(path=path_module.join(parent_dir, name))
    result.dirLike = False
    return result
def updatedExtension(self, ext: str | None) -> ResourcePath:
    """Return a new `ResourcePath` with updated file extension.

    All file extensions are replaced.

    Parameters
    ----------
    ext : `str` or `None`
        New extension. An empty string removes any extension. `None`
        means no change.

    Returns
    -------
    updated : `ResourcePath`
        URI with the specified extension. This object itself is returned
        if ``ext`` is `None` or already matches the current extension.
    """
    if ext is None:
        return self

    existing = self.getExtension()
    if existing == ext:
        # Nothing to do if the extension already matches
        return self

    # Strip the current extension by length rather than with
    # os.path.splitext because .fits.gz counts as one extension.
    stem = self.path[: -len(existing)] if existing else self.path

    # A non-empty extension always gets a leading "."; the empty string
    # is left alone so that it removes the extension.
    if ext and not ext.startswith("."):
        ext = "." + ext

    return self.replace(path=stem + ext)
def getExtension(self) -> str:
    """Return the file extension(s) associated with this URI path.

    Returns
    -------
    ext : `str`
        The file extension (including the ``.``), or an empty string if
        there is none. Only the last extension is returned unless it is
        one of the compression modifiers, in which case the last two are
        combined (e.g. ``.fits.gz``).
    """
    compression = {".gz", ".bz2", ".xz", ".fz"}

    # Work on the file part only so that a "." in a directory name is
    # not mistaken for an extension.
    suffixes = self._pathLib(self.basename()).suffixes
    if not suffixes:
        return ""

    last = suffixes[-1]
    if len(suffixes) > 1 and last in compression:
        return f"{suffixes[-2]}{last}"
    return last
def join(
    self, path: str | ResourcePath, isTemporary: bool | None = None, forceDirectory: bool = False
) -> ResourcePath:
    """Return new `ResourcePath` with additional path components.

    Parameters
    ----------
    path : `str`, `ResourcePath`
        Additional file components to append to the current URI. Assumed
        to include a file at the end. Will be quoted depending on the
        associated URI scheme. If the path looks like a URI with a scheme
        referring to an absolute location, it will be returned
        directly (matching the behavior of `os.path.join`). It can
        also be a `ResourcePath`.
    isTemporary : `bool`, optional
        Indicate that the resulting URI represents a temporary resource.
        Default is ``self.isTemporary``.
    forceDirectory : `bool`, optional
        If `True` forces the URI to end with a separator, otherwise given
        URI is interpreted as is.

    Returns
    -------
    new : `ResourcePath`
        New URI with any file at the end replaced with the new path
        components.

    Notes
    -----
    Schemeless URIs assume local path separator but all other URIs assume
    POSIX separator if the supplied path has directory structure. It
    may be this never becomes a problem but datastore templates assume
    POSIX separator is being used.

    If an absolute `ResourcePath` is given for ``path`` it is assumed that
    this should be returned directly. Giving a ``path`` of an absolute
    scheme-less URI is not allowed for safety reasons as it may indicate
    a mistake in the calling code.

    Raises
    ------
    ValueError
        Raised if the ``path`` is an absolute scheme-less URI. In that
        situation it is unclear whether the intent is to return a
        ``file`` URI or it was a mistake and a relative scheme-less URI
        was meant.
    RuntimeError
        Raised if ``isTemporary`` is explicitly `False` but this URI is
        temporary.
    """
    if isTemporary is None:
        isTemporary = self.isTemporary
    elif not isTemporary and self.isTemporary:
        raise RuntimeError("Cannot join temporary URI to non-temporary URI.")
    # If we have a full URI in path we will use it directly
    # but without forcing to absolute so that we can trap the
    # expected option of relative path.
    path_uri = ResourcePath(
        path, forceAbsolute=False, forceDirectory=forceDirectory, isTemporary=isTemporary
    )
    if path_uri.scheme:
        # Check for scheme so can distinguish explicit URIs from
        # absolute scheme-less URIs.
        return path_uri

    if path_uri.isabs():
        # Absolute scheme-less path.
        raise ValueError(f"Can not join absolute scheme-less {path_uri!r} to another URI.")

    # If this was originally a ResourcePath extract the unquoted path from
    # it. Otherwise we use the string we were given to allow "#" to appear
    # in the filename if given as a plain string.
    if not isinstance(path, str):
        path = path_uri.unquoted_path

    new = self.dirname()  # By definition a directory URI

    # new should be asked about quoting, not self, since dirname can
    # change the URI scheme for schemeless -> file
    if new.quotePaths:
        path = urllib.parse.quote(path)

    # Join onto the directory part and collapse any "." / ".." components.
    newpath = self._pathModule.normpath(self._pathModule.join(new.path, path))

    # normpath can strip trailing / so we force directory if the supplied
    # path ended with a /
    return new.replace(
        path=newpath,
        forceDirectory=(forceDirectory or path.endswith(self._pathModule.sep)),
        isTemporary=isTemporary,
    )
def relative_to(self, other: ResourcePath) -> str | None:
    """Return the relative path from this URI to the other URI.

    Parameters
    ----------
    other : `ResourcePath`
        URI to use to calculate the relative path. Must be a parent
        of this URI.

    Returns
    -------
    subpath : `str`
        The sub path of this URI relative to the supplied other URI,
        unquoted. `None` if there is no parent-child relationship.
        Scheme and netloc must match, except that the various spellings
        of the local host are treated as equal.
    """
    # A scheme-less absolute ``other`` is treated as a file URI. A
    # scheme-less relative ``other`` can only match a scheme-less
    # relative ``self``, which is handled in a subclass.
    if not other.scheme and other.isabs():
        other = other.abspath()

    # Scheme-less self is handled elsewhere.
    if self.scheme != other.scheme:
        return None

    if self.netloc != other.netloc:
        # Special case for localhost vs empty string.
        # There can be many variants of localhost.
        local_netlocs = {"", "localhost", "localhost.localdomain", "127.0.0.1"}
        both_local = self.netloc in local_netlocs and other.netloc in local_netlocs
        if not both_local:
            return None

    child = self._pathLib(self.relativeToPathRoot)
    try:
        relative = child.relative_to(other.relativeToPathRoot)
    except ValueError:
        return None
    return urllib.parse.unquote(str(relative))
def exists(self) -> bool:
    """Indicate that the resource is available.

    Not implemented in this base class.

    Returns
    -------
    exists : `bool`
        `True` if the resource exists.
    """
    raise NotImplementedError
@classmethod
def mexists(cls, uris: Iterable[ResourcePath]) -> dict[ResourcePath, bool]:
    """Check for existence of multiple URIs at once.

    Parameters
    ----------
    uris : iterable of `ResourcePath`
        The URIs to test.

    Returns
    -------
    existence : `dict` of [`ResourcePath`, `bool`]
        Mapping of original URI to boolean indicating existence.
    """
    # Bucket the URIs by their concrete class so that each class can
    # apply its own ``_mexists`` implementation to its own URIs.
    by_class: dict[type, list[ResourcePath]] = {}
    for uri in uris:
        by_class.setdefault(uri.__class__, []).append(uri)

    existence: dict[ResourcePath, bool] = {}
    for uri_class, members in by_class.items():
        existence.update(uri_class._mexists(members))

    return existence
@classmethod
def _mexists(cls, uris: Iterable[ResourcePath]) -> dict[ResourcePath, bool]:
    """Check for existence of multiple URIs at once.

    Implementation helper method for `mexists`. Each URI's ``exists()``
    is called in a worker thread, with at most ``MAX_WORKERS`` threads
    running at once.

    Parameters
    ----------
    uris : iterable of `ResourcePath`
        The URIs to test.

    Returns
    -------
    existence : `dict` of [`ResourcePath`, `bool`]
        Mapping of original URI to boolean indicating existence. A URI
        whose check raised an exception is reported as not existing.
    """
    results: dict[ResourcePath, bool] = {}
    # Use the executor as a context manager so that its worker threads
    # are always shut down, even if something unexpected is raised.
    with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as exists_executor:
        future_exists = {exists_executor.submit(uri.exists): uri for uri in uris}

        for future in concurrent.futures.as_completed(future_exists):
            uri = future_exists[future]
            try:
                exists = future.result()
            except Exception as exc:
                # Best effort: a failed check counts as "does not exist",
                # but leave a trace for debugging.
                log.debug("Existence check for %s failed: %s", uri, exc)
                exists = False
            results[uri] = exists
    return results
def remove(self) -> None:
    """Remove the resource.

    Not implemented in this base class.
    """
    raise NotImplementedError
def isabs(self) -> bool:
    """Indicate that the resource is fully specified.

    This base implementation always reports `True`; only a schemeless
    URI can be anything else.

    Returns
    -------
    isabs : `bool`
        `True` in all cases except schemeless URI.
    """
    return True
def abspath(self) -> ResourcePath:
    """Return URI using an absolute path.

    Returns
    -------
    abs : `ResourcePath`
        Absolute URI. This base implementation returns the object
        itself. Schemeless URIs are upgraded to file URIs.
    """
    return self
def _as_local(self) -> tuple[str, bool]:
    """Return the location of the (possibly remote) resource as local file.

    Helper for the `as_local` context manager. Not implemented in this
    base class.

    Returns
    -------
    path : `str`
        If this is a remote resource, it will be a copy of the resource
        on the local file system, probably in a temporary directory.
        For a local resource this should be the actual path to the
        resource.
    is_temporary : `bool`
        Indicates if the local path is a temporary file or not.
    """
    raise NotImplementedError
@contextlib.contextmanager
def as_local(self) -> Iterator[ResourcePath]:
    """Return the location of the (possibly remote) resource as local file.

    Yields
    ------
    local : `ResourcePath`
        If this is a remote resource, it will be a copy of the resource
        on the local file system, probably in a temporary directory.
        For a local resource this should be the actual path to the
        resource.

    Raises
    ------
    IsADirectoryError
        Raised if this URI is directory-like.

    Notes
    -----
    The context manager will automatically delete any local temporary
    file.

    Examples
    --------
    Should be used as a context manager:

    .. code-block:: py

       with uri.as_local() as local:
           ospath = local.ospath
    """
    if self.dirLike:
        raise IsADirectoryError(f"Directory-like URI {self} cannot be fetched as local.")

    location, temporary = self._as_local()
    local = ResourcePath(location, isTemporary=temporary)

    try:
        yield local
    finally:
        # Never delete when the local URI is this URI: a resource that
        # was already local may have been flagged as temporary. The
        # caller may also have moved the file, hence the existence check.
        if self != local:
            if temporary and local.exists():
                local.remove()
@classmethod
@contextlib.contextmanager
def temporary_uri(
    cls, prefix: ResourcePath | None = None, suffix: str | None = None
) -> Iterator[ResourcePath]:
    """Create a temporary file-like URI.

    Parameters
    ----------
    prefix : `ResourcePath`, optional
        Prefix to use. Without this the path will be formed as a local
        file URI in a temporary directory. Ensuring that the prefix
        location exists is the responsibility of the caller.
    suffix : `str`, optional
        A file suffix to be used. The ``.`` should be included in this
        suffix.

    Yields
    ------
    uri : `ResourcePath`
        The temporary URI. Will be removed when the context is completed.

    Raises
    ------
    NotImplementedError
        Raised if the resulting URI is directory-like (for example if
        ``suffix`` ends with a path separator).
    """
    # Path of the directory created here, if any. This is tracked
    # separately because the isTemporary flag cannot tell us whether the
    # directory is ours: an external prefix may have that set as well.
    tempdir: str | None = None
    try:
        if prefix is None:
            tempdir = tempfile.mkdtemp()
            # If the user has set a umask that restricts the owner-write
            # bit, the directory returned from mkdtemp may not initially
            # be writeable by us
            ensure_directory_is_writeable(tempdir)
            prefix = ResourcePath(tempdir, forceDirectory=True, isTemporary=True)

        # Need to create a randomized file name. For consistency do not
        # use mkstemp for local and something else for remote.
        # Additionally this method does not create the file to prevent
        # name clashes.
        characters = "abcdefghijklmnopqrstuvwxyz0123456789_"
        rng = Random()
        tempname = "".join(rng.choice(characters) for _ in range(16))
        if suffix:
            tempname += suffix
        temporary_uri = prefix.join(tempname, isTemporary=True)
        if temporary_uri.dirLike:
            # If we had a safe way to clean up a remote temporary
            # directory, we could support this.
            raise NotImplementedError("temporary_uri cannot be used to create a temporary directory.")

        try:
            yield temporary_uri
        finally:
            if tempdir is None:
                with contextlib.suppress(FileNotFoundError):
                    # It's okay if this does not work because the user
                    # removed the file.
                    temporary_uri.remove()
    finally:
        # Always remove a directory we created, including when setup
        # failed before the URI could be yielded.
        if tempdir is not None:
            shutil.rmtree(tempdir, ignore_errors=True)
def read(self, size: int = -1) -> bytes:
    """Open the resource and return the contents in bytes.

    Not implemented in this base class.

    Parameters
    ----------
    size : `int`, optional
        The number of bytes to read. Negative or omitted indicates
        that all data should be read.
    """
    raise NotImplementedError
def write(self, data: bytes, overwrite: bool = True) -> None:
    """Write the supplied bytes to the new resource.

    Not implemented in this base class.

    Parameters
    ----------
    data : `bytes`
        The bytes to write to the resource. The entire contents of the
        resource will be replaced.
    overwrite : `bool`, optional
        If `True` the resource will be overwritten if it exists. Otherwise
        the write will fail.
    """
    raise NotImplementedError
def mkdir(self) -> None:
    """Create the directory resource for a dir-like URI if it is missing.

    Raises
    ------
    NotImplementedError
        Always raised by this implementation.
    """
    raise NotImplementedError
def isdir(self) -> bool:
    """Report whether this URI is directory-like.

    Returns
    -------
    isdir : `bool`
        The value of the ``dirLike`` attribute of this URI.
    """
    looks_like_directory = self.dirLike
    return looks_like_directory
def size(self) -> int:
    """Return the size of the resource for a non-dir-like URI.

    Returns
    -------
    sz : `int`
        Size, in bytes, of the resource this URI refers to. The documented
        contract is that 0 is returned for a dir-like URI.

    Raises
    ------
    NotImplementedError
        Always raised by this implementation.
    """
    raise NotImplementedError
def __str__(self) -> str:
    """Return the URI as a string, as produced by ``geturl()``."""
    url = self.geturl()
    return url
def __repr__(self) -> str:
    """Return a ``ResourcePath("<url>")`` style representation.

    The URL embedded in the result is the string from ``geturl()``,
    wrapped in double quotes.
    """
    url = self.geturl()
    return f'ResourcePath("{url}")'
def __eq__(self, other: Any) -> bool:
    """Test equality with another object.

    Two `ResourcePath` objects are equal when their ``geturl()`` strings
    are equal. For any other type ``NotImplemented`` is returned so that
    Python can fall back to its default handling.
    """
    if isinstance(other, ResourcePath):
        return self.geturl() == other.geturl()
    return NotImplemented
def __hash__(self) -> int:
    """Return a hash computed from the string form of this URI."""
    as_text = str(self)
    return hash(as_text)
def __lt__(self, other: ResourcePath) -> bool:
    """Return `True` if this URI sorts strictly before ``other``.

    Ordering compares the strings returned by ``geturl()``. If ``other``
    is not a `ResourcePath`, ``NotImplemented`` is returned (matching
    ``__eq__``) so that Python can try the reflected operation or raise
    `TypeError`, rather than failing with `AttributeError`.
    """
    if not isinstance(other, ResourcePath):
        return NotImplemented
    return self.geturl() < other.geturl()
def __le__(self, other: ResourcePath) -> bool:
    """Return `True` if this URI sorts before or equal to ``other``.

    Ordering compares the strings returned by ``geturl()``. If ``other``
    is not a `ResourcePath`, ``NotImplemented`` is returned (matching
    ``__eq__``) so that Python can try the reflected operation or raise
    `TypeError`, rather than failing with `AttributeError`.
    """
    if not isinstance(other, ResourcePath):
        return NotImplemented
    return self.geturl() <= other.geturl()
def __gt__(self, other: ResourcePath) -> bool:
    """Return `True` if this URI sorts strictly after ``other``.

    Ordering compares the strings returned by ``geturl()``. If ``other``
    is not a `ResourcePath`, ``NotImplemented`` is returned (matching
    ``__eq__``) so that Python can try the reflected operation or raise
    `TypeError`, rather than failing with `AttributeError`.
    """
    if not isinstance(other, ResourcePath):
        return NotImplemented
    return self.geturl() > other.geturl()
def __ge__(self, other: ResourcePath) -> bool:
    """Return `True` if this URI sorts after or equal to ``other``.

    Ordering compares the strings returned by ``geturl()``. If ``other``
    is not a `ResourcePath`, ``NotImplemented`` is returned (matching
    ``__eq__``) so that Python can try the reflected operation or raise
    `TypeError`, rather than failing with `AttributeError`.
    """
    if not isinstance(other, ResourcePath):
        return NotImplemented
    return self.geturl() >= other.geturl()
def __copy__(self) -> ResourcePath:
    """Support shallow copying.

    The object is treated as immutable, so the very same instance is
    handed back instead of a new one.
    """
    # Defined explicitly here; the original author notes that the custom
    # __new__ method otherwise confuses copying.
    return self
def __deepcopy__(self, memo: Any) -> ResourcePath:
    """Support deep copying.

    The object is treated as immutable, so the very same instance is
    handed back instead of a new one. The ``memo`` argument is not used.
    """
    # Defined explicitly here; the original author notes that the custom
    # __new__ method otherwise confuses copying.
    return self
def __getnewargs__(self) -> tuple:
    """Return the constructor arguments used when unpickling.

    Returns
    -------
    args : `tuple`
        One-element tuple holding the string form of this URI.
    """
    as_text = str(self)
    return (as_text,)
@classmethod
def _fixDirectorySep(
    cls, parsed: urllib.parse.ParseResult, forceDirectory: bool = False
) -> tuple[urllib.parse.ParseResult, bool]:
    """Make sure directory-like paths end with the path separator.

    Parameters
    ----------
    parsed : `~urllib.parse.ParseResult`
        Parsed form of the URI, as produced by `urllib.parse`.
    forceDirectory : `bool`, optional
        If `True` the URI is treated as a directory and a trailing
        separator is appended when missing. If `False` the URI is taken
        as given, so it is only directory-like when it already ends with
        the separator.

    Returns
    -------
    modified : `~urllib.parse.ParseResult`
        The parsed URI, with a separator appended to the path if one had
        to be added; otherwise the object that was passed in.
    dirLike : `bool`
        `True` if the path ends with the separator or ``forceDirectory``
        is `True`, `False` otherwise.
    """
    # The separator comes from the path module configured on the class.
    separator = cls._pathModule.sep
    has_trailing_sep = parsed.path.endswith(separator)

    # Neither forced nor already terminated: not a directory, no change.
    if not (forceDirectory or has_trailing_sep):
        return parsed, False

    # Forced directory lacking the terminator gets exactly one appended.
    if not has_trailing_sep:
        parsed = parsed._replace(path=f"{parsed.path}{separator}")
    return parsed, True
@classmethod
def _fixupPathUri(
    cls,
    parsed: urllib.parse.ParseResult,
    root: ResourcePath | None = None,
    forceAbsolute: bool = False,
    forceDirectory: bool = False,
) -> tuple[urllib.parse.ParseResult, bool]:
    """Apply corrections to the supplied parsed URI.

    Parameters
    ----------
    parsed : `~urllib.parse.ParseResult`
        Parsed form of the URI, as produced by `urllib.parse`.
    root : `ResourcePath`, ignored
        Not used by this implementation.
    forceAbsolute : `bool`, ignored
        Not used by this implementation.
    forceDirectory : `bool`, optional
        If `True` the URI is forced to end with a separator; otherwise
        the URI is interpreted as given. Declaring that the URI is
        conceptually a directory can resolve ambiguity about the final
        path element.

    Returns
    -------
    modified : `~urllib.parse.ParseResult`
        The parsed URI, updated if a separator had to be appended.
    dirLike : `bool`
        `True` if the path ends with a separator or ``forceDirectory`` is
        `True`, `False` otherwise.

    Notes
    -----
    This implementation only delegates to ``_fixDirectorySep``; it does
    not itself make paths absolute or normalize them.
    NOTE(review): scheme-specific handling (for example of relative
    ``file:`` URIs) is presumably done in subclass overrides -- confirm.

    AWS S3 differentiates between keys with and without a trailing POSIX
    separator (i.e. ``/dir`` and ``/dir/``) whereas POSIX does not
    necessarily do so.
    """
    # ``root`` and ``forceAbsolute`` are deliberately left untouched.
    fixed = cls._fixDirectorySep(parsed, forceDirectory)
    return fixed
def transfer_from(
    self,
    src: ResourcePath,
    transfer: str,
    overwrite: bool = False,
    transaction: TransactionProtocol | None = None,
) -> None:
    """Transfer the resource at another URI to this URI.

    Parameters
    ----------
    src : `ResourcePath`
        The URI to transfer from.
    transfer : `str`
        Name of the transfer mode. Standard options include copy, link,
        symlink, hardlink and relsymlink, but a given URI need not
        support every mode.
    overwrite : `bool`, optional
        Whether an existing file may be overwritten. Defaults to `False`.
    transaction : `~lsst.resources.utils.TransactionProtocol`, optional
        Transaction object that, depending on the implementation, can
        roll back transfers on error. Support is not guaranteed.

    Raises
    ------
    NotImplementedError
        Always raised by this implementation, which supports no transfer
        modes.

    Notes
    -----
    This design is conceptually hard to scale as the number of URI
    schemes grows. The destination URI matters more than the source URI
    because it is where the transfer modes apply (the complication being
    that "move" deletes the source).

    Local file to local file is the fundamental use case, but every other
    scheme has to support "copy" to a local file (with implicit support
    for "move") and copy from a local file. The "link" options tend to be
    specific to local file systems.

    A "move" is a "copy" after which the remote resource is deleted.
    Whether that works depends on the source URI rather than on the
    destination URI. Reverting a move during transaction rollback is
    expected to be problematic if a remote resource was involved.
    """
    message = f"No transfer modes supported by URI scheme {self.scheme}"
    raise NotImplementedError(message)
def walk(
    self, file_filter: str | re.Pattern | None = None
) -> Iterator[list | tuple[ResourcePath, list[str], list[str]]]:
    """Traverse the directory tree, reporting files and directories.

    Parameters
    ----------
    file_filter : `str` or `re.Pattern`, optional
        Regular expression used to filter the file names before they are
        returned.

    Yields
    ------
    dirpath : `ResourcePath`
        The directory currently being examined.
    dirnames : `list` of `str`
        Names of the subdirectories found in ``dirpath``.
    filenames : `list` of `str`
        Names of the files found in ``dirpath``.

    Raises
    ------
    NotImplementedError
        Always raised by this implementation.
    """
    raise NotImplementedError
@overload
@classmethod
def findFileResources(
    cls,
    candidates: Iterable[ResourcePathExpression],
    file_filter: str | re.Pattern | None,
    grouped: Literal[True],
) -> Iterator[Iterator[ResourcePath]]:
    ...

@overload
@classmethod
def findFileResources(
    cls,
    candidates: Iterable[ResourcePathExpression],
    *,
    grouped: Literal[True],
) -> Iterator[Iterator[ResourcePath]]:
    ...

@overload
@classmethod
def findFileResources(
    cls,
    candidates: Iterable[ResourcePathExpression],
    file_filter: str | re.Pattern | None = None,
    grouped: Literal[False] = False,
) -> Iterator[ResourcePath]:
    ...

@classmethod
def findFileResources(
    cls,
    candidates: Iterable[ResourcePathExpression],
    file_filter: str | re.Pattern | None = None,
    grouped: bool = False,
) -> Iterator[ResourcePath | Iterator[ResourcePath]]:
    """Get all the files from a list of values.

    Parameters
    ----------
    candidates : iterable [`str` or `ResourcePath`]
        The files to return and directories in which to look for files to
        return.
    file_filter : `str` or `re.Pattern`, optional
        The regex to use when searching for files within directories.
        By default returns all the found files.
    grouped : `bool`, optional
        If `True` the results will be grouped by directory and each
        yielded value will be an iterator over URIs. If `False` each
        URI will be returned separately.

    Yields
    ------
    found_file: `ResourcePath`
        The passed-in URIs and URIs found in passed-in directories.
        If grouping is enabled, each of the yielded values will be an
        iterator yielding members of the group. Files given explicitly
        will be returned as a single group at the end.

    Notes
    -----
    If a value is a file it is yielded immediately without checking that it
    exists. If a value is a directory, all the files in the directory
    (recursively) that match the regex will be yielded in turn.

    Each group iterator is bound to its own directory when it is created,
    so groups may be collected first and consumed later, in any order.
    """
    fileRegex = None if file_filter is None else re.compile(file_filter)

    # Explicitly given files; only used when grouping.
    singles = []

    # Find all the files of interest
    for location in candidates:
        uri = ResourcePath(location)
        if not uri.isdir():
            if grouped:
                singles.append(uri)
            else:
                yield uri
            continue

        for found in uri.walk(fileRegex):
            if not found:
                # This means the uri does not exist and by
                # convention we ignore it
                continue
            root, _, files = found
            if not files:
                continue
            if grouped:
                # Bind ``root.join`` now. A generator expression would
                # look ``root`` up lazily and, if the caller advanced
                # this generator before consuming the group, would join
                # the names against a later directory.
                yield map(root.join, files)
            else:
                for name in files:
                    yield root.join(name)

    # Finally, return any explicitly given files in one group
    if grouped and singles:
        yield iter(singles)
@contextlib.contextmanager
def open(
    self,
    mode: str = "r",
    *,
    encoding: str | None = None,
    prefer_file_temporary: bool = False,
) -> Iterator[ResourceHandleProtocol]:
    """Return a context manager wrapping a file-like object for this URI.

    Parameters
    ----------
    mode : `str`
        Mode in which to open the file. The accepted values are those of
        the built-in `open`, although intrinsically read-only URI types
        may only support read modes, and `io.IOBase.seekable` is not
        guaranteed to be `True` on the returned object.
    encoding : `str`, optional
        Unicode encoding for text IO; ignored for binary IO. Defaults to
        ``locale.getpreferredencoding(False)``, just as `open` does.
    prefer_file_temporary : `bool`, optional
        If `True`, implementations that must stage data in temporary
        local storage use a temporary file rather than an in-memory
        buffer. This is generally slower, but may be needed to avoid
        excessive memory use with large files. Ignored by implementations
        that do not require a temporary.

    Yields
    ------
    cm : `~contextlib.AbstractContextManager`
        A context manager that wraps a `ResourceHandleProtocol` file-like
        object.

    Raises
    ------
    IsADirectoryError
        Raised if this URI is directory-like.
    FileExistsError
        Raised if ``mode`` contains ``"x"`` and the resource exists.

    Notes
    -----
    The default implementation uses a local temporary buffer (in-memory
    or file, depending on ``prefer_file_temporary``) together with calls
    to `read`, `write`, `as_local`, and `transfer_from` as needed to
    read from and write to remote systems. Remote writes therefore only
    happen when the context manager exits. `ResourcePath`
    implementations that can return a more efficient native buffer
    should do so whenever possible (as is guaranteed for local files).
    `ResourcePath` implementations for which `as_local` does not return
    a temporary are required to reimplement `open`, though they may
    delegate to `super` when ``prefer_file_temporary`` is `False`.
    """
    if self.dirLike:
        raise IsADirectoryError(f"Directory-like URI {self} cannot be opened.")
    if "x" in mode and self.exists():
        raise FileExistsError(f"File at {self} already exists.")

    # In-memory path: hand everything off to the handle implementation.
    if not prefer_file_temporary:
        with self._openImpl(mode, encoding=encoding) as handle:
            yield handle
        return

    # File-backed path. Reading and appending start from a local copy of
    # the existing content; other modes start from a fresh temporary.
    starts_from_existing = "r" in mode or "a" in mode
    if starts_from_existing:
        staging_cm = self.as_local()
    else:
        staging_cm = self.temporary_uri(suffix=self.getExtension())

    with staging_cm as staged_uri:
        assert staged_uri.isTemporary, (
            "ResourcePath implementations for which as_local is not "
            "a temporary must reimplement `open`."
        )
        with open(staged_uri.ospath, mode=mode, encoding=encoding) as stream:
            if "a" in mode:
                stream.seek(0, io.SEEK_END)
            yield stream
        # The local file is closed at this point but the temporary still
        # exists, so any modifications can be copied back.
        read_only = "r" in mode and "+" not in mode
        if not read_only:
            self.transfer_from(staged_uri, transfer="copy", overwrite="x" not in mode)
@contextlib.contextmanager
def _openImpl(self, mode: str = "r", *, encoding: str | None = None) -> Iterator[ResourceHandleProtocol]:
    """Provide the handle used by `open` for in-memory access.

    Specific `ResourcePath` implementations may override this private
    method to supply a customized handle-like interface.

    Parameters
    ----------
    mode : `str`
        Mode the handle should be opened with.
    encoding : `str`, optional
        Encoding used to convert between text and bytes in text modes.
        If `None`, ``locale.getpreferredencoding(False)`` is used.

    Yields
    ------
    handle : `~._resourceHandles.BaseResourceHandle`
        A handle that conforms to the
        `~._resourceHandles.BaseResourceHandle` interface. In this
        implementation it is an `io.BytesIO` for binary modes and an
        `io.StringIO` otherwise.

    Notes
    -----
    This implementation reads the entire content into a buffer with
    `read` (for read and append modes), lets the caller manipulate the
    buffer, and writes the whole buffer back with `write` on exit unless
    the mode is read-only. Subclasses may offer finer-grained control.
    """
    # Read and append modes begin with the existing content.
    wants_existing = "r" in mode or "a" in mode
    initial = self.read() if wants_existing else b""
    position_at_end = "a" in mode

    if "b" not in mode:
        codec = locale.getpreferredencoding(False) if encoding is None else encoding
        text_io = io.StringIO(initial.decode(codec))
        if position_at_end:
            text_io.seek(0, io.SEEK_END)
        yield text_io
        payload = text_io.getvalue().encode(codec)
    else:
        binary_io = io.BytesIO(initial)
        if position_at_end:
            binary_io.seek(0, io.SEEK_END)
        yield binary_io
        payload = binary_io.getvalue()

    # Only a pure read mode ("r" without "+") skips writing back.
    read_only = "r" in mode and "+" not in mode
    if not read_only:
        self.write(payload, overwrite="x" not in mode)
def generate_presigned_get_url(self, *, expiration_time_seconds: int) -> str:
    """Return a pre-signed URL for downloading this resource.

    The URL is meant to allow retrieval with an HTTP GET without
    supplying any access credentials.

    Parameters
    ----------
    expiration_time_seconds : `int`
        Number of seconds until the generated URL is no longer valid.

    Returns
    -------
    url : `str`
        HTTP URL signed for GET.

    Raises
    ------
    NotImplementedError
        Always raised by this implementation, which does not support URL
        signing.
    """
    message = f"URL signing is not supported for '{self.scheme}'"
    raise NotImplementedError(message)
def generate_presigned_put_url(self, *, expiration_time_seconds: int) -> str:
    """Return a pre-signed URL for uploading a file to this path.

    The URL is meant to allow an upload with an HTTP PUT without
    supplying any access credentials.

    Parameters
    ----------
    expiration_time_seconds : `int`
        Number of seconds until the generated URL is no longer valid.

    Returns
    -------
    url : `str`
        HTTP URL signed for PUT.

    Raises
    ------
    NotImplementedError
        Always raised by this implementation, which does not support URL
        signing.
    """
    message = f"URL signing is not supported for '{self.scheme}'"
    raise NotImplementedError(message)
ResourcePathExpression = str | urllib.parse.ParseResult | ResourcePath | Path
"""Type-annotation alias for objects that can be coerced to ResourcePath:
a `str`, a `urllib.parse.ParseResult`, a `ResourcePath` or a `pathlib.Path`.
"""