Coverage for python/lsst/resources/_resourcePath.py: 21%
411 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-10 09:42 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-10 09:42 +0000
1# This file is part of lsst-resources.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# Use of this source code is governed by a 3-clause BSD-style
10# license that can be found in the LICENSE file.
12from __future__ import annotations
14import concurrent.futures
15import contextlib
16import copy
17import io
18import locale
19import logging
20import os
21import posixpath
22import re
23import shutil
24import tempfile
25import urllib.parse
26from pathlib import Path, PurePath, PurePosixPath
27from random import Random
29__all__ = ("ResourcePath", "ResourcePathExpression")
31from collections.abc import Iterable, Iterator
32from typing import TYPE_CHECKING, Any, Literal, overload
34from ._resourceHandles._baseResourceHandle import ResourceHandleProtocol
36if TYPE_CHECKING:
37 from .utils import TransactionProtocol
40log = logging.getLogger(__name__)
42# Regex for looking for URI escapes
43ESCAPES_RE = re.compile(r"%[A-F0-9]{2}")
45# Precomputed escaped hash
46ESCAPED_HASH = urllib.parse.quote("#")
48# Maximum number of worker threads for parallelized operations.
49# If greater than 10, be aware that this number has to be consistent
50# with connection pool sizing (for example in urllib3).
51MAX_WORKERS = 10
54class ResourcePath:
55 """Convenience wrapper around URI parsers.
57 Provides access to URI components and can convert file
58 paths into absolute path URIs. Scheme-less URIs are treated as if
59 they are local file system paths and are converted to absolute URIs.
61 A specialist subclass is created for each supported URI scheme.
63 Parameters
64 ----------
65 uri : `str`, `pathlib.Path`, `urllib.parse.ParseResult`, or `ResourcePath`.
66 URI in string form. Can be scheme-less if referring to a relative
67 path or an absolute path on the local file system.
68 root : `str` or `ResourcePath`, optional
69 When fixing up a relative path in a ``file`` scheme or if scheme-less,
70 use this as the root. Must be absolute. If `None` the current
71 working directory will be used. Can be any supported URI scheme.
72 Not used if ``forceAbsolute`` is `False`.
73 forceAbsolute : `bool`, optional
74 If `True`, scheme-less relative URI will be converted to an absolute
75 path using a ``file`` scheme. If `False` scheme-less URI will remain
76 scheme-less and will not be updated to ``file`` or absolute path unless
77 it is already an absolute path, in which case it will be updated to
78 a ``file`` scheme.
79 forceDirectory: `bool`, optional
80 If `True` forces the URI to end with a separator, otherwise given URI
81 is interpreted as is.
82 isTemporary : `bool`, optional
83 If `True` indicates that this URI points to a temporary resource.
84 The default is `False`, unless ``uri`` is already a `ResourcePath`
85 instance and ``uri.isTemporary is True``.
87 Notes
88 -----
89 A non-standard URI of the form ``file:dir/file.txt`` is always converted
90 to an absolute ``file`` URI.
91 """
93 _pathLib: type[PurePath] = PurePosixPath
94 """Path library to use for this scheme."""
96 _pathModule = posixpath
97 """Path module to use for this scheme."""
99 transferModes: tuple[str, ...] = ("copy", "auto", "move")
100 """Transfer modes supported by this implementation.
102 Move is special in that it is generally a copy followed by an unlink.
103 Whether that unlink works depends critically on whether the source URI
104 implements unlink. If it does not the move will be reported as a failure.
105 """
107 transferDefault: str = "copy"
108 """Default mode to use for transferring if ``auto`` is specified."""
110 quotePaths = True
111 """True if path-like elements modifying a URI should be quoted.
113 All non-schemeless URIs have to internally use quoted paths. Therefore
114 if a new file name is given (e.g. to updatedFile or join) a decision must
115 be made whether to quote it to be consistent.
116 """
118 isLocal = False
119 """If `True` this URI refers to a local file."""
121 # This is not an ABC with abstract methods because the __new__ being
122 # a factory confuses mypy such that it assumes that every constructor
123 # returns a ResourcePath and then determines that all the abstract methods
124 # are still abstract. If they are not marked abstract but just raise
125 # mypy is fine with it.
127 # mypy is confused without these
128 _uri: urllib.parse.ParseResult
129 isTemporary: bool
130 dirLike: bool
132 def __new__(
133 cls,
134 uri: ResourcePathExpression,
135 root: str | ResourcePath | None = None,
136 forceAbsolute: bool = True,
137 forceDirectory: bool = False,
138 isTemporary: bool | None = None,
139 ) -> ResourcePath:
140 """Create and return new specialist ResourcePath subclass."""
141 parsed: urllib.parse.ParseResult
142 dirLike: bool = False
143 subclass: type[ResourcePath] | None = None
145 # Force root to be a ResourcePath -- this simplifies downstream
146 # code.
147 if root is None:
148 root_uri = None
149 elif isinstance(root, str):
150 root_uri = ResourcePath(root, forceDirectory=True, forceAbsolute=True)
151 else:
152 root_uri = root
154 if isinstance(uri, os.PathLike):
155 uri = str(uri)
157 # Record if we need to post process the URI components
158 # or if the instance is already fully configured
159 if isinstance(uri, str):
160 # Since local file names can have special characters in them
161 # we need to quote them for the parser but we can unquote
162 # later. Assume that all other URI schemes are quoted.
163 # Since sometimes people write file:/a/b and not file:///a/b
164 # we should not quote in the explicit case of file:
165 if "://" not in uri and not uri.startswith("file:"):
166 if ESCAPES_RE.search(uri):
167 log.warning("Possible double encoding of %s", uri)
168 else:
169 uri = urllib.parse.quote(uri)
170 # Special case hash since we must support fragments
171 # even in schemeless URIs -- although try to only replace
172 # them in file part and not directory part
173 if ESCAPED_HASH in uri:
174 dirpos = uri.rfind("/")
175 # Do replacement after this /
176 uri = uri[: dirpos + 1] + uri[dirpos + 1 :].replace(ESCAPED_HASH, "#")
178 parsed = urllib.parse.urlparse(uri)
179 elif isinstance(uri, urllib.parse.ParseResult):
180 parsed = copy.copy(uri)
181 # If we are being instantiated with a subclass, rather than
182 # ResourcePath, ensure that that subclass is used directly.
183 # This could lead to inconsistencies if this constructor
184 # is used externally outside of the ResourcePath.replace() method.
185 # S3ResourcePath(urllib.parse.urlparse("file://a/b.txt"))
186 # will be a problem.
187 # This is needed to prevent a schemeless absolute URI become
188 # a file URI unexpectedly when calling updatedFile or
189 # updatedExtension
190 if cls is not ResourcePath:
191 parsed, dirLike = cls._fixDirectorySep(parsed, forceDirectory)
192 subclass = cls
194 elif isinstance(uri, ResourcePath):
195 # Since ResourcePath is immutable we can return the argument
196 # unchanged if it already agrees with forceDirectory, isTemporary,
197 # and forceAbsolute.
198 # We invoke __new__ again with str(self) to add a scheme for
199 # forceAbsolute, but for the others that seems more likely to paper
200 # over logic errors than do something useful, so we just raise.
201 if forceDirectory and not uri.dirLike:
202 raise RuntimeError(
203 f"{uri} is already a file-like ResourcePath; cannot force it to directory."
204 )
205 if isTemporary is not None and isTemporary is not uri.isTemporary:
206 raise RuntimeError(
207 f"{uri} is already a {'temporary' if uri.isTemporary else 'permanent'} "
208 f"ResourcePath; cannot make it {'temporary' if isTemporary else 'permanent'}."
209 )
210 if forceAbsolute and not uri.scheme:
211 return ResourcePath(
212 str(uri),
213 root=root,
214 forceAbsolute=True,
215 forceDirectory=uri.dirLike,
216 isTemporary=uri.isTemporary,
217 )
218 return uri
219 else:
220 raise ValueError(
221 f"Supplied URI must be string, Path, ResourcePath, or ParseResult but got '{uri!r}'"
222 )
224 if subclass is None:
225 # Work out the subclass from the URI scheme
226 if not parsed.scheme:
227 # Root may be specified as a ResourcePath that overrides
228 # the schemeless determination.
229 if (
230 root_uri is not None
231 and root_uri.scheme != "file" # file scheme has different code path
232 and not parsed.path.startswith("/") # Not already absolute path
233 ):
234 if not root_uri.dirLike:
235 raise ValueError(
236 f"Root URI ({root}) was not a directory so can not be joined with"
237 f" path {parsed.path!r}"
238 )
239 # If root is temporary or this schemeless is temporary we
240 # assume this URI is temporary.
241 isTemporary = isTemporary or root_uri.isTemporary
242 joined = root_uri.join(
243 parsed.path, forceDirectory=forceDirectory, isTemporary=isTemporary
244 )
246 # Rather than returning this new ResourcePath directly we
247 # instead extract the path and the scheme and adjust the
248 # URI we were given -- we need to do this to preserve
249 # fragments since join() will drop them.
250 parsed = parsed._replace(scheme=joined.scheme, path=joined.path, netloc=joined.netloc)
251 subclass = type(joined)
253 # Clear the root parameter to indicate that it has
254 # been applied already.
255 root_uri = None
256 else:
257 from .schemeless import SchemelessResourcePath
259 subclass = SchemelessResourcePath
260 elif parsed.scheme == "file":
261 from .file import FileResourcePath
263 subclass = FileResourcePath
264 elif parsed.scheme == "s3":
265 from .s3 import S3ResourcePath
267 subclass = S3ResourcePath
268 elif parsed.scheme.startswith("http"):
269 from .http import HttpResourcePath
271 subclass = HttpResourcePath
272 elif parsed.scheme == "gs":
273 from .gs import GSResourcePath
275 subclass = GSResourcePath
276 elif parsed.scheme == "resource":
277 # Rules for scheme names disallow pkg_resource
278 from .packageresource import PackageResourcePath
280 subclass = PackageResourcePath
281 elif parsed.scheme == "mem":
282 # in-memory datastore object
283 from .mem import InMemoryResourcePath
285 subclass = InMemoryResourcePath
286 else:
287 raise NotImplementedError(
288 f"No URI support for scheme: '{parsed.scheme}' in {parsed.geturl()}"
289 )
291 parsed, dirLike = subclass._fixupPathUri(
292 parsed, root=root_uri, forceAbsolute=forceAbsolute, forceDirectory=forceDirectory
293 )
295 # It is possible for the class to change from schemeless
296 # to file so handle that
297 if parsed.scheme == "file":
298 from .file import FileResourcePath
300 subclass = FileResourcePath
302 # Now create an instance of the correct subclass and set the
303 # attributes directly
304 self = object.__new__(subclass)
305 self._uri = parsed
306 self.dirLike = dirLike
307 if isTemporary is None:
308 isTemporary = False
309 self.isTemporary = isTemporary
310 return self
312 @property
313 def scheme(self) -> str:
314 """Return the URI scheme.
316 Notes
317 -----
318 (``://`` is not part of the scheme).
319 """
320 return self._uri.scheme
322 @property
323 def netloc(self) -> str:
324 """Return the URI network location."""
325 return self._uri.netloc
327 @property
328 def path(self) -> str:
329 """Return the path component of the URI."""
330 return self._uri.path
332 @property
333 def unquoted_path(self) -> str:
334 """Return path component of the URI with any URI quoting reversed."""
335 return urllib.parse.unquote(self._uri.path)
337 @property
338 def ospath(self) -> str:
339 """Return the path component of the URI localized to current OS."""
340 raise AttributeError(f"Non-file URI ({self}) has no local OS path.")
342 @property
343 def relativeToPathRoot(self) -> str:
344 """Return path relative to network location.
346 Effectively, this is the path property with posix separator stripped
347 from the left hand side of the path.
349 Always unquotes.
350 """
351 p = self._pathLib(self.path)
352 relToRoot = str(p.relative_to(p.root))
353 if self.dirLike and not relToRoot.endswith("/"):
354 relToRoot += "/"
355 return urllib.parse.unquote(relToRoot)
357 @property
358 def is_root(self) -> bool:
359 """Return whether this URI points to the root of the network location.
361 This means that the path components refers to the top level.
362 """
363 relpath = self.relativeToPathRoot
364 if relpath == "./":
365 return True
366 return False
368 @property
369 def fragment(self) -> str:
370 """Return the fragment component of the URI."""
371 return self._uri.fragment
373 @property
374 def params(self) -> str:
375 """Return any parameters included in the URI."""
376 return self._uri.params
378 @property
379 def query(self) -> str:
380 """Return any query strings included in the URI."""
381 return self._uri.query
383 def geturl(self) -> str:
384 """Return the URI in string form.
386 Returns
387 -------
388 url : `str`
389 String form of URI.
390 """
391 return self._uri.geturl()
393 def root_uri(self) -> ResourcePath:
394 """Return the base root URI.
396 Returns
397 -------
398 uri : `ResourcePath`
399 root URI.
400 """
401 return self.replace(path="", forceDirectory=True)
403 def split(self) -> tuple[ResourcePath, str]:
404 """Split URI into head and tail.
406 Returns
407 -------
408 head: `ResourcePath`
409 Everything leading up to tail, expanded and normalized as per
410 ResourcePath rules.
411 tail : `str`
412 Last path component. Tail will be empty if path ends on a
413 separator. Tail will never contain separators. It will be
414 unquoted.
416 Notes
417 -----
418 Equivalent to `os.path.split` where head preserves the URI
419 components.
420 """
421 head, tail = self._pathModule.split(self.path)
422 headuri = self._uri._replace(path=head)
424 # The file part should never include quoted metacharacters
425 tail = urllib.parse.unquote(tail)
427 # Schemeless is special in that it can be a relative path
428 # We need to ensure that it stays that way. All other URIs will
429 # be absolute already.
430 forceAbsolute = self._pathModule.isabs(self.path)
431 return ResourcePath(headuri, forceDirectory=True, forceAbsolute=forceAbsolute), tail
433 def basename(self) -> str:
434 """Return the base name, last element of path, of the URI.
436 Returns
437 -------
438 tail : `str`
439 Last part of the path attribute. Trail will be empty if path ends
440 on a separator.
442 Notes
443 -----
444 If URI ends on a slash returns an empty string. This is the second
445 element returned by `split()`.
447 Equivalent of `os.path.basename`.
448 """
449 return self.split()[1]
451 def dirname(self) -> ResourcePath:
452 """Return the directory component of the path as a new `ResourcePath`.
454 Returns
455 -------
456 head : `ResourcePath`
457 Everything except the tail of path attribute, expanded and
458 normalized as per ResourcePath rules.
460 Notes
461 -----
462 Equivalent of `os.path.dirname`.
463 """
464 return self.split()[0]
466 def parent(self) -> ResourcePath:
467 """Return a `ResourcePath` of the parent directory.
469 Returns
470 -------
471 head : `ResourcePath`
472 Everything except the tail of path attribute, expanded and
473 normalized as per `ResourcePath` rules.
475 Notes
476 -----
477 For a file-like URI this will be the same as calling `dirname()`.
478 """
479 # When self is file-like, return self.dirname()
480 if not self.dirLike:
481 return self.dirname()
482 # When self is dir-like, return its parent directory,
483 # regardless of the presence of a trailing separator
484 originalPath = self._pathLib(self.path)
485 parentPath = originalPath.parent
486 return self.replace(path=str(parentPath), forceDirectory=True)
488 def replace(self, forceDirectory: bool = False, isTemporary: bool = False, **kwargs: Any) -> ResourcePath:
489 """Return new `ResourcePath` with specified components replaced.
491 Parameters
492 ----------
493 forceDirectory : `bool`, optional
494 Parameter passed to ResourcePath constructor to force this
495 new URI to be dir-like.
496 isTemporary : `bool`, optional
497 Indicate that the resulting URI is temporary resource.
498 **kwargs
499 Components of a `urllib.parse.ParseResult` that should be
500 modified for the newly-created `ResourcePath`.
502 Returns
503 -------
504 new : `ResourcePath`
505 New `ResourcePath` object with updated values.
507 Notes
508 -----
509 Does not, for now, allow a change in URI scheme.
510 """
511 # Disallow a change in scheme
512 if "scheme" in kwargs:
513 raise ValueError(f"Can not use replace() method to change URI scheme for {self}")
514 return self.__class__(
515 self._uri._replace(**kwargs), forceDirectory=forceDirectory, isTemporary=isTemporary
516 )
518 def updatedFile(self, newfile: str) -> ResourcePath:
519 """Return new URI with an updated final component of the path.
521 Parameters
522 ----------
523 newfile : `str`
524 File name with no path component.
526 Returns
527 -------
528 updated : `ResourcePath`
530 Notes
531 -----
532 Forces the ResourcePath.dirLike attribute to be false. The new file
533 path will be quoted if necessary.
534 """
535 if self.quotePaths:
536 newfile = urllib.parse.quote(newfile)
537 dir, _ = self._pathModule.split(self.path)
538 newpath = self._pathModule.join(dir, newfile)
540 updated = self.replace(path=newpath)
541 updated.dirLike = False
542 return updated
544 def updatedExtension(self, ext: str | None) -> ResourcePath:
545 """Return a new `ResourcePath` with updated file extension.
547 All file extensions are replaced.
549 Parameters
550 ----------
551 ext : `str` or `None`
552 New extension. If an empty string is given any extension will
553 be removed. If `None` is given there will be no change.
555 Returns
556 -------
557 updated : `ResourcePath`
558 URI with the specified extension. Can return itself if
559 no extension was specified.
560 """
561 if ext is None:
562 return self
564 # Get the extension
565 current = self.getExtension()
567 # Nothing to do if the extension already matches
568 if current == ext:
569 return self
571 # Remove the current extension from the path
572 # .fits.gz counts as one extension do not use os.path.splitext
573 path = self.path
574 if current:
575 path = path[: -len(current)]
577 # Ensure that we have a leading "." on file extension (and we do not
578 # try to modify the empty string)
579 if ext and not ext.startswith("."):
580 ext = "." + ext
582 return self.replace(path=path + ext)
584 def getExtension(self) -> str:
585 """Return the file extension(s) associated with this URI path.
587 Returns
588 -------
589 ext : `str`
590 The file extension (including the ``.``). Can be empty string
591 if there is no file extension. Usually returns only the last
592 file extension unless there is a special extension modifier
593 indicating file compression, in which case the combined
594 extension (e.g. ``.fits.gz``) will be returned.
595 """
596 special = {".gz", ".bz2", ".xz", ".fz"}
598 # Get the file part of the path so as not to be confused by
599 # "." in directory names.
600 basename = self.basename()
601 extensions = self._pathLib(basename).suffixes
603 if not extensions:
604 return ""
606 ext = extensions.pop()
608 # Multiple extensions, decide whether to include the final two
609 if extensions and ext in special:
610 ext = f"{extensions[-1]}{ext}"
612 return ext
614 def join(
615 self, path: str | ResourcePath, isTemporary: bool | None = None, forceDirectory: bool = False
616 ) -> ResourcePath:
617 """Return new `ResourcePath` with additional path components.
619 Parameters
620 ----------
621 path : `str`, `ResourcePath`
622 Additional file components to append to the current URI. Assumed
623 to include a file at the end. Will be quoted depending on the
624 associated URI scheme. If the path looks like a URI with a scheme
625 referring to an absolute location, it will be returned
626 directly (matching the behavior of `os.path.join`). It can
627 also be a `ResourcePath`.
628 isTemporary : `bool`, optional
629 Indicate that the resulting URI represents a temporary resource.
630 Default is ``self.isTemporary``.
631 forceDirectory : `bool`, optional
632 If `True` forces the URI to end with a separator, otherwise given
633 URI is interpreted as is.
635 Returns
636 -------
637 new : `ResourcePath`
638 New URI with any file at the end replaced with the new path
639 components.
641 Notes
642 -----
643 Schemeless URIs assume local path separator but all other URIs assume
644 POSIX separator if the supplied path has directory structure. It
645 may be this never becomes a problem but datastore templates assume
646 POSIX separator is being used.
648 If an absolute `ResourcePath` is given for ``path`` is is assumed that
649 this should be returned directly. Giving a ``path`` of an absolute
650 scheme-less URI is not allowed for safety reasons as it may indicate
651 a mistake in the calling code.
653 Raises
654 ------
655 ValueError
656 Raised if the ``path`` is an absolute scheme-less URI. In that
657 situation it is unclear whether the intent is to return a
658 ``file`` URI or it was a mistake and a relative scheme-less URI
659 was meant.
660 RuntimeError
661 Raised if this attempts to join a temporary URI to a non-temporary
662 URI.
663 """
664 if isTemporary is None:
665 isTemporary = self.isTemporary
666 elif not isTemporary and self.isTemporary:
667 raise RuntimeError("Cannot join temporary URI to non-temporary URI.")
668 # If we have a full URI in path we will use it directly
669 # but without forcing to absolute so that we can trap the
670 # expected option of relative path.
671 path_uri = ResourcePath(
672 path, forceAbsolute=False, forceDirectory=forceDirectory, isTemporary=isTemporary
673 )
674 if path_uri.scheme:
675 # Check for scheme so can distinguish explicit URIs from
676 # absolute scheme-less URIs.
677 return path_uri
679 if path_uri.isabs():
680 # Absolute scheme-less path.
681 raise ValueError(f"Can not join absolute scheme-less {path_uri!r} to another URI.")
683 # If this was originally a ResourcePath extract the unquoted path from
684 # it. Otherwise we use the string we were given to allow "#" to appear
685 # in the filename if given as a plain string.
686 if not isinstance(path, str):
687 path = path_uri.unquoted_path
689 new = self.dirname() # By definition a directory URI
691 # new should be asked about quoting, not self, since dirname can
692 # change the URI scheme for schemeless -> file
693 if new.quotePaths:
694 path = urllib.parse.quote(path)
696 newpath = self._pathModule.normpath(self._pathModule.join(new.path, path))
698 # normpath can strip trailing / so we force directory if the supplied
699 # path ended with a /
700 return new.replace(
701 path=newpath,
702 forceDirectory=(forceDirectory or path.endswith(self._pathModule.sep)),
703 isTemporary=isTemporary,
704 )
706 def relative_to(self, other: ResourcePath) -> str | None:
707 """Return the relative path from this URI to the other URI.
709 Parameters
710 ----------
711 other : `ResourcePath`
712 URI to use to calculate the relative path. Must be a parent
713 of this URI.
715 Returns
716 -------
717 subpath : `str`
718 The sub path of this URI relative to the supplied other URI.
719 Returns `None` if there is no parent child relationship.
720 Scheme and netloc must match.
721 """
722 # Scheme-less absolute other is treated as if it's a file scheme.
723 # Scheme-less relative other can only return non-None if self
724 # is also scheme-less relative and that is handled specifically
725 # in a subclass.
726 if not other.scheme and other.isabs():
727 other = other.abspath()
729 # Scheme-less self is handled elsewhere.
730 if self.scheme != other.scheme:
731 return None
732 if self.netloc != other.netloc:
733 # Special case for localhost vs empty string.
734 # There can be many variants of localhost.
735 local_netlocs = {"", "localhost", "localhost.localdomain", "127.0.0.1"}
736 if not {self.netloc, other.netloc}.issubset(local_netlocs):
737 return None
739 enclosed_path = self._pathLib(self.relativeToPathRoot)
740 parent_path = other.relativeToPathRoot
741 subpath: str | None
742 try:
743 subpath = str(enclosed_path.relative_to(parent_path))
744 except ValueError:
745 subpath = None
746 else:
747 subpath = urllib.parse.unquote(subpath)
748 return subpath
750 def exists(self) -> bool:
751 """Indicate that the resource is available.
753 Returns
754 -------
755 exists : `bool`
756 `True` if the resource exists.
757 """
758 raise NotImplementedError()
760 @classmethod
761 def mexists(cls, uris: Iterable[ResourcePath]) -> dict[ResourcePath, bool]:
762 """Check for existence of multiple URIs at once.
764 Parameters
765 ----------
766 uris : iterable of `ResourcePath`
767 The URIs to test.
769 Returns
770 -------
771 existence : `dict` of [`ResourcePath`, `bool`]
772 Mapping of original URI to boolean indicating existence.
773 """
774 exists_executor = concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)
775 future_exists = {exists_executor.submit(uri.exists): uri for uri in uris}
777 results: dict[ResourcePath, bool] = {}
778 for future in concurrent.futures.as_completed(future_exists):
779 uri = future_exists[future]
780 try:
781 exists = future.result()
782 except Exception:
783 exists = False
784 results[uri] = exists
785 return results
787 def remove(self) -> None:
788 """Remove the resource."""
789 raise NotImplementedError()
791 def isabs(self) -> bool:
792 """Indicate that the resource is fully specified.
794 For non-schemeless URIs this is always true.
796 Returns
797 -------
798 isabs : `bool`
799 `True` in all cases except schemeless URI.
800 """
801 return True
803 def abspath(self) -> ResourcePath:
804 """Return URI using an absolute path.
806 Returns
807 -------
808 abs : `ResourcePath`
809 Absolute URI. For non-schemeless URIs this always returns itself.
810 Schemeless URIs are upgraded to file URIs.
811 """
812 return self
814 def _as_local(self) -> tuple[str, bool]:
815 """Return the location of the (possibly remote) resource as local file.
817 This is a helper function for `as_local` context manager.
819 Returns
820 -------
821 path : `str`
822 If this is a remote resource, it will be a copy of the resource
823 on the local file system, probably in a temporary directory.
824 For a local resource this should be the actual path to the
825 resource.
826 is_temporary : `bool`
827 Indicates if the local path is a temporary file or not.
828 """
829 raise NotImplementedError()
831 @contextlib.contextmanager
832 def as_local(self) -> Iterator[ResourcePath]:
833 """Return the location of the (possibly remote) resource as local file.
835 Yields
836 ------
837 local : `ResourcePath`
838 If this is a remote resource, it will be a copy of the resource
839 on the local file system, probably in a temporary directory.
840 For a local resource this should be the actual path to the
841 resource.
843 Notes
844 -----
845 The context manager will automatically delete any local temporary
846 file.
848 Examples
849 --------
850 Should be used as a context manager:
852 .. code-block:: py
854 with uri.as_local() as local:
855 ospath = local.ospath
856 """
857 if self.dirLike:
858 raise IsADirectoryError(f"Directory-like URI {self} cannot be fetched as local.")
859 local_src, is_temporary = self._as_local()
860 local_uri = ResourcePath(local_src, isTemporary=is_temporary)
862 try:
863 yield local_uri
864 finally:
865 # The caller might have relocated the temporary file.
866 # Do not ever delete if the temporary matches self
867 # (since it may have been that a temporary file was made local
868 # but already was local).
869 if self != local_uri and is_temporary and local_uri.exists():
870 local_uri.remove()
872 @classmethod
873 @contextlib.contextmanager
874 def temporary_uri(
875 cls, prefix: ResourcePath | None = None, suffix: str | None = None
876 ) -> Iterator[ResourcePath]:
877 """Create a temporary file-like URI.
879 Parameters
880 ----------
881 prefix : `ResourcePath`, optional
882 Prefix to use. Without this the path will be formed as a local
883 file URI in a temporary directory. Ensuring that the prefix
884 location exists is the responsibility of the caller.
885 suffix : `str`, optional
886 A file suffix to be used. The ``.`` should be included in this
887 suffix.
889 Yields
890 ------
891 uri : `ResourcePath`
892 The temporary URI. Will be removed when the context is completed.
893 """
894 use_tempdir = False
895 if prefix is None:
896 prefix = ResourcePath(tempfile.mkdtemp(), forceDirectory=True, isTemporary=True)
897 # Record that we need to delete this directory. Can not rely
898 # on isTemporary flag since an external prefix may have that
899 # set as well.
900 use_tempdir = True
902 # Need to create a randomized file name. For consistency do not
903 # use mkstemp for local and something else for remote. Additionally
904 # this method does not create the file to prevent name clashes.
905 characters = "abcdefghijklmnopqrstuvwxyz0123456789_"
906 rng = Random()
907 tempname = "".join(rng.choice(characters) for _ in range(16))
908 if suffix:
909 tempname += suffix
910 temporary_uri = prefix.join(tempname, isTemporary=True)
911 if temporary_uri.dirLike:
912 # If we had a safe way to clean up a remote temporary directory, we
913 # could support this.
914 raise NotImplementedError("temporary_uri cannot be used to create a temporary directory.")
915 try:
916 yield temporary_uri
917 finally:
918 if use_tempdir:
919 shutil.rmtree(prefix.ospath, ignore_errors=True)
920 else:
921 try:
922 # It's okay if this does not work because the user removed
923 # the file.
924 temporary_uri.remove()
925 except FileNotFoundError:
926 pass
928 def read(self, size: int = -1) -> bytes:
929 """Open the resource and return the contents in bytes.
931 Parameters
932 ----------
933 size : `int`, optional
934 The number of bytes to read. Negative or omitted indicates
935 that all data should be read.
936 """
937 raise NotImplementedError()
939 def write(self, data: bytes, overwrite: bool = True) -> None:
940 """Write the supplied bytes to the new resource.
942 Parameters
943 ----------
944 data : `bytes`
945 The bytes to write to the resource. The entire contents of the
946 resource will be replaced.
947 overwrite : `bool`, optional
948 If `True` the resource will be overwritten if it exists. Otherwise
949 the write will fail.
950 """
951 raise NotImplementedError()
953 def mkdir(self) -> None:
954 """For a dir-like URI, create the directory resource if needed."""
955 raise NotImplementedError()
957 def isdir(self) -> bool:
958 """Return True if this URI looks like a directory, else False."""
959 return self.dirLike
961 def size(self) -> int:
962 """For non-dir-like URI, return the size of the resource.
964 Returns
965 -------
966 sz : `int`
967 The size in bytes of the resource associated with this URI.
968 Returns 0 if dir-like.
969 """
970 raise NotImplementedError()
972 def __str__(self) -> str:
973 """Convert the URI to its native string form."""
974 return self.geturl()
976 def __repr__(self) -> str:
977 """Return string representation suitable for evaluation."""
978 return f'ResourcePath("{self.geturl()}")'
980 def __eq__(self, other: Any) -> bool:
981 """Compare supplied object with this `ResourcePath`."""
982 if not isinstance(other, ResourcePath):
983 return NotImplemented
984 return self.geturl() == other.geturl()
986 def __hash__(self) -> int:
987 """Return hash of this object."""
988 return hash(str(self))
990 def __lt__(self, other: ResourcePath) -> bool:
991 return self.geturl() < other.geturl()
993 def __le__(self, other: ResourcePath) -> bool:
994 return self.geturl() <= other.geturl()
996 def __gt__(self, other: ResourcePath) -> bool:
997 return self.geturl() > other.geturl()
999 def __ge__(self, other: ResourcePath) -> bool:
1000 return self.geturl() >= other.geturl()
1002 def __copy__(self) -> ResourcePath:
1003 """Copy constructor.
1005 Object is immutable so copy can return itself.
1006 """
1007 # Implement here because the __new__ method confuses things
1008 return self
1010 def __deepcopy__(self, memo: Any) -> ResourcePath:
1011 """Deepcopy the object.
1013 Object is immutable so copy can return itself.
1014 """
1015 # Implement here because the __new__ method confuses things
1016 return self
1018 def __getnewargs__(self) -> tuple:
1019 """Support pickling."""
1020 return (str(self),)
1022 @classmethod
1023 def _fixDirectorySep(
1024 cls, parsed: urllib.parse.ParseResult, forceDirectory: bool = False
1025 ) -> tuple[urllib.parse.ParseResult, bool]:
1026 """Ensure that a path separator is present on directory paths.
1028 Parameters
1029 ----------
1030 parsed : `~urllib.parse.ParseResult`
1031 The result from parsing a URI using `urllib.parse`.
1032 forceDirectory : `bool`, optional
1033 If `True` forces the URI to end with a separator, otherwise given
1034 URI is interpreted as is. Specifying that the URI is conceptually
1035 equivalent to a directory can break some ambiguities when
1036 interpreting the last element of a path.
1038 Returns
1039 -------
1040 modified : `~urllib.parse.ParseResult`
1041 Update result if a URI is being handled.
1042 dirLike : `bool`
1043 `True` if given parsed URI has a trailing separator or
1044 forceDirectory is True. Otherwise `False`.
1045 """
1046 # assume we are not dealing with a directory like URI
1047 dirLike = False
1049 # Directory separator
1050 sep = cls._pathModule.sep
1052 # URI is dir-like if explicitly stated or if it ends on a separator
1053 endsOnSep = parsed.path.endswith(sep)
1054 if forceDirectory or endsOnSep:
1055 dirLike = True
1056 # only add the separator if it's not already there
1057 if not endsOnSep:
1058 parsed = parsed._replace(path=parsed.path + sep)
1060 return parsed, dirLike
1062 @classmethod
1063 def _fixupPathUri(
1064 cls,
1065 parsed: urllib.parse.ParseResult,
1066 root: ResourcePath | None = None,
1067 forceAbsolute: bool = False,
1068 forceDirectory: bool = False,
1069 ) -> tuple[urllib.parse.ParseResult, bool]:
1070 """Correct any issues with the supplied URI.
1072 Parameters
1073 ----------
1074 parsed : `~urllib.parse.ParseResult`
1075 The result from parsing a URI using `urllib.parse`.
1076 root : `ResourcePath`, ignored
1077 Not used by the this implementation since all URIs are
1078 absolute except for those representing the local file system.
1079 forceAbsolute : `bool`, ignored.
1080 Not used by this implementation. URIs are generally always
1081 absolute.
1082 forceDirectory : `bool`, optional
1083 If `True` forces the URI to end with a separator, otherwise given
1084 URI is interpreted as is. Specifying that the URI is conceptually
1085 equivalent to a directory can break some ambiguities when
1086 interpreting the last element of a path.
1088 Returns
1089 -------
1090 modified : `~urllib.parse.ParseResult`
1091 Update result if a URI is being handled.
1092 dirLike : `bool`
1093 `True` if given parsed URI has a trailing separator or
1094 forceDirectory is True. Otherwise `False`.
1096 Notes
1097 -----
1098 Relative paths are explicitly not supported by RFC8089 but `urllib`
1099 does accept URIs of the form ``file:relative/path.ext``. They need
1100 to be turned into absolute paths before they can be used. This is
1101 always done regardless of the ``forceAbsolute`` parameter.
1103 AWS S3 differentiates between keys with trailing POSIX separators (i.e
1104 `/dir` and `/dir/`) whereas POSIX does not neccessarily.
1106 Scheme-less paths are normalized.
1107 """
1108 return cls._fixDirectorySep(parsed, forceDirectory)
1110 def transfer_from(
1111 self,
1112 src: ResourcePath,
1113 transfer: str,
1114 overwrite: bool = False,
1115 transaction: TransactionProtocol | None = None,
1116 ) -> None:
1117 """Transfer to this URI from another.
1119 Parameters
1120 ----------
1121 src : `ResourcePath`
1122 Source URI.
1123 transfer : `str`
1124 Mode to use for transferring the resource. Generically there are
1125 many standard options: copy, link, symlink, hardlink, relsymlink.
1126 Not all URIs support all modes.
1127 overwrite : `bool`, optional
1128 Allow an existing file to be overwritten. Defaults to `False`.
1129 transaction : `~lsst.resources.utils.TransactionProtocol`, optional
1130 A transaction object that can (depending on implementation)
1131 rollback transfers on error. Not guaranteed to be implemented.
1133 Notes
1134 -----
1135 Conceptually this is hard to scale as the number of URI schemes
1136 grow. The destination URI is more important than the source URI
1137 since that is where all the transfer modes are relevant (with the
1138 complication that "move" deletes the source).
1140 Local file to local file is the fundamental use case but every
1141 other scheme has to support "copy" to local file (with implicit
1142 support for "move") and copy from local file.
1143 All the "link" options tend to be specific to local file systems.
1145 "move" is a "copy" where the remote resource is deleted at the end.
1146 Whether this works depends on the source URI rather than the
1147 destination URI. Reverting a move on transaction rollback is
1148 expected to be problematic if a remote resource was involved.
1149 """
1150 raise NotImplementedError(f"No transfer modes supported by URI scheme {self.scheme}")
1152 def walk(
1153 self, file_filter: str | re.Pattern | None = None
1154 ) -> Iterator[list | tuple[ResourcePath, list[str], list[str]]]:
1155 """Walk the directory tree returning matching files and directories.
1157 Parameters
1158 ----------
1159 file_filter : `str` or `re.Pattern`, optional
1160 Regex to filter out files from the list before it is returned.
1162 Yields
1163 ------
1164 dirpath : `ResourcePath`
1165 Current directory being examined.
1166 dirnames : `list` of `str`
1167 Names of subdirectories within dirpath.
1168 filenames : `list` of `str`
1169 Names of all the files within dirpath.
1170 """
1171 raise NotImplementedError()
1173 @overload
1174 @classmethod
1175 def findFileResources(
1176 cls,
1177 candidates: Iterable[ResourcePathExpression],
1178 file_filter: str | re.Pattern | None,
1179 grouped: Literal[True],
1180 ) -> Iterator[Iterator[ResourcePath]]:
1181 ...
1183 @overload
1184 @classmethod
1185 def findFileResources(
1186 cls,
1187 candidates: Iterable[ResourcePathExpression],
1188 *,
1189 grouped: Literal[True],
1190 ) -> Iterator[Iterator[ResourcePath]]:
1191 ...
1193 @overload
1194 @classmethod
1195 def findFileResources(
1196 cls,
1197 candidates: Iterable[ResourcePathExpression],
1198 file_filter: str | re.Pattern | None = None,
1199 grouped: Literal[False] = False,
1200 ) -> Iterator[ResourcePath]:
1201 ...
1203 @classmethod
1204 def findFileResources(
1205 cls,
1206 candidates: Iterable[ResourcePathExpression],
1207 file_filter: str | re.Pattern | None = None,
1208 grouped: bool = False,
1209 ) -> Iterator[ResourcePath | Iterator[ResourcePath]]:
1210 """Get all the files from a list of values.
1212 Parameters
1213 ----------
1214 candidates : iterable [`str` or `ResourcePath`]
1215 The files to return and directories in which to look for files to
1216 return.
1217 file_filter : `str` or `re.Pattern`, optional
1218 The regex to use when searching for files within directories.
1219 By default returns all the found files.
1220 grouped : `bool`, optional
1221 If `True` the results will be grouped by directory and each
1222 yielded value will be an iterator over URIs. If `False` each
1223 URI will be returned separately.
1225 Yields
1226 ------
1227 found_file: `ResourcePath`
1228 The passed-in URIs and URIs found in passed-in directories.
1229 If grouping is enabled, each of the yielded values will be an
1230 iterator yielding members of the group. Files given explicitly
1231 will be returned as a single group at the end.
1233 Notes
1234 -----
1235 If a value is a file it is yielded immediately without checking that it
1236 exists. If a value is a directory, all the files in the directory
1237 (recursively) that match the regex will be yielded in turn.
1238 """
1239 fileRegex = None if file_filter is None else re.compile(file_filter)
1241 singles = []
1243 # Find all the files of interest
1244 for location in candidates:
1245 uri = ResourcePath(location)
1246 if uri.isdir():
1247 for found in uri.walk(fileRegex):
1248 if not found:
1249 # This means the uri does not exist and by
1250 # convention we ignore it
1251 continue
1252 root, dirs, files = found
1253 if not files:
1254 continue
1255 if grouped:
1256 yield (root.join(name) for name in files)
1257 else:
1258 for name in files:
1259 yield root.join(name)
1260 else:
1261 if grouped:
1262 singles.append(uri)
1263 else:
1264 yield uri
1266 # Finally, return any explicitly given files in one group
1267 if grouped and singles:
1268 yield iter(singles)
1270 @contextlib.contextmanager
1271 def open(
1272 self,
1273 mode: str = "r",
1274 *,
1275 encoding: str | None = None,
1276 prefer_file_temporary: bool = False,
1277 ) -> Iterator[ResourceHandleProtocol]:
1278 """Return a context manager that wraps an object that behaves like an
1279 open file at the location of the URI.
1281 Parameters
1282 ----------
1283 mode : `str`
1284 String indicating the mode in which to open the file. Values are
1285 the same as those accepted by `open`, though intrinsically
1286 read-only URI types may only support read modes, and
1287 `io.IOBase.seekable` is not guaranteed to be `True` on the returned
1288 object.
1289 encoding : `str`, optional
1290 Unicode encoding for text IO; ignored for binary IO. Defaults to
1291 ``locale.getpreferredencoding(False)``, just as `open`
1292 does.
1293 prefer_file_temporary : `bool`, optional
1294 If `True`, for implementations that require transfers from a remote
1295 system to temporary local storage and/or back, use a temporary file
1296 instead of an in-memory buffer; this is generally slower, but it
1297 may be necessary to avoid excessive memory usage by large files.
1298 Ignored by implementations that do not require a temporary.
1300 Yields
1301 ------
1302 cm : `~contextlib.AbstractContextManager`
1303 A context manager that wraps a `ResourceHandleProtocol` file-like
1304 object.
1306 Notes
1307 -----
1308 The default implementation of this method uses a local temporary buffer
1309 (in-memory or file, depending on ``prefer_file_temporary``) with calls
1310 to `read`, `write`, `as_local`, and `transfer_from` as necessary to
1311 read and write from/to remote systems. Remote writes thus occur only
1312 when the context manager is exited. `ResourcePath` implementations
1313 that can return a more efficient native buffer should do so whenever
1314 possible (as is guaranteed for local files). `ResourcePath`
1315 implementations for which `as_local` does not return a temporary are
1316 required to reimplement `open`, though they may delegate to `super`
1317 when ``prefer_file_temporary`` is `False`.
1318 """
1319 if self.dirLike:
1320 raise IsADirectoryError(f"Directory-like URI {self} cannot be opened.")
1321 if "x" in mode and self.exists():
1322 raise FileExistsError(f"File at {self} already exists.")
1323 if prefer_file_temporary:
1324 if "r" in mode or "a" in mode:
1325 local_cm = self.as_local()
1326 else:
1327 local_cm = self.temporary_uri(suffix=self.getExtension())
1328 with local_cm as local_uri:
1329 assert local_uri.isTemporary, (
1330 "ResourcePath implementations for which as_local is not "
1331 "a temporary must reimplement `open`."
1332 )
1333 with open(local_uri.ospath, mode=mode, encoding=encoding) as file_buffer:
1334 if "a" in mode:
1335 file_buffer.seek(0, io.SEEK_END)
1336 yield file_buffer
1337 if "r" not in mode or "+" in mode:
1338 self.transfer_from(local_uri, transfer="copy", overwrite=("x" not in mode))
1339 else:
1340 with self._openImpl(mode, encoding=encoding) as handle:
1341 yield handle
1343 @contextlib.contextmanager
1344 def _openImpl(self, mode: str = "r", *, encoding: str | None = None) -> Iterator[ResourceHandleProtocol]:
1345 """Implement opening of a resource handle.
1347 This private method may be overridden by specific `ResourcePath`
1348 implementations to provide a customized handle like interface.
1350 Parameters
1351 ----------
1352 mode : `str`
1353 The mode the handle should be opened with
1354 encoding : `str`, optional
1355 The byte encoding of any binary text
1357 Yields
1358 ------
1359 handle : `~._resourceHandles.BaseResourceHandle`
1360 A handle that conforms to the
1361 `~._resourceHandles.BaseResourceHandle` interface
1363 Notes
1364 -----
1365 The base implementation of a file handle reads in a files entire
1366 contents into a buffer for manipulation, and then writes it back out
1367 upon close. Subclasses of this class may offer more fine grained
1368 control.
1369 """
1370 if "r" in mode or "a" in mode:
1371 in_bytes = self.read()
1372 else:
1373 in_bytes = b""
1374 if "b" in mode:
1375 bytes_buffer = io.BytesIO(in_bytes)
1376 if "a" in mode:
1377 bytes_buffer.seek(0, io.SEEK_END)
1378 yield bytes_buffer
1379 out_bytes = bytes_buffer.getvalue()
1380 else:
1381 if encoding is None:
1382 encoding = locale.getpreferredencoding(False)
1383 str_buffer = io.StringIO(in_bytes.decode(encoding))
1384 if "a" in mode:
1385 str_buffer.seek(0, io.SEEK_END)
1386 yield str_buffer
1387 out_bytes = str_buffer.getvalue().encode(encoding)
1388 if "r" not in mode or "+" in mode:
1389 self.write(out_bytes, overwrite=("x" not in mode))
1392ResourcePathExpression = str | urllib.parse.ParseResult | ResourcePath | Path
1393"""Type-annotation alias for objects that can be coerced to ResourcePath.
1394"""