Coverage for python/lsst/resources/_resourcePath.py: 22%
406 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-02-04 02:38 -0800
« prev ^ index » next coverage.py v6.5.0, created at 2023-02-04 02:38 -0800
1# This file is part of lsst-resources.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# Use of this source code is governed by a 3-clause BSD-style
10# license that can be found in the LICENSE file.
12from __future__ import annotations
14import concurrent.futures
15import contextlib
16import copy
17import io
18import locale
19import logging
20import os
21import posixpath
22import re
23import shutil
24import tempfile
25import urllib.parse
26from pathlib import Path, PurePath, PurePosixPath
27from random import Random
29__all__ = ("ResourcePath", "ResourcePathExpression")
31from typing import (
32 TYPE_CHECKING,
33 Any,
34 Dict,
35 Iterable,
36 Iterator,
37 List,
38 Literal,
39 Optional,
40 Tuple,
41 Type,
42 Union,
43 overload,
44)
46from ._resourceHandles._baseResourceHandle import ResourceHandleProtocol
48if TYPE_CHECKING: 48 ↛ 49line 48 didn't jump to line 49, because the condition on line 48 was never true
49 from .utils import TransactionProtocol
# Logger shared by everything in this module.
log = logging.getLogger(__name__)

# Pattern that spots a percent-encoded octet (upper-case hex digits) in a
# string; used to detect input that may already be URI-quoted.
ESCAPES_RE = re.compile(r"%[A-F0-9]{2}")

# Percent-encoded form of "#", computed once at import time.
ESCAPED_HASH = urllib.parse.quote("#")

# Upper limit on worker threads used by parallelized operations.
# Values above 10 must be kept in step with connection pool sizing
# (for example in urllib3).
MAX_WORKERS = 10

ResourcePathExpression = Union[str, urllib.parse.ParseResult, "ResourcePath", Path]
"""Type-annotation alias for objects that can be coerced to ResourcePath.
"""
71class ResourcePath:
72 """Convenience wrapper around URI parsers.
74 Provides access to URI components and can convert file
75 paths into absolute path URIs. Scheme-less URIs are treated as if
76 they are local file system paths and are converted to absolute URIs.
78 A specialist subclass is created for each supported URI scheme.
80 Parameters
81 ----------
82 uri : `str`, `Path`, `urllib.parse.ParseResult`, or `ResourcePath`.
83 URI in string form. Can be scheme-less if referring to a local
84 filesystem path.
85 root : `str` or `ResourcePath`, optional
86 When fixing up a relative path in a ``file`` scheme or if scheme-less,
87 use this as the root. Must be absolute. If `None` the current
88 working directory will be used. Can be a file URI.
89 forceAbsolute : `bool`, optional
90 If `True`, scheme-less relative URI will be converted to an absolute
91 path using a ``file`` scheme. If `False` scheme-less URI will remain
92 scheme-less and will not be updated to ``file`` or absolute path.
93 forceDirectory: `bool`, optional
94 If `True` forces the URI to end with a separator, otherwise given URI
95 is interpreted as is.
96 isTemporary : `bool`, optional
97 If `True` indicates that this URI points to a temporary resource.
98 The default is `False`, unless ``uri`` is already a `ResourcePath`
99 instance and ``uri.isTemporary is True``.
100 """
102 _pathLib: Type[PurePath] = PurePosixPath
103 """Path library to use for this scheme."""
105 _pathModule = posixpath
106 """Path module to use for this scheme."""
108 transferModes: Tuple[str, ...] = ("copy", "auto", "move")
109 """Transfer modes supported by this implementation.
111 Move is special in that it is generally a copy followed by an unlink.
112 Whether that unlink works depends critically on whether the source URI
113 implements unlink. If it does not the move will be reported as a failure.
114 """
116 transferDefault: str = "copy"
117 """Default mode to use for transferring if ``auto`` is specified."""
119 quotePaths = True
120 """True if path-like elements modifying a URI should be quoted.
122 All non-schemeless URIs have to internally use quoted paths. Therefore
123 if a new file name is given (e.g. to updatedFile or join) a decision must
124 be made whether to quote it to be consistent.
125 """
127 isLocal = False
128 """If `True` this URI refers to a local file."""
130 # This is not an ABC with abstract methods because the __new__ being
131 # a factory confuses mypy such that it assumes that every constructor
132 # returns a ResourcePath and then determines that all the abstract methods
133 # are still abstract. If they are not marked abstract but just raise
134 # mypy is fine with it.
136 # mypy is confused without these
137 _uri: urllib.parse.ParseResult
138 isTemporary: bool
139 dirLike: bool
141 def __new__(
142 cls,
143 uri: ResourcePathExpression,
144 root: Optional[Union[str, ResourcePath]] = None,
145 forceAbsolute: bool = True,
146 forceDirectory: bool = False,
147 isTemporary: Optional[bool] = None,
148 ) -> ResourcePath:
149 """Create and return new specialist ResourcePath subclass."""
150 parsed: urllib.parse.ParseResult
151 dirLike: bool = False
152 subclass: Optional[Type[ResourcePath]] = None
154 if isinstance(uri, os.PathLike):
155 uri = str(uri)
157 # Record if we need to post process the URI components
158 # or if the instance is already fully configured
159 if isinstance(uri, str):
160 # Since local file names can have special characters in them
161 # we need to quote them for the parser but we can unquote
162 # later. Assume that all other URI schemes are quoted.
163 # Since sometimes people write file:/a/b and not file:///a/b
164 # we should not quote in the explicit case of file:
165 if "://" not in uri and not uri.startswith("file:"):
166 if ESCAPES_RE.search(uri):
167 log.warning("Possible double encoding of %s", uri)
168 else:
169 uri = urllib.parse.quote(uri)
170 # Special case hash since we must support fragments
171 # even in schemeless URIs -- although try to only replace
172 # them in file part and not directory part
173 if ESCAPED_HASH in uri:
174 dirpos = uri.rfind("/")
175 # Do replacement after this /
176 uri = uri[: dirpos + 1] + uri[dirpos + 1 :].replace(ESCAPED_HASH, "#")
178 parsed = urllib.parse.urlparse(uri)
179 elif isinstance(uri, urllib.parse.ParseResult):
180 parsed = copy.copy(uri)
181 # If we are being instantiated with a subclass, rather than
182 # ResourcePath, ensure that that subclass is used directly.
183 # This could lead to inconsistencies if this constructor
184 # is used externally outside of the ResourcePath.replace() method.
185 # S3ResourcePath(urllib.parse.urlparse("file://a/b.txt"))
186 # will be a problem.
187 # This is needed to prevent a schemeless absolute URI become
188 # a file URI unexpectedly when calling updatedFile or
189 # updatedExtension
190 if cls is not ResourcePath:
191 parsed, dirLike = cls._fixDirectorySep(parsed, forceDirectory)
192 subclass = cls
194 elif isinstance(uri, ResourcePath):
195 # Since ResourcePath is immutable we can return the argument
196 # unchanged if it already agrees with forceDirectory, isTemporary,
197 # and forceAbsolute.
198 # We invoke __new__ again with str(self) to add a scheme for
199 # forceAbsolute, but for the others that seems more likely to paper
200 # over logic errors than do something useful, so we just raise.
201 if forceDirectory and not uri.dirLike:
202 raise RuntimeError(
203 f"{uri} is already a file-like ResourcePath; cannot force it to directory."
204 )
205 if isTemporary is not None and isTemporary is not uri.isTemporary:
206 raise RuntimeError(
207 f"{uri} is already a {'temporary' if uri.isTemporary else 'permanent'} "
208 f"ResourcePath; cannot make it {'temporary' if isTemporary else 'permanent'}."
209 )
210 if forceAbsolute and not uri.scheme:
211 return ResourcePath(
212 str(uri),
213 root=root,
214 forceAbsolute=True,
215 forceDirectory=uri.dirLike,
216 isTemporary=uri.isTemporary,
217 )
218 return uri
219 else:
220 raise ValueError(
221 f"Supplied URI must be string, Path, ResourcePath, or ParseResult but got '{uri!r}'"
222 )
224 if subclass is None:
225 # Work out the subclass from the URI scheme
226 if not parsed.scheme:
227 from .schemeless import SchemelessResourcePath
229 subclass = SchemelessResourcePath
230 elif parsed.scheme == "file":
231 from .file import FileResourcePath
233 subclass = FileResourcePath
234 elif parsed.scheme == "s3":
235 from .s3 import S3ResourcePath
237 subclass = S3ResourcePath
238 elif parsed.scheme.startswith("http"):
239 from .http import HttpResourcePath
241 subclass = HttpResourcePath
242 elif parsed.scheme == "gs":
243 from .gs import GSResourcePath
245 subclass = GSResourcePath
246 elif parsed.scheme == "resource":
247 # Rules for scheme names disallow pkg_resource
248 from .packageresource import PackageResourcePath
250 subclass = PackageResourcePath
251 elif parsed.scheme == "mem":
252 # in-memory datastore object
253 from .mem import InMemoryResourcePath
255 subclass = InMemoryResourcePath
256 else:
257 raise NotImplementedError(
258 f"No URI support for scheme: '{parsed.scheme}' in {parsed.geturl()}"
259 )
261 parsed, dirLike = subclass._fixupPathUri(
262 parsed, root=root, forceAbsolute=forceAbsolute, forceDirectory=forceDirectory
263 )
265 # It is possible for the class to change from schemeless
266 # to file so handle that
267 if parsed.scheme == "file":
268 from .file import FileResourcePath
270 subclass = FileResourcePath
272 # Now create an instance of the correct subclass and set the
273 # attributes directly
274 self = object.__new__(subclass)
275 self._uri = parsed
276 self.dirLike = dirLike
277 if isTemporary is None:
278 isTemporary = False
279 self.isTemporary = isTemporary
280 return self
282 @property
283 def scheme(self) -> str:
284 """Return the URI scheme.
286 Notes
287 -----
288 (``://`` is not part of the scheme).
289 """
290 return self._uri.scheme
292 @property
293 def netloc(self) -> str:
294 """Return the URI network location."""
295 return self._uri.netloc
297 @property
298 def path(self) -> str:
299 """Return the path component of the URI."""
300 return self._uri.path
302 @property
303 def unquoted_path(self) -> str:
304 """Return path component of the URI with any URI quoting reversed."""
305 return urllib.parse.unquote(self._uri.path)
307 @property
308 def ospath(self) -> str:
309 """Return the path component of the URI localized to current OS."""
310 raise AttributeError(f"Non-file URI ({self}) has no local OS path.")
312 @property
313 def relativeToPathRoot(self) -> str:
314 """Return path relative to network location.
316 Effectively, this is the path property with posix separator stripped
317 from the left hand side of the path.
319 Always unquotes.
320 """
321 p = self._pathLib(self.path)
322 relToRoot = str(p.relative_to(p.root))
323 if self.dirLike and not relToRoot.endswith("/"):
324 relToRoot += "/"
325 return urllib.parse.unquote(relToRoot)
327 @property
328 def is_root(self) -> bool:
329 """Return whether this URI points to the root of the network location.
331 This means that the path components refers to the top level.
332 """
333 relpath = self.relativeToPathRoot
334 if relpath == "./":
335 return True
336 return False
338 @property
339 def fragment(self) -> str:
340 """Return the fragment component of the URI."""
341 return self._uri.fragment
343 @property
344 def params(self) -> str:
345 """Return any parameters included in the URI."""
346 return self._uri.params
348 @property
349 def query(self) -> str:
350 """Return any query strings included in the URI."""
351 return self._uri.query
353 def geturl(self) -> str:
354 """Return the URI in string form.
356 Returns
357 -------
358 url : `str`
359 String form of URI.
360 """
361 return self._uri.geturl()
363 def root_uri(self) -> ResourcePath:
364 """Return the base root URI.
366 Returns
367 -------
368 uri : `ResourcePath`
369 root URI.
370 """
371 return self.replace(path="", forceDirectory=True)
def split(self) -> Tuple[ResourcePath, str]:
    """Separate the URI into a directory URI and a final component.

    Returns
    -------
    head: `ResourcePath`
        Everything leading up to tail, expanded and normalized as per
        ResourcePath rules.
    tail : `str`
        Last `self.path` component, unquoted. Empty if the path ends on
        a separator; never contains separators.

    Notes
    -----
    Equivalent to `os.path.split()` where head preserves the URI
    components.
    """
    directory, leaf = self._pathModule.split(self.path)

    # Only a scheme-less URI can be relative and it has to stay relative;
    # every other kind of URI is absolute already.
    keep_absolute = self._pathModule.isabs(self.path)

    head_uri = ResourcePath(
        self._uri._replace(path=directory),
        forceDirectory=True,
        forceAbsolute=keep_absolute,
    )

    # The file part is returned without quoted metacharacters.
    return head_uri, urllib.parse.unquote(leaf)
403 def basename(self) -> str:
404 """Return the base name, last element of path, of the URI.
406 Returns
407 -------
408 tail : `str`
409 Last part of the path attribute. Trail will be empty if path ends
410 on a separator.
412 Notes
413 -----
414 If URI ends on a slash returns an empty string. This is the second
415 element returned by `split()`.
417 Equivalent of `os.path.basename()``.
418 """
419 return self.split()[1]
421 def dirname(self) -> ResourcePath:
422 """Return the directory component of the path as a new `ResourcePath`.
424 Returns
425 -------
426 head : `ResourcePath`
427 Everything except the tail of path attribute, expanded and
428 normalized as per ResourcePath rules.
430 Notes
431 -----
432 Equivalent of `os.path.dirname()`.
433 """
434 return self.split()[0]
436 def parent(self) -> ResourcePath:
437 """Return a `ResourcePath` of the parent directory.
439 Returns
440 -------
441 head : `ResourcePath`
442 Everything except the tail of path attribute, expanded and
443 normalized as per `ResourcePath` rules.
445 Notes
446 -----
447 For a file-like URI this will be the same as calling `dirname()`.
448 """
449 # When self is file-like, return self.dirname()
450 if not self.dirLike:
451 return self.dirname()
452 # When self is dir-like, return its parent directory,
453 # regardless of the presence of a trailing separator
454 originalPath = self._pathLib(self.path)
455 parentPath = originalPath.parent
456 return self.replace(path=str(parentPath), forceDirectory=True)
458 def replace(self, forceDirectory: bool = False, isTemporary: bool = False, **kwargs: Any) -> ResourcePath:
459 """Return new `ResourcePath` with specified components replaced.
461 Parameters
462 ----------
463 forceDirectory : `bool`, optional
464 Parameter passed to ResourcePath constructor to force this
465 new URI to be dir-like.
466 isTemporary : `bool`, optional
467 Indicate that the resulting URI is temporary resource.
468 **kwargs
469 Components of a `urllib.parse.ParseResult` that should be
470 modified for the newly-created `ResourcePath`.
472 Returns
473 -------
474 new : `ResourcePath`
475 New `ResourcePath` object with updated values.
477 Notes
478 -----
479 Does not, for now, allow a change in URI scheme.
480 """
481 # Disallow a change in scheme
482 if "scheme" in kwargs:
483 raise ValueError(f"Can not use replace() method to change URI scheme for {self}")
484 return self.__class__(
485 self._uri._replace(**kwargs), forceDirectory=forceDirectory, isTemporary=isTemporary
486 )
488 def updatedFile(self, newfile: str) -> ResourcePath:
489 """Return new URI with an updated final component of the path.
491 Parameters
492 ----------
493 newfile : `str`
494 File name with no path component.
496 Returns
497 -------
498 updated : `ResourcePath`
500 Notes
501 -----
502 Forces the ResourcePath.dirLike attribute to be false. The new file
503 path will be quoted if necessary.
504 """
505 if self.quotePaths:
506 newfile = urllib.parse.quote(newfile)
507 dir, _ = self._pathModule.split(self.path)
508 newpath = self._pathModule.join(dir, newfile)
510 updated = self.replace(path=newpath)
511 updated.dirLike = False
512 return updated
514 def updatedExtension(self, ext: Optional[str]) -> ResourcePath:
515 """Return a new `ResourcePath` with updated file extension.
517 All file extensions are replaced.
519 Parameters
520 ----------
521 ext : `str` or `None`
522 New extension. If an empty string is given any extension will
523 be removed. If `None` is given there will be no change.
525 Returns
526 -------
527 updated : `ResourcePath`
528 URI with the specified extension. Can return itself if
529 no extension was specified.
530 """
531 if ext is None:
532 return self
534 # Get the extension
535 current = self.getExtension()
537 # Nothing to do if the extension already matches
538 if current == ext:
539 return self
541 # Remove the current extension from the path
542 # .fits.gz counts as one extension do not use os.path.splitext
543 path = self.path
544 if current:
545 path = path[: -len(current)]
547 # Ensure that we have a leading "." on file extension (and we do not
548 # try to modify the empty string)
549 if ext and not ext.startswith("."):
550 ext = "." + ext
552 return self.replace(path=path + ext)
554 def getExtension(self) -> str:
555 """Return the file extension(s) associated with this URI path.
557 Returns
558 -------
559 ext : `str`
560 The file extension (including the ``.``). Can be empty string
561 if there is no file extension. Usually returns only the last
562 file extension unless there is a special extension modifier
563 indicating file compression, in which case the combined
564 extension (e.g. ``.fits.gz``) will be returned.
565 """
566 special = {".gz", ".bz2", ".xz", ".fz"}
568 # Get the file part of the path so as not to be confused by
569 # "." in directory names.
570 basename = self.basename()
571 extensions = self._pathLib(basename).suffixes
573 if not extensions:
574 return ""
576 ext = extensions.pop()
578 # Multiple extensions, decide whether to include the final two
579 if extensions and ext in special:
580 ext = f"{extensions[-1]}{ext}"
582 return ext
584 def join(
585 self, path: Union[str, ResourcePath], isTemporary: Optional[bool] = None, forceDirectory: bool = False
586 ) -> ResourcePath:
587 """Return new `ResourcePath` with additional path components.
589 Parameters
590 ----------
591 path : `str`, `ResourcePath`
592 Additional file components to append to the current URI. Assumed
593 to include a file at the end. Will be quoted depending on the
594 associated URI scheme. If the path looks like a URI with a scheme
595 referring to an absolute location, it will be returned
596 directly (matching the behavior of `os.path.join()`). It can
597 also be a `ResourcePath`.
598 isTemporary : `bool`, optional
599 Indicate that the resulting URI represents a temporary resource.
600 Default is ``self.isTemporary``.
601 forceDirectory : `bool`, optional
602 If `True` forces the URI to end with a separator, otherwise given
603 URI is interpreted as is.
605 Returns
606 -------
607 new : `ResourcePath`
608 New URI with any file at the end replaced with the new path
609 components.
611 Notes
612 -----
613 Schemeless URIs assume local path separator but all other URIs assume
614 POSIX separator if the supplied path has directory structure. It
615 may be this never becomes a problem but datastore templates assume
616 POSIX separator is being used.
618 If an absolute `ResourcePath` is given for ``path`` is is assumed that
619 this should be returned directly. Giving a ``path`` of an absolute
620 scheme-less URI is not allowed for safety reasons as it may indicate
621 a mistake in the calling code.
623 Raises
624 ------
625 ValueError
626 Raised if the ``path`` is an absolute scheme-less URI. In that
627 situation it is unclear whether the intent is to return a
628 ``file`` URI or it was a mistake and a relative scheme-less URI
629 was meant.
630 RuntimeError
631 Raised if this attempts to join a temporary URI to a non-temporary
632 URI.
633 """
634 if isTemporary is None:
635 isTemporary = self.isTemporary
636 elif not isTemporary and self.isTemporary:
637 raise RuntimeError("Cannot join temporary URI to non-temporary URI.")
638 # If we have a full URI in path we will use it directly
639 # but without forcing to absolute so that we can trap the
640 # expected option of relative path.
641 path_uri = ResourcePath(
642 path, forceAbsolute=False, forceDirectory=forceDirectory, isTemporary=isTemporary
643 )
644 if path_uri.scheme:
645 # Check for scheme so can distinguish explicit URIs from
646 # absolute scheme-less URIs.
647 return path_uri
649 if path_uri.isabs():
650 # Absolute scheme-less path.
651 raise ValueError(f"Can not join absolute scheme-less {path_uri!r} to another URI.")
653 # If this was originally a ResourcePath extract the unquoted path from
654 # it. Otherwise we use the string we were given to allow "#" to appear
655 # in the filename if given as a plain string.
656 if not isinstance(path, str):
657 path = path_uri.unquoted_path
659 new = self.dirname() # By definition a directory URI
661 # new should be asked about quoting, not self, since dirname can
662 # change the URI scheme for schemeless -> file
663 if new.quotePaths:
664 path = urllib.parse.quote(path)
666 newpath = self._pathModule.normpath(self._pathModule.join(new.path, path))
668 # normpath can strip trailing / so we force directory if the supplied
669 # path ended with a /
670 return new.replace(
671 path=newpath,
672 forceDirectory=(forceDirectory or path.endswith(self._pathModule.sep)),
673 isTemporary=isTemporary,
674 )
676 def relative_to(self, other: ResourcePath) -> Optional[str]:
677 """Return the relative path from this URI to the other URI.
679 Parameters
680 ----------
681 other : `ResourcePath`
682 URI to use to calculate the relative path. Must be a parent
683 of this URI.
685 Returns
686 -------
687 subpath : `str`
688 The sub path of this URI relative to the supplied other URI.
689 Returns `None` if there is no parent child relationship.
690 Scheme and netloc must match.
691 """
692 # Scheme-less absolute other is treated as if it's a file scheme.
693 # Scheme-less relative other can only return non-None if self
694 # is also scheme-less relative and that is handled specifically
695 # in a subclass.
696 if not other.scheme and other.isabs():
697 other = other.abspath()
699 # Scheme-less self is handled elsewhere.
700 if self.scheme != other.scheme or self.netloc != other.netloc:
701 return None
703 enclosed_path = self._pathLib(self.relativeToPathRoot)
704 parent_path = other.relativeToPathRoot
705 subpath: Optional[str]
706 try:
707 subpath = str(enclosed_path.relative_to(parent_path))
708 except ValueError:
709 subpath = None
710 else:
711 subpath = urllib.parse.unquote(subpath)
712 return subpath
714 def exists(self) -> bool:
715 """Indicate that the resource is available.
717 Returns
718 -------
719 exists : `bool`
720 `True` if the resource exists.
721 """
722 raise NotImplementedError()
@classmethod
def mexists(cls, uris: Iterable[ResourcePath]) -> Dict[ResourcePath, bool]:
    """Check for existence of multiple URIs at once.

    Each ``exists()`` call is submitted to a thread pool of at most
    ``MAX_WORKERS`` threads.

    Parameters
    ----------
    uris : iterable of `ResourcePath`
        The URIs to test.

    Returns
    -------
    existence : `dict` of [`ResourcePath`, `bool`]
        Mapping of original URI to boolean indicating existence. A URI
        whose check raised an exception is reported as not existing.
    """
    results: Dict[ResourcePath, bool] = {}
    # Use the executor as a context manager so that its worker threads
    # are always shut down, even if this method exits early.
    with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as exists_executor:
        future_exists = {exists_executor.submit(uri.exists): uri for uri in uris}
        for future in concurrent.futures.as_completed(future_exists):
            uri = future_exists[future]
            try:
                exists = future.result()
            except Exception:
                # Deliberate best effort: a failed check counts as absent.
                exists = False
            results[uri] = exists
    return results
751 def remove(self) -> None:
752 """Remove the resource."""
753 raise NotImplementedError()
755 def isabs(self) -> bool:
756 """Indicate that the resource is fully specified.
758 For non-schemeless URIs this is always true.
760 Returns
761 -------
762 isabs : `bool`
763 `True` in all cases except schemeless URI.
764 """
765 return True
767 def abspath(self) -> ResourcePath:
768 """Return URI using an absolute path.
770 Returns
771 -------
772 abs : `ResourcePath`
773 Absolute URI. For non-schemeless URIs this always returns itself.
774 Schemeless URIs are upgraded to file URIs.
775 """
776 return self
778 def _as_local(self) -> Tuple[str, bool]:
779 """Return the location of the (possibly remote) resource as local file.
781 This is a helper function for `as_local` context manager.
783 Returns
784 -------
785 path : `str`
786 If this is a remote resource, it will be a copy of the resource
787 on the local file system, probably in a temporary directory.
788 For a local resource this should be the actual path to the
789 resource.
790 is_temporary : `bool`
791 Indicates if the local path is a temporary file or not.
792 """
793 raise NotImplementedError()
795 @contextlib.contextmanager
796 def as_local(self) -> Iterator[ResourcePath]:
797 """Return the location of the (possibly remote) resource as local file.
799 Yields
800 ------
801 local : `ResourcePath`
802 If this is a remote resource, it will be a copy of the resource
803 on the local file system, probably in a temporary directory.
804 For a local resource this should be the actual path to the
805 resource.
807 Notes
808 -----
809 The context manager will automatically delete any local temporary
810 file.
812 Examples
813 --------
814 Should be used as a context manager:
816 .. code-block:: py
818 with uri.as_local() as local:
819 ospath = local.ospath
820 """
821 if self.dirLike:
822 raise IsADirectoryError(f"Directory-like URI {self} cannot be fetched as local.")
823 local_src, is_temporary = self._as_local()
824 local_uri = ResourcePath(local_src, isTemporary=is_temporary)
826 try:
827 yield local_uri
828 finally:
829 # The caller might have relocated the temporary file.
830 # Do not ever delete if the temporary matches self
831 # (since it may have been that a temporary file was made local
832 # but already was local).
833 if self != local_uri and is_temporary and local_uri.exists():
834 local_uri.remove()
@classmethod
@contextlib.contextmanager
def temporary_uri(
    cls, prefix: Optional[ResourcePath] = None, suffix: Optional[str] = None
) -> Iterator[ResourcePath]:
    """Create a temporary file-like URI.

    Parameters
    ----------
    prefix : `ResourcePath`, optional
        Prefix to use. Without this the path will be formed as a local
        file URI in a temporary directory. Ensuring that the prefix
        location exists is the responsibility of the caller.
    suffix : `str`, optional
        A file suffix to be used. The ``.`` should be included in this
        suffix.

    Yields
    ------
    uri : `ResourcePath`
        The temporary URI. Will be removed when the context is completed.

    Raises
    ------
    NotImplementedError
        Raised if the generated URI turns out to be dir-like.
    """
    use_tempdir = False
    if prefix is None:
        prefix = ResourcePath(tempfile.mkdtemp(), forceDirectory=True, isTemporary=True)
        # Record that we need to delete this directory. Can not rely
        # on isTemporary flag since an external prefix may have that
        # set as well.
        use_tempdir = True

    try:
        # Need to create a randomized file name. For consistency do not
        # use mkstemp for local and something else for remote.
        # Additionally this method does not create the file to prevent
        # name clashes.
        characters = "abcdefghijklmnopqrstuvwxyz0123456789_"
        rng = Random()
        tempname = "".join(rng.choice(characters) for _ in range(16))
        if suffix:
            tempname += suffix
        temporary_uri = prefix.join(tempname, isTemporary=True)
        if temporary_uri.dirLike:
            # If we had a safe way to clean up a remote temporary
            # directory, we could support this.
            raise NotImplementedError("temporary_uri cannot be used to create a temporary directory.")
    except BaseException:
        # Setup failed before anything was yielded; do not leak the
        # directory that was created above.
        if use_tempdir:
            shutil.rmtree(prefix.ospath, ignore_errors=True)
        raise

    try:
        yield temporary_uri
    finally:
        if use_tempdir:
            shutil.rmtree(prefix.ospath, ignore_errors=True)
        else:
            # It's okay if this does not work because the user removed
            # the file.
            with contextlib.suppress(FileNotFoundError):
                temporary_uri.remove()
892 def read(self, size: int = -1) -> bytes:
893 """Open the resource and return the contents in bytes.
895 Parameters
896 ----------
897 size : `int`, optional
898 The number of bytes to read. Negative or omitted indicates
899 that all data should be read.
900 """
901 raise NotImplementedError()
903 def write(self, data: bytes, overwrite: bool = True) -> None:
904 """Write the supplied bytes to the new resource.
906 Parameters
907 ----------
908 data : `bytes`
909 The bytes to write to the resource. The entire contents of the
910 resource will be replaced.
911 overwrite : `bool`, optional
912 If `True` the resource will be overwritten if it exists. Otherwise
913 the write will fail.
914 """
915 raise NotImplementedError()
917 def mkdir(self) -> None:
918 """For a dir-like URI, create the directory resource if needed."""
919 raise NotImplementedError()
921 def isdir(self) -> bool:
922 """Return True if this URI looks like a directory, else False."""
923 return self.dirLike
925 def size(self) -> int:
926 """For non-dir-like URI, return the size of the resource.
928 Returns
929 -------
930 sz : `int`
931 The size in bytes of the resource associated with this URI.
932 Returns 0 if dir-like.
933 """
934 raise NotImplementedError()
936 def __str__(self) -> str:
937 """Convert the URI to its native string form."""
938 return self.geturl()
940 def __repr__(self) -> str:
941 """Return string representation suitable for evaluation."""
942 return f'ResourcePath("{self.geturl()}")'
944 def __eq__(self, other: Any) -> bool:
945 """Compare supplied object with this `ResourcePath`."""
946 if not isinstance(other, ResourcePath):
947 return NotImplemented
948 return self.geturl() == other.geturl()
950 def __hash__(self) -> int:
951 """Return hash of this object."""
952 return hash(str(self))
954 def __lt__(self, other: ResourcePath) -> bool:
955 return self.geturl() < other.geturl()
957 def __le__(self, other: ResourcePath) -> bool:
958 return self.geturl() <= other.geturl()
960 def __gt__(self, other: ResourcePath) -> bool:
961 return self.geturl() > other.geturl()
963 def __ge__(self, other: ResourcePath) -> bool:
964 return self.geturl() >= other.geturl()
966 def __copy__(self) -> ResourcePath:
967 """Copy constructor.
969 Object is immutable so copy can return itself.
970 """
971 # Implement here because the __new__ method confuses things
972 return self
974 def __deepcopy__(self, memo: Any) -> ResourcePath:
975 """Deepcopy the object.
977 Object is immutable so copy can return itself.
978 """
979 # Implement here because the __new__ method confuses things
980 return self
982 def __getnewargs__(self) -> Tuple:
983 """Support pickling."""
984 return (str(self),)
986 @classmethod
987 def _fixDirectorySep(
988 cls, parsed: urllib.parse.ParseResult, forceDirectory: bool = False
989 ) -> Tuple[urllib.parse.ParseResult, bool]:
990 """Ensure that a path separator is present on directory paths.
992 Parameters
993 ----------
994 parsed : `~urllib.parse.ParseResult`
995 The result from parsing a URI using `urllib.parse`.
996 forceDirectory : `bool`, optional
997 If `True` forces the URI to end with a separator, otherwise given
998 URI is interpreted as is. Specifying that the URI is conceptually
999 equivalent to a directory can break some ambiguities when
1000 interpreting the last element of a path.
1002 Returns
1003 -------
1004 modified : `~urllib.parse.ParseResult`
1005 Update result if a URI is being handled.
1006 dirLike : `bool`
1007 `True` if given parsed URI has a trailing separator or
1008 forceDirectory is True. Otherwise `False`.
1009 """
1010 # assume we are not dealing with a directory like URI
1011 dirLike = False
1013 # Directory separator
1014 sep = cls._pathModule.sep
1016 # URI is dir-like if explicitly stated or if it ends on a separator
1017 endsOnSep = parsed.path.endswith(sep)
1018 if forceDirectory or endsOnSep:
1019 dirLike = True
1020 # only add the separator if it's not already there
1021 if not endsOnSep:
1022 parsed = parsed._replace(path=parsed.path + sep)
1024 return parsed, dirLike
1026 @classmethod
1027 def _fixupPathUri(
1028 cls,
1029 parsed: urllib.parse.ParseResult,
1030 root: Optional[Union[str, ResourcePath]] = None,
1031 forceAbsolute: bool = False,
1032 forceDirectory: bool = False,
1033 ) -> Tuple[urllib.parse.ParseResult, bool]:
1034 """Correct any issues with the supplied URI.
1036 Parameters
1037 ----------
1038 parsed : `~urllib.parse.ParseResult`
1039 The result from parsing a URI using `urllib.parse`.
1040 root : `str` or `ResourcePath`, ignored
1041 Not used by the this implementation since all URIs are
1042 absolute except for those representing the local file system.
1043 forceAbsolute : `bool`, ignored.
1044 Not used by this implementation. URIs are generally always
1045 absolute.
1046 forceDirectory : `bool`, optional
1047 If `True` forces the URI to end with a separator, otherwise given
1048 URI is interpreted as is. Specifying that the URI is conceptually
1049 equivalent to a directory can break some ambiguities when
1050 interpreting the last element of a path.
1052 Returns
1053 -------
1054 modified : `~urllib.parse.ParseResult`
1055 Update result if a URI is being handled.
1056 dirLike : `bool`
1057 `True` if given parsed URI has a trailing separator or
1058 forceDirectory is True. Otherwise `False`.
1060 Notes
1061 -----
1062 Relative paths are explicitly not supported by RFC8089 but `urllib`
1063 does accept URIs of the form ``file:relative/path.ext``. They need
1064 to be turned into absolute paths before they can be used. This is
1065 always done regardless of the ``forceAbsolute`` parameter.
1067 AWS S3 differentiates between keys with trailing POSIX separators (i.e
1068 `/dir` and `/dir/`) whereas POSIX does not neccessarily.
1070 Scheme-less paths are normalized.
1071 """
1072 return cls._fixDirectorySep(parsed, forceDirectory)
def transfer_from(
    self,
    src: ResourcePath,
    transfer: str,
    overwrite: bool = False,
    transaction: Optional[TransactionProtocol] = None,
) -> None:
    """Transfer to this URI from another.

    This base implementation supports no transfer modes and always
    raises.

    Parameters
    ----------
    src : `ResourcePath`
        Source URI.
    transfer : `str`
        Mode to use for transferring the resource. Generically there are
        many standard options: copy, link, symlink, hardlink, relsymlink.
        Not all URIs support all modes.
    overwrite : `bool`, optional
        Allow an existing file to be overwritten. Defaults to `False`.
    transaction : `~lsst.resources.utils.TransactionProtocol`, optional
        A transaction object that can (depending on implementation)
        rollback transfers on error. Not guaranteed to be implemented.

    Raises
    ------
    NotImplementedError
        Always raised by this implementation.

    Notes
    -----
    Conceptually this is hard to scale as the number of URI schemes
    grows. The destination URI is more important than the source URI
    since that is where all the transfer modes are relevant (with the
    complication that "move" deletes the source).

    Local file to local file is the fundamental use case but every
    other scheme has to support "copy" to local file (with implicit
    support for "move") and copy from local file.
    All the "link" options tend to be specific to local file systems.

    "move" is a "copy" where the remote resource is deleted at the end.
    Whether this works depends on the source URI rather than the
    destination URI. Reverting a move on transaction rollback is
    expected to be problematic if a remote resource was involved.
    """
    message = f"No transfer modes supported by URI scheme {self.scheme}"
    raise NotImplementedError(message)
def walk(
    self, file_filter: Optional[Union[str, re.Pattern]] = None
) -> Iterator[Union[List, Tuple[ResourcePath, List[str], List[str]]]]:
    """Walk the directory tree returning matching files and directories.

    This base implementation provides no walking support and always
    raises as soon as it is called.

    Parameters
    ----------
    file_filter : `str` or `re.Pattern`, optional
        Regex to filter out files from the list before it is returned.

    Yields
    ------
    dirpath : `ResourcePath`
        Current directory being examined.
    dirnames : `list` of `str`
        Names of subdirectories within dirpath.
    filenames : `list` of `str`
        Names of all the files within dirpath.

    Raises
    ------
    NotImplementedError
        Always raised by this implementation.
    """
    raise NotImplementedError()
@overload
@classmethod
def findFileResources(
    cls,
    candidates: Iterable[ResourcePathExpression],
    file_filter: Optional[Union[str, re.Pattern]],
    grouped: Literal[True],
) -> Iterator[Iterator[ResourcePath]]:
    ...

@overload
@classmethod
def findFileResources(
    cls,
    candidates: Iterable[ResourcePathExpression],
    *,
    grouped: Literal[True],
) -> Iterator[Iterator[ResourcePath]]:
    ...

@overload
@classmethod
def findFileResources(
    cls,
    candidates: Iterable[ResourcePathExpression],
    file_filter: Optional[Union[str, re.Pattern]] = None,
    grouped: Literal[False] = False,
) -> Iterator[ResourcePath]:
    ...

@classmethod
def findFileResources(
    cls,
    candidates: Iterable[ResourcePathExpression],
    file_filter: Optional[Union[str, re.Pattern]] = None,
    grouped: bool = False,
) -> Iterator[Union[ResourcePath, Iterator[ResourcePath]]]:
    """Get all the files from a list of values.

    Parameters
    ----------
    candidates : iterable [`str` or `ResourcePath`]
        The files to return and directories in which to look for files to
        return.
    file_filter : `str` or `re.Pattern`, optional
        The regex to use when searching for files within directories.
        By default returns all the found files.
    grouped : `bool`, optional
        If `True` the results will be grouped by directory and each
        yielded value will be an iterator over URIs. If `False` each
        URI will be returned separately.

    Yields
    ------
    found_file: `ResourcePath`
        The passed-in URIs and URIs found in passed-in directories.
        If grouping is enabled, each of the yielded values will be an
        iterator yielding members of the group. Files given explicitly
        will be returned as a single group at the end.

    Notes
    -----
    If a value is a file it is yielded immediately without checking that it
    exists. If a value is a directory, all the files in the directory
    (recursively) that match the regex will be yielded in turn.

    Each yielded group is bound to its own directory when it is created,
    so groups may safely be collected first and iterated later.
    """
    fileRegex = None if file_filter is None else re.compile(file_filter)

    # Explicitly-given files are collected here when grouping is requested.
    singles = []

    # Find all the files of interest
    for location in candidates:
        uri = ResourcePath(location)

        if not uri.isdir():
            if grouped:
                singles.append(uri)
            else:
                yield uri
            continue

        for found in uri.walk(fileRegex):
            if not found:
                # This means the uri does not exist and by
                # convention we ignore it
                continue
            root, _, files = found
            if not files:
                continue
            if grouped:
                # ``map`` captures the bound ``root.join`` right now. A
                # generator expression would look ``root`` up lazily and
                # see a later directory if the caller advances this
                # generator before consuming the group.
                yield map(root.join, files)
            else:
                for name in files:
                    yield root.join(name)

    # Finally, return any explicitly given files in one group
    if grouped and singles:
        yield iter(singles)
@contextlib.contextmanager
def open(
    self,
    mode: str = "r",
    *,
    encoding: Optional[str] = None,
    prefer_file_temporary: bool = False,
) -> Iterator[ResourceHandleProtocol]:
    """Return a context manager that wraps an object that behaves like an
    open file at the location of the URI.

    Parameters
    ----------
    mode : `str`
        String indicating the mode in which to open the file. Values are
        the same as those accepted by `builtins.open`, though intrinsically
        read-only URI types may only support read modes, and
        `io.IOBase.seekable` is not guaranteed to be `True` on the returned
        object.
    encoding : `str`, optional
        Unicode encoding for text IO; ignored for binary IO. Defaults to
        ``locale.getpreferredencoding(False)``, just as `builtins.open`
        does.
    prefer_file_temporary : `bool`, optional
        If `True`, for implementations that require transfers from a remote
        system to temporary local storage and/or back, use a temporary file
        instead of an in-memory buffer; this is generally slower, but it
        may be necessary to avoid excessive memory usage by large files.
        Ignored by implementations that do not require a temporary.

    Returns
    -------
    cm : `contextlib.ContextManager`
        A context manager that wraps a file-like object.

    Raises
    ------
    IsADirectoryError
        Raised if this URI is directory-like.
    FileExistsError
        Raised if ``mode`` contains ``"x"`` and the resource already
        exists.

    Notes
    -----
    The default implementation of this method uses a local temporary buffer
    (in-memory or file, depending on ``prefer_file_temporary``) with calls
    to `read`, `write`, `as_local`, and `transfer_from` as necessary to
    read and write from/to remote systems. Remote writes thus occur only
    when the context manager is exited. `ResourcePath` implementations
    that can return a more efficient native buffer should do so whenever
    possible (as is guaranteed for local files). `ResourcePath`
    implementations for which `as_local` does not return a temporary are
    required to reimplement `open`, though they may delegate to `super`
    when `prefer_file_temporary` is `False`.
    """
    if self.dirLike:
        raise IsADirectoryError(f"Directory-like URI {self} cannot be opened.")
    if "x" in mode and self.exists():
        raise FileExistsError(f"File at {self} already exists.")
    if prefer_file_temporary:
        if "r" in mode or "a" in mode:
            # Existing content is needed, so fetch a local copy.
            local_cm = self.as_local()
        else:
            local_cm = self.temporary_uri(suffix=self.getExtension())
        # The builtin refuses an encoding in binary mode (ValueError),
        # whereas this method documents encoding as ignored for binary IO.
        file_encoding = None if "b" in mode else encoding
        with local_cm as local_uri:
            assert local_uri.isTemporary, (
                "ResourcePath implementations for which as_local is not "
                "a temporary must reimplement `open`."
            )
            # ``io.open`` is the builtin ``open``; spelling it this way
            # avoids any confusion with this method's own name.
            with io.open(local_uri.ospath, mode=mode, encoding=file_encoding) as file_buffer:
                if "a" in mode:
                    file_buffer.seek(0, io.SEEK_END)
                yield file_buffer
            # Push the local file back unless the mode was read-only.
            if "r" not in mode or "+" in mode:
                self.transfer_from(local_uri, transfer="copy", overwrite=("x" not in mode))
    else:
        with self._openImpl(mode, encoding=encoding) as handle:
            yield handle
1306 @contextlib.contextmanager
1307 def _openImpl(
1308 self, mode: str = "r", *, encoding: Optional[str] = None
1309 ) -> Iterator[ResourceHandleProtocol]:
1310 """Implement opening of a resource handle.
1312 This private method may be overridden by specific `ResourcePath`
1313 implementations to provide a customized handle like interface.
1315 Parameters
1316 ----------
1317 mode : `str`
1318 The mode the handle should be opened with
1319 encoding : `str`, optional
1320 The byte encoding of any binary text
1322 Yields
1323 ------
1324 handle : `BaseResourceHandle`
1325 A handle that conforms to the `BaseResourcehandle interface
1327 Notes
1328 -----
1329 The base implementation of a file handle reads in a files entire
1330 contents into a buffer for manipulation, and then writes it back out
1331 upon close. Subclasses of this class may offer more fine grained
1332 control.
1333 """
1334 if "r" in mode or "a" in mode:
1335 in_bytes = self.read()
1336 else:
1337 in_bytes = b""
1338 if "b" in mode:
1339 bytes_buffer = io.BytesIO(in_bytes)
1340 if "a" in mode:
1341 bytes_buffer.seek(0, io.SEEK_END)
1342 yield bytes_buffer
1343 out_bytes = bytes_buffer.getvalue()
1344 else:
1345 if encoding is None:
1346 encoding = locale.getpreferredencoding(False)
1347 str_buffer = io.StringIO(in_bytes.decode(encoding))
1348 if "a" in mode:
1349 str_buffer.seek(0, io.SEEK_END)
1350 yield str_buffer
1351 out_bytes = str_buffer.getvalue().encode(encoding)
1352 if "r" not in mode or "+" in mode:
1353 self.write(out_bytes, overwrite=("x" not in mode))