Coverage for python/lsst/resources/_resourcePath.py: 22%
401 statements
« prev ^ index » next coverage.py v6.5.0, created at 2022-12-02 01:57 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2022-12-02 01:57 +0000
1# This file is part of lsst-resources.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# Use of this source code is governed by a 3-clause BSD-style
10# license that can be found in the LICENSE file.
12from __future__ import annotations
14import concurrent.futures
15import contextlib
16import copy
17import io
18import locale
19import logging
20import os
21import posixpath
22import re
23import shutil
24import tempfile
25import urllib.parse
26from pathlib import Path, PurePath, PurePosixPath
27from random import Random
29__all__ = ("ResourcePath", "ResourcePathExpression")
31from typing import (
32 IO,
33 TYPE_CHECKING,
34 Any,
35 Dict,
36 Iterable,
37 Iterator,
38 List,
39 Literal,
40 Optional,
41 Tuple,
42 Type,
43 Union,
44 overload,
45)
47if TYPE_CHECKING: 47 ↛ 48line 47 didn't jump to line 48, because the condition on line 47 was never true
48 from .utils import TransactionProtocol
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Regex for looking for URI escapes. Only upper-case hexadecimal digits
# are matched by this pattern.
ESCAPES_RE = re.compile(r"%[A-F0-9]{2}")

# Precomputed escaped hash (the percent-quoted form of "#").
ESCAPED_HASH = urllib.parse.quote("#")

# Maximum number of worker threads for parallelized operations.
# If greater than 10, be aware that this number has to be consistent
# with connection pool sizing (for example in urllib3).
MAX_WORKERS = 10


ResourcePathExpression = Union[str, urllib.parse.ParseResult, "ResourcePath", Path]
"""Type-annotation alias for objects that can be coerced to ResourcePath.
"""
70class ResourcePath:
71 """Convenience wrapper around URI parsers.
73 Provides access to URI components and can convert file
74 paths into absolute path URIs. Scheme-less URIs are treated as if
75 they are local file system paths and are converted to absolute URIs.
77 A specialist subclass is created for each supported URI scheme.
79 Parameters
80 ----------
81 uri : `str`, `Path`, `urllib.parse.ParseResult`, or `ResourcePath`.
82 URI in string form. Can be scheme-less if referring to a local
83 filesystem path.
84 root : `str` or `ResourcePath`, optional
85 When fixing up a relative path in a ``file`` scheme or if scheme-less,
86 use this as the root. Must be absolute. If `None` the current
87 working directory will be used. Can be a file URI.
88 forceAbsolute : `bool`, optional
89 If `True`, scheme-less relative URI will be converted to an absolute
90 path using a ``file`` scheme. If `False` scheme-less URI will remain
91 scheme-less and will not be updated to ``file`` or absolute path.
92 forceDirectory: `bool`, optional
93 If `True` forces the URI to end with a separator, otherwise given URI
94 is interpreted as is.
95 isTemporary : `bool`, optional
96 If `True` indicates that this URI points to a temporary resource.
97 The default is `False`, unless ``uri`` is already a `ResourcePath`
98 instance and ``uri.isTemporary is True``.
99 """
101 _pathLib: Type[PurePath] = PurePosixPath
102 """Path library to use for this scheme."""
104 _pathModule = posixpath
105 """Path module to use for this scheme."""
107 transferModes: Tuple[str, ...] = ("copy", "auto", "move")
108 """Transfer modes supported by this implementation.
110 Move is special in that it is generally a copy followed by an unlink.
111 Whether that unlink works depends critically on whether the source URI
112 implements unlink. If it does not the move will be reported as a failure.
113 """
115 transferDefault: str = "copy"
116 """Default mode to use for transferring if ``auto`` is specified."""
118 quotePaths = True
119 """True if path-like elements modifying a URI should be quoted.
121 All non-schemeless URIs have to internally use quoted paths. Therefore
122 if a new file name is given (e.g. to updatedFile or join) a decision must
123 be made whether to quote it to be consistent.
124 """
126 isLocal = False
127 """If `True` this URI refers to a local file."""
129 # This is not an ABC with abstract methods because the __new__ being
130 # a factory confuses mypy such that it assumes that every constructor
131 # returns a ResourcePath and then determines that all the abstract methods
132 # are still abstract. If they are not marked abstract but just raise
133 # mypy is fine with it.
135 # mypy is confused without these
136 _uri: urllib.parse.ParseResult
137 isTemporary: bool
138 dirLike: bool
    def __new__(
        cls,
        uri: ResourcePathExpression,
        root: Optional[Union[str, ResourcePath]] = None,
        forceAbsolute: bool = True,
        forceDirectory: bool = False,
        isTemporary: Optional[bool] = None,
    ) -> ResourcePath:
        """Create and return new specialist ResourcePath subclass.

        Acts as a factory: the concrete class of the returned object is
        chosen from the URI scheme unless a subclass is being instantiated
        directly from a `urllib.parse.ParseResult`.

        Parameters
        ----------
        uri : `str`, `os.PathLike`, `urllib.parse.ParseResult`, \
                or `ResourcePath`
            The URI to wrap. An existing `ResourcePath` is returned
            unchanged when it already agrees with the other arguments.
        root : `str` or `ResourcePath`, optional
            Forwarded to the subclass ``_fixupPathUri`` method.
        forceAbsolute : `bool`, optional
            Forwarded to the subclass ``_fixupPathUri`` method. For a
            scheme-less `ResourcePath` input, `True` triggers a rebuild from
            its string form.
        forceDirectory : `bool`, optional
            If `True` the resulting URI is forced to be dir-like.
        isTemporary : `bool`, optional
            Whether the URI refers to a temporary resource. `None` is
            stored as `False` for newly-created instances.

        Returns
        -------
        new : `ResourcePath`
            Instance of the subclass matching the scheme.

        Raises
        ------
        RuntimeError
            Raised if ``uri`` is a `ResourcePath` that conflicts with
            ``forceDirectory`` or ``isTemporary``.
        ValueError
            Raised if ``uri`` is of an unsupported type.
        NotImplementedError
            Raised if the URI scheme is not recognized.
        """
        parsed: urllib.parse.ParseResult
        dirLike: bool = False
        subclass: Optional[Type[ResourcePath]] = None

        # Path-like objects are handled through their string form.
        if isinstance(uri, os.PathLike):
            uri = str(uri)

        # Record if we need to post process the URI components
        # or if the instance is already fully configured
        if isinstance(uri, str):
            # Since local file names can have special characters in them
            # we need to quote them for the parser but we can unquote
            # later. Assume that all other URI schemes are quoted.
            # Since sometimes people write file:/a/b and not file:///a/b
            # we should not quote in the explicit case of file:
            if "://" not in uri and not uri.startswith("file:"):
                if ESCAPES_RE.search(uri):
                    log.warning("Possible double encoding of %s", uri)
                else:
                    uri = urllib.parse.quote(uri)
                    # Special case hash since we must support fragments
                    # even in schemeless URIs -- although try to only replace
                    # them in file part and not directory part
                    if ESCAPED_HASH in uri:
                        dirpos = uri.rfind("/")
                        # Do replacement after this /
                        uri = uri[: dirpos + 1] + uri[dirpos + 1 :].replace(ESCAPED_HASH, "#")

            parsed = urllib.parse.urlparse(uri)
        elif isinstance(uri, urllib.parse.ParseResult):
            parsed = copy.copy(uri)
            # If we are being instantiated with a subclass, rather than
            # ResourcePath, ensure that that subclass is used directly.
            # This could lead to inconsistencies if this constructor
            # is used externally outside of the ResourcePath.replace() method.
            #   S3ResourcePath(urllib.parse.urlparse("file://a/b.txt"))
            # will be a problem.
            # This is needed to prevent a schemeless absolute URI become
            # a file URI unexpectedly when calling updatedFile or
            # updatedExtension
            if cls is not ResourcePath:
                parsed, dirLike = cls._fixDirectorySep(parsed, forceDirectory)
                subclass = cls

        elif isinstance(uri, ResourcePath):
            # Since ResourcePath is immutable we can return the argument
            # unchanged if it already agrees with forceDirectory, isTemporary,
            # and forceAbsolute.
            # We invoke __new__ again with str(self) to add a scheme for
            # forceAbsolute, but for the others that seems more likely to paper
            # over logic errors than do something useful, so we just raise.
            if forceDirectory and not uri.dirLike:
                raise RuntimeError(
                    f"{uri} is already a file-like ResourcePath; cannot force it to directory."
                )
            if isTemporary is not None and isTemporary is not uri.isTemporary:
                raise RuntimeError(
                    f"{uri} is already a {'temporary' if uri.isTemporary else 'permanent'} "
                    f"ResourcePath; cannot make it {'temporary' if isTemporary else 'permanent'}."
                )
            if forceAbsolute and not uri.scheme:
                return ResourcePath(
                    str(uri),
                    root=root,
                    forceAbsolute=True,
                    forceDirectory=uri.dirLike,
                    isTemporary=uri.isTemporary,
                )
            return uri
        else:
            raise ValueError(
                f"Supplied URI must be string, Path, ResourcePath, or ParseResult but got '{uri!r}'"
            )

        if subclass is None:
            # Work out the subclass from the URI scheme. The imports are
            # done locally, inside the branch that needs them.
            if not parsed.scheme:
                from .schemeless import SchemelessResourcePath

                subclass = SchemelessResourcePath
            elif parsed.scheme == "file":
                from .file import FileResourcePath

                subclass = FileResourcePath
            elif parsed.scheme == "s3":
                from .s3 import S3ResourcePath

                subclass = S3ResourcePath
            elif parsed.scheme.startswith("http"):
                from .http import HttpResourcePath

                subclass = HttpResourcePath
            elif parsed.scheme == "gs":
                from .gs import GSResourcePath

                subclass = GSResourcePath
            elif parsed.scheme == "resource":
                # Rules for scheme names disallow pkg_resource
                from .packageresource import PackageResourcePath

                subclass = PackageResourcePath
            elif parsed.scheme == "mem":
                # in-memory datastore object
                from .mem import InMemoryResourcePath

                subclass = InMemoryResourcePath
            else:
                raise NotImplementedError(
                    f"No URI support for scheme: '{parsed.scheme}' in {parsed.geturl()}"
                )

            parsed, dirLike = subclass._fixupPathUri(
                parsed, root=root, forceAbsolute=forceAbsolute, forceDirectory=forceDirectory
            )

            # It is possible for the class to change from schemeless
            # to file so handle that
            if parsed.scheme == "file":
                from .file import FileResourcePath

                subclass = FileResourcePath

        # Now create an instance of the correct subclass and set the
        # attributes directly
        self = object.__new__(subclass)
        self._uri = parsed
        self.dirLike = dirLike
        if isTemporary is None:
            isTemporary = False
        self.isTemporary = isTemporary
        return self
    @property
    def scheme(self) -> str:
        """Return the URI scheme.

        Returns
        -------
        scheme : `str`
            The ``scheme`` field of the parsed URI held by this object.

        Notes
        -----
        (``://`` is not part of the scheme).
        """
        return self._uri.scheme
    @property
    def netloc(self) -> str:
        """Return the URI network location.

        Returns
        -------
        netloc : `str`
            The ``netloc`` field of the parsed URI held by this object.
        """
        return self._uri.netloc
    @property
    def path(self) -> str:
        """Return the path component of the URI.

        Returns
        -------
        path : `str`
            The ``path`` field of the parsed URI, returned exactly as
            stored (no unquoting is applied here).
        """
        return self._uri.path
301 @property
302 def unquoted_path(self) -> str:
303 """Return path component of the URI with any URI quoting reversed."""
304 return urllib.parse.unquote(self._uri.path)
306 @property
307 def ospath(self) -> str:
308 """Return the path component of the URI localized to current OS."""
309 raise AttributeError(f"Non-file URI ({self}) has no local OS path.")
311 @property
312 def relativeToPathRoot(self) -> str:
313 """Return path relative to network location.
315 Effectively, this is the path property with posix separator stripped
316 from the left hand side of the path.
318 Always unquotes.
319 """
320 p = self._pathLib(self.path)
321 relToRoot = str(p.relative_to(p.root))
322 if self.dirLike and not relToRoot.endswith("/"):
323 relToRoot += "/"
324 return urllib.parse.unquote(relToRoot)
326 @property
327 def is_root(self) -> bool:
328 """Return whether this URI points to the root of the network location.
330 This means that the path components refers to the top level.
331 """
332 relpath = self.relativeToPathRoot
333 if relpath == "./":
334 return True
335 return False
    @property
    def fragment(self) -> str:
        """Return the fragment component of the URI.

        Returns
        -------
        fragment : `str`
            The ``fragment`` field of the parsed URI held by this object.
        """
        return self._uri.fragment
    @property
    def params(self) -> str:
        """Return any parameters included in the URI.

        Returns
        -------
        params : `str`
            The ``params`` field of the parsed URI held by this object.
        """
        return self._uri.params
    @property
    def query(self) -> str:
        """Return any query strings included in the URI.

        Returns
        -------
        query : `str`
            The ``query`` field of the parsed URI held by this object.
        """
        return self._uri.query
352 def geturl(self) -> str:
353 """Return the URI in string form.
355 Returns
356 -------
357 url : `str`
358 String form of URI.
359 """
360 return self._uri.geturl()
    def root_uri(self) -> ResourcePath:
        """Return the base root URI.

        Returns
        -------
        uri : `ResourcePath`
            root URI: this URI with the path component replaced by an
            empty string and forced to be dir-like.
        """
        return self.replace(path="", forceDirectory=True)
372 def split(self) -> Tuple[ResourcePath, str]:
373 """Split URI into head and tail.
375 Returns
376 -------
377 head: `ResourcePath`
378 Everything leading up to tail, expanded and normalized as per
379 ResourcePath rules.
380 tail : `str`
381 Last `self.path` component. Tail will be empty if path ends on a
382 separator. Tail will never contain separators. It will be
383 unquoted.
385 Notes
386 -----
387 Equivalent to `os.path.split()` where head preserves the URI
388 components.
389 """
390 head, tail = self._pathModule.split(self.path)
391 headuri = self._uri._replace(path=head)
393 # The file part should never include quoted metacharacters
394 tail = urllib.parse.unquote(tail)
396 # Schemeless is special in that it can be a relative path
397 # We need to ensure that it stays that way. All other URIs will
398 # be absolute already.
399 forceAbsolute = self._pathModule.isabs(self.path)
400 return ResourcePath(headuri, forceDirectory=True, forceAbsolute=forceAbsolute), tail
402 def basename(self) -> str:
403 """Return the base name, last element of path, of the URI.
405 Returns
406 -------
407 tail : `str`
408 Last part of the path attribute. Trail will be empty if path ends
409 on a separator.
411 Notes
412 -----
413 If URI ends on a slash returns an empty string. This is the second
414 element returned by `split()`.
416 Equivalent of `os.path.basename()``.
417 """
418 return self.split()[1]
420 def dirname(self) -> ResourcePath:
421 """Return the directory component of the path as a new `ResourcePath`.
423 Returns
424 -------
425 head : `ResourcePath`
426 Everything except the tail of path attribute, expanded and
427 normalized as per ResourcePath rules.
429 Notes
430 -----
431 Equivalent of `os.path.dirname()`.
432 """
433 return self.split()[0]
435 def parent(self) -> ResourcePath:
436 """Return a `ResourcePath` of the parent directory.
438 Returns
439 -------
440 head : `ResourcePath`
441 Everything except the tail of path attribute, expanded and
442 normalized as per `ResourcePath` rules.
444 Notes
445 -----
446 For a file-like URI this will be the same as calling `dirname()`.
447 """
448 # When self is file-like, return self.dirname()
449 if not self.dirLike:
450 return self.dirname()
451 # When self is dir-like, return its parent directory,
452 # regardless of the presence of a trailing separator
453 originalPath = self._pathLib(self.path)
454 parentPath = originalPath.parent
455 return self.replace(path=str(parentPath), forceDirectory=True)
457 def replace(self, forceDirectory: bool = False, isTemporary: bool = False, **kwargs: Any) -> ResourcePath:
458 """Return new `ResourcePath` with specified components replaced.
460 Parameters
461 ----------
462 forceDirectory : `bool`, optional
463 Parameter passed to ResourcePath constructor to force this
464 new URI to be dir-like.
465 isTemporary : `bool`, optional
466 Indicate that the resulting URI is temporary resource.
467 **kwargs
468 Components of a `urllib.parse.ParseResult` that should be
469 modified for the newly-created `ResourcePath`.
471 Returns
472 -------
473 new : `ResourcePath`
474 New `ResourcePath` object with updated values.
476 Notes
477 -----
478 Does not, for now, allow a change in URI scheme.
479 """
480 # Disallow a change in scheme
481 if "scheme" in kwargs:
482 raise ValueError(f"Can not use replace() method to change URI scheme for {self}")
483 return self.__class__(
484 self._uri._replace(**kwargs), forceDirectory=forceDirectory, isTemporary=isTemporary
485 )
487 def updatedFile(self, newfile: str) -> ResourcePath:
488 """Return new URI with an updated final component of the path.
490 Parameters
491 ----------
492 newfile : `str`
493 File name with no path component.
495 Returns
496 -------
497 updated : `ResourcePath`
499 Notes
500 -----
501 Forces the ResourcePath.dirLike attribute to be false. The new file
502 path will be quoted if necessary.
503 """
504 if self.quotePaths:
505 newfile = urllib.parse.quote(newfile)
506 dir, _ = self._pathModule.split(self.path)
507 newpath = self._pathModule.join(dir, newfile)
509 updated = self.replace(path=newpath)
510 updated.dirLike = False
511 return updated
513 def updatedExtension(self, ext: Optional[str]) -> ResourcePath:
514 """Return a new `ResourcePath` with updated file extension.
516 All file extensions are replaced.
518 Parameters
519 ----------
520 ext : `str` or `None`
521 New extension. If an empty string is given any extension will
522 be removed. If `None` is given there will be no change.
524 Returns
525 -------
526 updated : `ResourcePath`
527 URI with the specified extension. Can return itself if
528 no extension was specified.
529 """
530 if ext is None:
531 return self
533 # Get the extension
534 current = self.getExtension()
536 # Nothing to do if the extension already matches
537 if current == ext:
538 return self
540 # Remove the current extension from the path
541 # .fits.gz counts as one extension do not use os.path.splitext
542 path = self.path
543 if current:
544 path = path[: -len(current)]
546 # Ensure that we have a leading "." on file extension (and we do not
547 # try to modify the empty string)
548 if ext and not ext.startswith("."):
549 ext = "." + ext
551 return self.replace(path=path + ext)
553 def getExtension(self) -> str:
554 """Return the file extension(s) associated with this URI path.
556 Returns
557 -------
558 ext : `str`
559 The file extension (including the ``.``). Can be empty string
560 if there is no file extension. Usually returns only the last
561 file extension unless there is a special extension modifier
562 indicating file compression, in which case the combined
563 extension (e.g. ``.fits.gz``) will be returned.
564 """
565 special = {".gz", ".bz2", ".xz", ".fz"}
567 # Get the file part of the path so as not to be confused by
568 # "." in directory names.
569 basename = self.basename()
570 extensions = self._pathLib(basename).suffixes
572 if not extensions:
573 return ""
575 ext = extensions.pop()
577 # Multiple extensions, decide whether to include the final two
578 if extensions and ext in special:
579 ext = f"{extensions[-1]}{ext}"
581 return ext
    def join(
        self, path: Union[str, ResourcePath], isTemporary: Optional[bool] = None, forceDirectory: bool = False
    ) -> ResourcePath:
        """Return new `ResourcePath` with additional path components.

        Parameters
        ----------
        path : `str`, `ResourcePath`
            Additional file components to append to the current URI. Assumed
            to include a file at the end. Will be quoted depending on the
            associated URI scheme. If the path looks like a URI with a scheme
            referring to an absolute location, it will be returned
            directly (matching the behavior of `os.path.join()`). It can
            also be a `ResourcePath`.
        isTemporary : `bool`, optional
            Indicate that the resulting URI represents a temporary resource.
            Default is ``self.isTemporary``.
        forceDirectory : `bool`, optional
            If `True` forces the URI to end with a separator, otherwise given
            URI is interpreted as is.

        Returns
        -------
        new : `ResourcePath`
            New URI with any file at the end replaced with the new path
            components.

        Notes
        -----
        Schemeless URIs assume local path separator but all other URIs assume
        POSIX separator if the supplied path has directory structure. It
        may be this never becomes a problem but datastore templates assume
        POSIX separator is being used.

        If an absolute `ResourcePath` is given for ``path`` it is assumed that
        this should be returned directly. Giving a ``path`` of an absolute
        scheme-less URI is not allowed for safety reasons as it may indicate
        a mistake in the calling code.

        Raises
        ------
        ValueError
            Raised if the ``path`` is an absolute scheme-less URI. In that
            situation it is unclear whether the intent is to return a
            ``file`` URI or it was a mistake and a relative scheme-less URI
            was meant.
        RuntimeError
            Raised if this attempts to join a temporary URI to a non-temporary
            URI.
        """
        # Inherit the temporary flag unless told otherwise; an explicit
        # False on a temporary URI is rejected.
        if isTemporary is None:
            isTemporary = self.isTemporary
        elif not isTemporary and self.isTemporary:
            raise RuntimeError("Cannot join temporary URI to non-temporary URI.")
        # If we have a full URI in path we will use it directly
        # but without forcing to absolute so that we can trap the
        # expected option of relative path.
        path_uri = ResourcePath(
            path, forceAbsolute=False, forceDirectory=forceDirectory, isTemporary=isTemporary
        )
        if path_uri.scheme:
            # Check for scheme so can distinguish explicit URIs from
            # absolute scheme-less URIs.
            return path_uri

        if path_uri.isabs():
            # Absolute scheme-less path.
            raise ValueError(f"Can not join absolute scheme-less {path_uri!r} to another URI.")

        # If this was originally a ResourcePath extract the unquoted path from
        # it. Otherwise we use the string we were given to allow "#" to appear
        # in the filename if given as a plain string.
        if not isinstance(path, str):
            path = path_uri.unquoted_path

        new = self.dirname()  # By definition a directory URI

        # new should be asked about quoting, not self, since dirname can
        # change the URI scheme for schemeless -> file
        if new.quotePaths:
            path = urllib.parse.quote(path)

        newpath = self._pathModule.normpath(self._pathModule.join(new.path, path))

        # normpath can strip trailing / so we force directory if the supplied
        # path ended with a /
        return new.replace(
            path=newpath,
            forceDirectory=(forceDirectory or path.endswith(self._pathModule.sep)),
            isTemporary=isTemporary,
        )
675 def relative_to(self, other: ResourcePath) -> Optional[str]:
676 """Return the relative path from this URI to the other URI.
678 Parameters
679 ----------
680 other : `ResourcePath`
681 URI to use to calculate the relative path. Must be a parent
682 of this URI.
684 Returns
685 -------
686 subpath : `str`
687 The sub path of this URI relative to the supplied other URI.
688 Returns `None` if there is no parent child relationship.
689 Scheme and netloc must match.
690 """
691 # Scheme-less absolute other is treated as if it's a file scheme.
692 # Scheme-less relative other can only return non-None if self
693 # is also scheme-less relative and that is handled specifically
694 # in a subclass.
695 if not other.scheme and other.isabs():
696 other = other.abspath()
698 # Scheme-less self is handled elsewhere.
699 if self.scheme != other.scheme or self.netloc != other.netloc:
700 return None
702 enclosed_path = self._pathLib(self.relativeToPathRoot)
703 parent_path = other.relativeToPathRoot
704 subpath: Optional[str]
705 try:
706 subpath = str(enclosed_path.relative_to(parent_path))
707 except ValueError:
708 subpath = None
709 else:
710 subpath = urllib.parse.unquote(subpath)
711 return subpath
    def exists(self) -> bool:
        """Indicate that the resource is available.

        Returns
        -------
        exists : `bool`
            `True` if the resource exists.

        Raises
        ------
        NotImplementedError
            Always raised by this base implementation.
        """
        raise NotImplementedError()
723 @classmethod
724 def mexists(cls, uris: Iterable[ResourcePath]) -> Dict[ResourcePath, bool]:
725 """Check for existence of multiple URIs at once.
727 Parameters
728 ----------
729 uris : iterable of `ResourcePath`
730 The URIs to test.
732 Returns
733 -------
734 existence : `dict` of [`ResourcePath`, `bool`]
735 Mapping of original URI to boolean indicating existence.
736 """
737 exists_executor = concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)
738 future_exists = {exists_executor.submit(uri.exists): uri for uri in uris}
740 results: Dict[ResourcePath, bool] = {}
741 for future in concurrent.futures.as_completed(future_exists):
742 uri = future_exists[future]
743 try:
744 exists = future.result()
745 except Exception:
746 exists = False
747 results[uri] = exists
748 return results
    def remove(self) -> None:
        """Remove the resource.

        Raises
        ------
        NotImplementedError
            Always raised by this base implementation.
        """
        raise NotImplementedError()
    def isabs(self) -> bool:
        """Indicate that the resource is fully specified.

        For non-schemeless URIs this is always true.

        Returns
        -------
        isabs : `bool`
            `True` in all cases except schemeless URI. This base
            implementation unconditionally returns `True`.
        """
        return True
    def abspath(self) -> ResourcePath:
        """Return URI using an absolute path.

        Returns
        -------
        abs : `ResourcePath`
            Absolute URI. For non-schemeless URIs this always returns itself.
            Schemeless URIs are upgraded to file URIs. This base
            implementation returns ``self`` unchanged.
        """
        return self
    def _as_local(self) -> Tuple[str, bool]:
        """Return the location of the (possibly remote) resource as local file.

        This is a helper function for `as_local` context manager.

        Returns
        -------
        path : `str`
            If this is a remote resource, it will be a copy of the resource
            on the local file system, probably in a temporary directory.
            For a local resource this should be the actual path to the
            resource.
        is_temporary : `bool`
            Indicates if the local path is a temporary file or not.

        Raises
        ------
        NotImplementedError
            Always raised by this base implementation.
        """
        raise NotImplementedError()
    @contextlib.contextmanager
    def as_local(self) -> Iterator[ResourcePath]:
        """Return the location of the (possibly remote) resource as local file.

        Yields
        ------
        local : `ResourcePath`
            If this is a remote resource, it will be a copy of the resource
            on the local file system, probably in a temporary directory.
            For a local resource this should be the actual path to the
            resource.

        Raises
        ------
        IsADirectoryError
            Raised if this URI is dir-like.

        Notes
        -----
        The context manager will automatically delete any local temporary
        file.

        Examples
        --------
        Should be used as a context manager:

        .. code-block:: py

           with uri.as_local() as local:
               ospath = local.ospath
        """
        if self.dirLike:
            raise IsADirectoryError(f"Directory-like URI {self} cannot be fetched as local.")
        # The helper reports both the local path and whether it is a
        # temporary copy.
        local_src, is_temporary = self._as_local()
        local_uri = ResourcePath(local_src, isTemporary=is_temporary)

        try:
            yield local_uri
        finally:
            # The caller might have relocated the temporary file.
            # Do not ever delete if the temporary matches self
            # (since it may have been that a temporary file was made local
            # but already was local).
            if self != local_uri and is_temporary and local_uri.exists():
                local_uri.remove()
835 @classmethod
836 @contextlib.contextmanager
837 def temporary_uri(
838 cls, prefix: Optional[ResourcePath] = None, suffix: Optional[str] = None
839 ) -> Iterator[ResourcePath]:
840 """Create a temporary file-like URI.
842 Parameters
843 ----------
844 prefix : `ResourcePath`, optional
845 Prefix to use. Without this the path will be formed as a local
846 file URI in a temporary directory. Ensuring that the prefix
847 location exists is the responsibility of the caller.
848 suffix : `str`, optional
849 A file suffix to be used. The ``.`` should be included in this
850 suffix.
852 Yields
853 ------
854 uri : `ResourcePath`
855 The temporary URI. Will be removed when the context is completed.
856 """
857 use_tempdir = False
858 if prefix is None:
859 prefix = ResourcePath(tempfile.mkdtemp(), forceDirectory=True, isTemporary=True)
860 # Record that we need to delete this directory. Can not rely
861 # on isTemporary flag since an external prefix may have that
862 # set as well.
863 use_tempdir = True
865 # Need to create a randomized file name. For consistency do not
866 # use mkstemp for local and something else for remote. Additionally
867 # this method does not create the file to prevent name clashes.
868 characters = "abcdefghijklmnopqrstuvwxyz0123456789_"
869 rng = Random()
870 tempname = "".join(rng.choice(characters) for _ in range(16))
871 if suffix:
872 tempname += suffix
873 temporary_uri = prefix.join(tempname, isTemporary=True)
874 if temporary_uri.dirLike:
875 # If we had a safe way to clean up a remote temporary directory, we
876 # could support this.
877 raise NotImplementedError("temporary_uri cannot be used to create a temporary directory.")
878 try:
879 yield temporary_uri
880 finally:
881 if use_tempdir:
882 shutil.rmtree(prefix.ospath, ignore_errors=True)
883 else:
884 try:
885 # It's okay if this does not work because the user removed
886 # the file.
887 temporary_uri.remove()
888 except FileNotFoundError:
889 pass
    def read(self, size: int = -1) -> bytes:
        """Open the resource and return the contents in bytes.

        Parameters
        ----------
        size : `int`, optional
            The number of bytes to read. Negative or omitted indicates
            that all data should be read.

        Raises
        ------
        NotImplementedError
            Always raised by this base implementation.
        """
        raise NotImplementedError()
    def write(self, data: bytes, overwrite: bool = True) -> None:
        """Write the supplied bytes to the new resource.

        Parameters
        ----------
        data : `bytes`
            The bytes to write to the resource. The entire contents of the
            resource will be replaced.
        overwrite : `bool`, optional
            If `True` the resource will be overwritten if it exists. Otherwise
            the write will fail.

        Raises
        ------
        NotImplementedError
            Always raised by this base implementation.
        """
        raise NotImplementedError()
    def mkdir(self) -> None:
        """For a dir-like URI, create the directory resource if needed.

        Raises
        ------
        NotImplementedError
            Always raised by this base implementation.
        """
        raise NotImplementedError()
    def isdir(self) -> bool:
        """Return True if this URI looks like a directory, else False.

        Returns
        -------
        isdir : `bool`
            The value of the ``dirLike`` attribute.
        """
        return self.dirLike
    def size(self) -> int:
        """For non-dir-like URI, return the size of the resource.

        Returns
        -------
        sz : `int`
            The size in bytes of the resource associated with this URI.
            Returns 0 if dir-like.

        Raises
        ------
        NotImplementedError
            Always raised by this base implementation.
        """
        raise NotImplementedError()
    def __str__(self) -> str:
        """Convert the URI to its native string form.

        Returns the same string as `geturl()`.
        """
        return self.geturl()
    def __repr__(self) -> str:
        """Return string representation suitable for evaluation.

        The class name in the result is always ``ResourcePath`` and the
        argument is the string form of the URI in double quotes.
        """
        return f'ResourcePath("{self.geturl()}")'
943 def __eq__(self, other: Any) -> bool:
944 """Compare supplied object with this `ResourcePath`."""
945 if not isinstance(other, ResourcePath):
946 return NotImplemented
947 return self.geturl() == other.geturl()
    def __hash__(self) -> int:
        """Return hash of this object.

        The hash is computed from the string form of the URI.
        """
        return hash(str(self))
953 def __lt__(self, other: ResourcePath) -> bool:
954 return self.geturl() < other.geturl()
956 def __le__(self, other: ResourcePath) -> bool:
957 return self.geturl() <= other.geturl()
959 def __gt__(self, other: ResourcePath) -> bool:
960 return self.geturl() > other.geturl()
962 def __ge__(self, other: ResourcePath) -> bool:
963 return self.geturl() >= other.geturl()
    def __copy__(self) -> ResourcePath:
        """Copy constructor.

        Object is immutable so copy can return itself.

        Returns
        -------
        copy : `ResourcePath`
            This same object.
        """
        # Implement here because the __new__ method confuses things
        return self
    def __deepcopy__(self, memo: Any) -> ResourcePath:
        """Deepcopy the object.

        Object is immutable so copy can return itself.

        Parameters
        ----------
        memo : `Any`
            Not used by this implementation.

        Returns
        -------
        copy : `ResourcePath`
            This same object.
        """
        # Implement here because the __new__ method confuses things
        return self
    def __getnewargs__(self) -> Tuple:
        """Support pickling.

        Returns
        -------
        args : `tuple`
            One-element tuple holding the string form of this URI.
        """
        return (str(self),)
985 @classmethod
986 def _fixDirectorySep(
987 cls, parsed: urllib.parse.ParseResult, forceDirectory: bool = False
988 ) -> Tuple[urllib.parse.ParseResult, bool]:
989 """Ensure that a path separator is present on directory paths.
991 Parameters
992 ----------
993 parsed : `~urllib.parse.ParseResult`
994 The result from parsing a URI using `urllib.parse`.
995 forceDirectory : `bool`, optional
996 If `True` forces the URI to end with a separator, otherwise given
997 URI is interpreted as is. Specifying that the URI is conceptually
998 equivalent to a directory can break some ambiguities when
999 interpreting the last element of a path.
1001 Returns
1002 -------
1003 modified : `~urllib.parse.ParseResult`
1004 Update result if a URI is being handled.
1005 dirLike : `bool`
1006 `True` if given parsed URI has a trailing separator or
1007 forceDirectory is True. Otherwise `False`.
1008 """
1009 # assume we are not dealing with a directory like URI
1010 dirLike = False
1012 # Directory separator
1013 sep = cls._pathModule.sep
1015 # URI is dir-like if explicitly stated or if it ends on a separator
1016 endsOnSep = parsed.path.endswith(sep)
1017 if forceDirectory or endsOnSep:
1018 dirLike = True
1019 # only add the separator if it's not already there
1020 if not endsOnSep:
1021 parsed = parsed._replace(path=parsed.path + sep)
1023 return parsed, dirLike
1025 @classmethod
1026 def _fixupPathUri(
1027 cls,
1028 parsed: urllib.parse.ParseResult,
1029 root: Optional[Union[str, ResourcePath]] = None,
1030 forceAbsolute: bool = False,
1031 forceDirectory: bool = False,
1032 ) -> Tuple[urllib.parse.ParseResult, bool]:
1033 """Correct any issues with the supplied URI.
1035 Parameters
1036 ----------
1037 parsed : `~urllib.parse.ParseResult`
1038 The result from parsing a URI using `urllib.parse`.
1039 root : `str` or `ResourcePath`, ignored
1040 Not used by the this implementation since all URIs are
1041 absolute except for those representing the local file system.
1042 forceAbsolute : `bool`, ignored.
1043 Not used by this implementation. URIs are generally always
1044 absolute.
1045 forceDirectory : `bool`, optional
1046 If `True` forces the URI to end with a separator, otherwise given
1047 URI is interpreted as is. Specifying that the URI is conceptually
1048 equivalent to a directory can break some ambiguities when
1049 interpreting the last element of a path.
1051 Returns
1052 -------
1053 modified : `~urllib.parse.ParseResult`
1054 Update result if a URI is being handled.
1055 dirLike : `bool`
1056 `True` if given parsed URI has a trailing separator or
1057 forceDirectory is True. Otherwise `False`.
1059 Notes
1060 -----
1061 Relative paths are explicitly not supported by RFC8089 but `urllib`
1062 does accept URIs of the form ``file:relative/path.ext``. They need
1063 to be turned into absolute paths before they can be used. This is
1064 always done regardless of the ``forceAbsolute`` parameter.
1066 AWS S3 differentiates between keys with trailing POSIX separators (i.e
1067 `/dir` and `/dir/`) whereas POSIX does not neccessarily.
1069 Scheme-less paths are normalized.
1070 """
1071 return cls._fixDirectorySep(parsed, forceDirectory)
def transfer_from(
    self,
    src: ResourcePath,
    transfer: str,
    overwrite: bool = False,
    transaction: Optional[TransactionProtocol] = None,
) -> None:
    """Transfer the resource at another URI to this URI.

    Parameters
    ----------
    src : `ResourcePath`
        URI of the resource to transfer from.
    transfer : `str`
        Transfer mode. Commonly recognized modes are copy, link,
        symlink, hardlink and relsymlink, although a given URI scheme
        need not support every one of them.
    overwrite : `bool`, optional
        Whether an existing file may be replaced. Defaults to `False`.
    transaction : `~lsst.resources.utils.TransactionProtocol`, optional
        Transaction object that may, depending on the implementation,
        roll back a transfer on error. Support is not guaranteed.

    Raises
    ------
    NotImplementedError
        Always raised by this implementation, which supports no
        transfer modes.

    Notes
    -----
    This design is conceptually hard to scale as the number of URI
    schemes grows. The destination URI matters more than the source
    URI because the transfer modes are relevant at the destination
    (with the complication that "move" deletes the source).

    Local file to local file is the fundamental use case, but every
    other scheme has to support "copy" to a local file (with implicit
    support for "move") and copy from a local file. The various "link"
    modes tend to be specific to local file systems.

    "move" is a "copy" in which the remote resource is deleted at the
    end. Whether that works depends on the source URI rather than the
    destination URI. Reverting a move on transaction rollback is
    expected to be problematic if a remote resource was involved.
    """
    message = f"No transfer modes supported by URI scheme {self.scheme}"
    raise NotImplementedError(message)
def walk(
    self, file_filter: Optional[Union[str, re.Pattern]] = None
) -> Iterator[Union[List, Tuple[ResourcePath, List[str], List[str]]]]:
    """Walk the directory tree, reporting matching files and directories.

    Parameters
    ----------
    file_filter : `str` or `re.Pattern`, optional
        Regex used to filter the list of files before it is returned.

    Yields
    ------
    dirpath : `ResourcePath`
        The directory currently being examined.
    dirnames : `list` of `str`
        Names of the subdirectories found within ``dirpath``.
    filenames : `list` of `str`
        Names of all the files found within ``dirpath``.

    Raises
    ------
    NotImplementedError
        Always raised by this implementation.
    """
    raise NotImplementedError()
# Typing-only overload: ``file_filter`` is supplied explicitly and
# ``grouped`` is `True`, so each yielded item is itself an iterator of URIs.
@overload
@classmethod
def findFileResources(
    cls,
    candidates: Iterable[ResourcePathExpression],
    file_filter: Optional[Union[str, re.Pattern]],
    grouped: Literal[True],
) -> Iterator[Iterator[ResourcePath]]:
    ...

# Typing-only overload: ``file_filter`` is omitted and ``grouped`` is passed
# by keyword as `True`, so each yielded item is itself an iterator of URIs.
@overload
@classmethod
def findFileResources(
    cls,
    candidates: Iterable[ResourcePathExpression],
    *,
    grouped: Literal[True],
) -> Iterator[Iterator[ResourcePath]]:
    ...

# Typing-only overload: ``grouped`` is `False` (the default), so individual
# URIs are yielded directly.
@overload
@classmethod
def findFileResources(
    cls,
    candidates: Iterable[ResourcePathExpression],
    file_filter: Optional[Union[str, re.Pattern]] = None,
    grouped: Literal[False] = False,
) -> Iterator[ResourcePath]:
    ...
@classmethod
def findFileResources(
    cls,
    candidates: Iterable[ResourcePathExpression],
    file_filter: Optional[Union[str, re.Pattern]] = None,
    grouped: bool = False,
) -> Iterator[Union[ResourcePath, Iterator[ResourcePath]]]:
    """Get all the files from a list of values.

    Parameters
    ----------
    candidates : iterable [`str` or `ResourcePath`]
        The files to return and directories in which to look for files to
        return.
    file_filter : `str` or `re.Pattern`, optional
        The regex to use when searching for files within directories.
        By default returns all the found files.
    grouped : `bool`, optional
        If `True` the results will be grouped by directory and each
        yielded value will be an iterator over URIs. If `False` each
        URI will be returned separately.

    Yields
    ------
    found_file: `ResourcePath`
        The passed-in URIs and URIs found in passed-in directories.
        If grouping is enabled, each of the yielded values will be an
        iterator yielding members of the group. Files given explicitly
        will be returned as a single group at the end.

    Notes
    -----
    If a value is a file it is yielded immediately without checking that it
    exists. If a value is a directory, all the files in the directory
    (recursively) that match the regex will be yielded in turn.

    Each group iterator is tied to the directory it was created for, so
    groups may be collected first and consumed later.
    """
    fileRegex = None if file_filter is None else re.compile(file_filter)

    # Explicitly given files; only accumulated when grouping is requested.
    singles = []

    # Find all the files of interest
    for location in candidates:
        uri = ResourcePath(location)

        if not uri.isdir():
            if grouped:
                singles.append(uri)
            else:
                yield uri
            continue

        for found in uri.walk(fileRegex):
            if not found:
                # This means the uri does not exist and by
                # convention we ignore it
                continue
            root, _, files = found
            if not files:
                continue
            if grouped:
                # Bind ``root.join`` now. A generator expression would look
                # ``root`` up lazily and, if consumed after this loop has
                # moved on, would join names against a later directory.
                yield map(root.join, files)
            else:
                for name in files:
                    yield root.join(name)

    # Finally, return any explicitly given files in one group
    if grouped and singles:
        yield iter(singles)
@contextlib.contextmanager
def open(
    self,
    mode: str = "r",
    *,
    encoding: Optional[str] = None,
    prefer_file_temporary: bool = False,
) -> Iterator[IO]:
    """Return a context manager that wraps an object that behaves like an
    open file at the location of the URI.

    Parameters
    ----------
    mode : `str`
        String indicating the mode in which to open the file. Values are
        the same as those accepted by `builtins.open`, though intrinsically
        read-only URI types may only support read modes, and
        `io.IOBase.seekable` is not guaranteed to be `True` on the returned
        object.
    encoding : `str`, optional
        Unicode encoding for text IO; ignored for binary IO. Defaults to
        ``locale.getpreferredencoding(False)``, just as `builtins.open`
        does.
    prefer_file_temporary : `bool`, optional
        If `True`, for implementations that require transfers from a remote
        system to temporary local storage and/or back, use a temporary file
        instead of an in-memory buffer; this is generally slower, but it
        may be necessary to avoid excessive memory usage by large files.
        Ignored by implementations that do not require a temporary.

    Returns
    -------
    cm : `contextlib.ContextManager`
        A context manager that wraps a file-like object.

    Raises
    ------
    IsADirectoryError
        Raised if this URI is directory-like.
    FileExistsError
        Raised if ``mode`` contains ``"x"`` and the resource already
        exists.

    Notes
    -----
    The default implementation of this method uses a local temporary buffer
    (in-memory or file, depending on ``prefer_file_temporary``) with calls
    to `read`, `write`, `as_local`, and `transfer_from` as necessary to
    read and write from/to remote systems. Remote writes thus occur only
    when the context manager is exited. `ResourcePath` implementations
    that can return a more efficient native buffer should do so whenever
    possible (as is guaranteed for local files). `ResourcePath`
    implementations for which `as_local` does not return a temporary are
    required to reimplement `open`, though they may delegate to `super`
    when `prefer_file_temporary` is `False`.
    """
    if self.dirLike:
        raise IsADirectoryError(f"Directory-like URI {self} cannot be opened.")
    if "x" in mode and self.exists():
        raise FileExistsError(f"File at {self} already exists.")

    is_binary = "b" in mode
    # Read and append modes start from the existing content.
    needs_existing = "r" in mode or "a" in mode
    # Anything except a pure read has to be pushed back on exit.
    writes_back = "r" not in mode or "+" in mode
    # Exclusive creation must not replace an existing resource.
    allow_overwrite = "x" not in mode

    if prefer_file_temporary:
        if needs_existing:
            local_cm = self.as_local()
        else:
            local_cm = self.temporary_uri(suffix=self.getExtension())
        with local_cm as local_uri:
            assert local_uri.isTemporary, (
                "ResourcePath implementations for which as_local is not "
                "a temporary must reimplement `open`."
            )
            # The builtin open() raises ValueError when given an encoding
            # in binary mode; drop it so that encoding is ignored for
            # binary IO, as documented and as the in-memory branch does.
            file_encoding = None if is_binary else encoding
            with open(local_uri.ospath, mode=mode, encoding=file_encoding) as file_buffer:
                if "a" in mode:
                    file_buffer.seek(0, io.SEEK_END)
                yield file_buffer
            # The local file is closed at this point, so its content is
            # complete before it is transferred back.
            if writes_back:
                self.transfer_from(local_uri, transfer="copy", overwrite=allow_overwrite)
    else:
        in_bytes = self.read() if needs_existing else b""
        if is_binary:
            bytes_buffer = io.BytesIO(in_bytes)
            if "a" in mode:
                bytes_buffer.seek(0, io.SEEK_END)
            yield bytes_buffer
            out_bytes = bytes_buffer.getvalue()
        else:
            if encoding is None:
                encoding = locale.getpreferredencoding(False)
            str_buffer = io.StringIO(in_bytes.decode(encoding))
            if "a" in mode:
                str_buffer.seek(0, io.SEEK_END)
            yield str_buffer
            out_bytes = str_buffer.getvalue().encode(encoding)
        if writes_back:
            self.write(out_bytes, overwrite=allow_overwrite)