Coverage for python/lsst/resources/_resourcePath.py: 20%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of lsst-resources.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# Use of this source code is governed by a 3-clause BSD-style
10# license that can be found in the LICENSE file.
12from __future__ import annotations
14import concurrent.futures
15import contextlib
16import copy
17import io
18import locale
19import logging
20import os
21import posixpath
22import re
23import shutil
24import tempfile
25import urllib.parse
26from pathlib import Path, PurePath, PurePosixPath
27from random import Random
29__all__ = ("ResourcePath", "ResourcePathExpression")
31from typing import IO, TYPE_CHECKING, Any, Dict, Iterable, Iterator, List, Optional, Tuple, Type, Union
33if TYPE_CHECKING: 33 ↛ 34line 33 didn't jump to line 34, because the condition on line 33 was never true
34 from .utils import TransactionProtocol
# Logger shared by everything in this module.
log = logging.getLogger(__name__)

# Matches a percent-escape (``%`` followed by two upper-case hex digits);
# used to spot strings that may already have been URI-quoted.
ESCAPES_RE = re.compile(r"%[A-F0-9]{2}")

# The quoted form of "#", computed once so it can be swapped back when
# restoring fragments in scheme-less paths.
ESCAPED_HASH = urllib.parse.quote("#")

# Upper bound on worker threads for parallelized operations.
# Raising this above 10 requires matching changes to connection pool
# sizing (for example in urllib3).
MAX_WORKERS = 10


ResourcePathExpression = Union[str, urllib.parse.ParseResult, "ResourcePath", Path]
"""Type-annotation alias for objects that can be coerced to ResourcePath.
"""
56class ResourcePath:
57 """Convenience wrapper around URI parsers.
59 Provides access to URI components and can convert file
60 paths into absolute path URIs. Scheme-less URIs are treated as if
61 they are local file system paths and are converted to absolute URIs.
63 A specialist subclass is created for each supported URI scheme.
65 Parameters
66 ----------
67 uri : `str`, `Path`, `urllib.parse.ParseResult`, or `ResourcePath`.
68 URI in string form. Can be scheme-less if referring to a local
69 filesystem path.
70 root : `str` or `ResourcePath`, optional
71 When fixing up a relative path in a ``file`` scheme or if scheme-less,
72 use this as the root. Must be absolute. If `None` the current
73 working directory will be used. Can be a file URI.
74 forceAbsolute : `bool`, optional
75 If `True`, scheme-less relative URI will be converted to an absolute
76 path using a ``file`` scheme. If `False` scheme-less URI will remain
77 scheme-less and will not be updated to ``file`` or absolute path.
78 forceDirectory: `bool`, optional
79 If `True` forces the URI to end with a separator, otherwise given URI
80 is interpreted as is.
81 isTemporary : `bool`, optional
82 If `True` indicates that this URI points to a temporary resource.
83 The default is `False`, unless ``uri`` is already a `ResourcePath`
84 instance and ``uri.isTemporary is True``.
85 """
87 _pathLib: Type[PurePath] = PurePosixPath
88 """Path library to use for this scheme."""
90 _pathModule = posixpath
91 """Path module to use for this scheme."""
93 transferModes: Tuple[str, ...] = ("copy", "auto", "move")
94 """Transfer modes supported by this implementation.
96 Move is special in that it is generally a copy followed by an unlink.
97 Whether that unlink works depends critically on whether the source URI
98 implements unlink. If it does not the move will be reported as a failure.
99 """
101 transferDefault: str = "copy"
102 """Default mode to use for transferring if ``auto`` is specified."""
104 quotePaths = True
105 """True if path-like elements modifying a URI should be quoted.
107 All non-schemeless URIs have to internally use quoted paths. Therefore
108 if a new file name is given (e.g. to updatedFile or join) a decision must
109 be made whether to quote it to be consistent.
110 """
112 isLocal = False
113 """If `True` this URI refers to a local file."""
115 # This is not an ABC with abstract methods because the __new__ being
116 # a factory confuses mypy such that it assumes that every constructor
117 # returns a ResourcePath and then determines that all the abstract methods
118 # are still abstract. If they are not marked abstract but just raise
119 # mypy is fine with it.
121 # mypy is confused without these
122 _uri: urllib.parse.ParseResult
123 isTemporary: bool
124 dirLike: bool
def __new__(
    cls,
    uri: ResourcePathExpression,
    root: Optional[Union[str, ResourcePath]] = None,
    forceAbsolute: bool = True,
    forceDirectory: bool = False,
    isTemporary: Optional[bool] = None,
) -> ResourcePath:
    """Build an instance of the scheme-specific `ResourcePath` subclass.

    Parameters
    ----------
    uri : `str`, `os.PathLike`, `urllib.parse.ParseResult`, `ResourcePath`
        The thing to convert. An existing `ResourcePath` is returned as-is
        when it already agrees with the other arguments.
    root : `str` or `ResourcePath`, optional
        Forwarded to the subclass ``_fixupPathUri``.
    forceAbsolute : `bool`, optional
        Forwarded to the subclass ``_fixupPathUri``. For a scheme-less
        `ResourcePath` input it triggers re-creation from its string form.
    forceDirectory : `bool`, optional
        Request a directory-like URI.
    isTemporary : `bool`, optional
        Whether the URI refers to a temporary resource. `None` means
        `False` for newly built instances.

    Returns
    -------
    new : `ResourcePath`
        Instance of the subclass matching the URI scheme.

    Raises
    ------
    RuntimeError
        Raised if an existing `ResourcePath` is given that conflicts with
        ``forceDirectory`` or ``isTemporary``.
    ValueError
        Raised if ``uri`` is of an unsupported type.
    NotImplementedError
        Raised if the URI scheme is not recognised.
    """
    parsed: urllib.parse.ParseResult
    dirLike: bool = False
    chosen: Optional[Type[ResourcePath]] = None

    # Path-like objects are processed through their string form.
    if isinstance(uri, os.PathLike):
        uri = str(uri)

    if isinstance(uri, str):
        # Local file names may contain special characters, so quote them
        # for the parser (they can be unquoted later). Anything with an
        # explicit scheme is assumed to be quoted already. People also
        # write file:/a/b rather than file:///a/b, so an explicit "file:"
        # prefix is left untouched as well.
        needs_quoting = "://" not in uri and not uri.startswith("file:")
        if needs_quoting:
            if ESCAPES_RE.search(uri):
                log.warning("Possible double encoding of %s", uri)
            else:
                uri = urllib.parse.quote(uri)
                # Fragments must keep working for scheme-less URIs, so
                # restore "#" -- but only in the file part, never in the
                # directory part.
                if ESCAPED_HASH in uri:
                    file_start = uri.rfind("/") + 1
                    uri = uri[:file_start] + uri[file_start:].replace(ESCAPED_HASH, "#")

        parsed = urllib.parse.urlparse(uri)
    elif isinstance(uri, urllib.parse.ParseResult):
        parsed = copy.copy(uri)
        # When invoked on a subclass rather than ResourcePath itself, use
        # that subclass directly. This keeps a schemeless absolute URI
        # from silently becoming a file URI in updatedFile or
        # updatedExtension, but it can give inconsistent results if used
        # outside ResourcePath.replace(), e.g.
        # S3ResourcePath(urllib.parse.urlparse("file://a/b.txt")).
        if cls is not ResourcePath:
            parsed, dirLike = cls._fixDirectorySep(parsed, forceDirectory)
            chosen = cls

    elif isinstance(uri, ResourcePath):
        # Instances are immutable, so the argument itself can be handed
        # back when it is consistent with the request. Only forceAbsolute
        # causes a rebuild (to add a scheme); mismatches in the other
        # flags more likely indicate a logic error, so they raise.
        if forceDirectory and not uri.dirLike:
            raise RuntimeError(
                f"{uri} is already a file-like ResourcePath; cannot force it to directory."
            )
        if isTemporary is not None and isTemporary is not uri.isTemporary:
            raise RuntimeError(
                f"{uri} is already a {'temporary' if uri.isTemporary else 'permanent'} "
                f"ResourcePath; cannot make it {'temporary' if isTemporary else 'permanent'}."
            )
        if forceAbsolute and not uri.scheme:
            return ResourcePath(
                str(uri),
                root=root,
                forceAbsolute=True,
                forceDirectory=uri.dirLike,
                isTemporary=uri.isTemporary,
            )
        return uri
    else:
        raise ValueError(
            f"Supplied URI must be string, Path, ResourcePath, or ParseResult but got '{uri!r}'"
        )

    if chosen is None:
        # Select the implementation from the URI scheme. The imports are
        # local to each branch, as in the original implementation.
        scheme = parsed.scheme
        if not scheme:
            from .schemeless import SchemelessResourcePath

            chosen = SchemelessResourcePath
        elif scheme == "file":
            from .file import FileResourcePath

            chosen = FileResourcePath
        elif scheme == "s3":
            from .s3 import S3ResourcePath

            chosen = S3ResourcePath
        elif scheme.startswith("http"):
            from .http import HttpResourcePath

            chosen = HttpResourcePath
        elif scheme == "resource":
            # Rules for scheme names disallow pkg_resource
            from .packageresource import PackageResourcePath

            chosen = PackageResourcePath
        elif scheme == "mem":
            # in-memory datastore object
            from .mem import InMemoryResourcePath

            chosen = InMemoryResourcePath
        else:
            raise NotImplementedError(
                f"No URI support for scheme: '{parsed.scheme}' in {parsed.geturl()}"
            )

        parsed, dirLike = chosen._fixupPathUri(
            parsed, root=root, forceAbsolute=forceAbsolute, forceDirectory=forceDirectory
        )

        # The fix-up can turn a schemeless URI into a file URI, in which
        # case the class has to follow.
        if parsed.scheme == "file":
            from .file import FileResourcePath

            chosen = FileResourcePath

    # Bypass this factory when allocating and set the attributes directly.
    instance = object.__new__(chosen)
    instance._uri = parsed
    instance.dirLike = dirLike
    instance.isTemporary = False if isTemporary is None else isTemporary
    return instance
@property
def scheme(self) -> str:
    """Scheme component of the URI.

    Notes
    -----
    The ``://`` separator is not included.
    """
    parsed = self._uri
    return parsed.scheme
@property
def netloc(self) -> str:
    """Network location component of the URI."""
    parsed = self._uri
    return parsed.netloc
@property
def path(self) -> str:
    """Path component of the URI, exactly as stored (no unquoting)."""
    parsed = self._uri
    return parsed.path
@property
def unquoted_path(self) -> str:
    """Path component of the URI with percent-escapes decoded."""
    quoted = self._uri.path
    return urllib.parse.unquote(quoted)
@property
def ospath(self) -> str:
    """Path component of the URI localized to the current OS.

    Raises
    ------
    AttributeError
        Always raised by this base implementation, which has no local
        OS path to offer.
    """
    message = f"Non-file URI ({self}) has no local OS path."
    raise AttributeError(message)
@property
def relativeToPathRoot(self) -> str:
    """Path expressed relative to the root of the network location.

    This is the ``path`` property with the root separator removed from
    the left hand side. A directory-like URI always yields a result
    ending in ``/``.

    The result is always unquoted.
    """
    pure = self._pathLib(self.path)
    relative = str(pure.relative_to(pure.root))
    # The path library drops trailing separators; put one back for
    # directory-like URIs.
    if self.dirLike and not relative.endswith("/"):
        relative = relative + "/"
    return urllib.parse.unquote(relative)
@property
def is_root(self) -> bool:
    """Whether this URI points to the root of the network location.

    True exactly when the path relative to the root is ``./``, i.e. the
    path component refers to the top level.
    """
    return self.relativeToPathRoot == "./"
@property
def fragment(self) -> str:
    """Fragment component of the URI."""
    parsed = self._uri
    return parsed.fragment
@property
def params(self) -> str:
    """Parameters component of the URI."""
    parsed = self._uri
    return parsed.params
@property
def query(self) -> str:
    """Query string component of the URI."""
    parsed = self._uri
    return parsed.query
def geturl(self) -> str:
    """Return the URI as a string.

    Returns
    -------
    url : `str`
        String form of the URI, as assembled by the parsed result.
    """
    parsed = self._uri
    return parsed.geturl()
def root_uri(self) -> ResourcePath:
    """Return the base root URI.

    Returns
    -------
    uri : `ResourcePath`
        Copy of this URI with an empty path, forced to be directory-like.
    """
    root = self.replace(forceDirectory=True, path="")
    return root
def split(self) -> Tuple[ResourcePath, str]:
    """Split the URI into a directory part and a final component.

    Returns
    -------
    head : `ResourcePath`
        Everything leading up to the tail, expanded and normalized as per
        ResourcePath rules.
    tail : `str`
        Last component of ``self.path``, unquoted. Empty if the path ends
        on a separator. Never contains separators.

    Notes
    -----
    Equivalent to `os.path.split()` where head preserves the URI
    components.
    """
    path_module = self._pathModule
    head_path, quoted_tail = path_module.split(self.path)

    # The file part should never include quoted metacharacters.
    tail = urllib.parse.unquote(quoted_tail)

    # A schemeless URI may be a relative path and must stay relative;
    # every other URI is already absolute.
    keep_absolute = path_module.isabs(self.path)
    head = ResourcePath(
        self._uri._replace(path=head_path),
        forceDirectory=True,
        forceAbsolute=keep_absolute,
    )
    return head, tail
def basename(self) -> str:
    """Return the last element of the URI path.

    Returns
    -------
    tail : `str`
        Last part of the path attribute. Tail will be empty if the path
        ends on a separator.

    Notes
    -----
    This is the second element returned by `split()`, so a URI ending in
    a slash gives an empty string.

    Equivalent of `os.path.basename()`.
    """
    _, tail = self.split()
    return tail
def dirname(self) -> ResourcePath:
    """Return the directory part of the path as a new `ResourcePath`.

    Returns
    -------
    head : `ResourcePath`
        Everything except the tail of the path attribute, expanded and
        normalized as per ResourcePath rules.

    Notes
    -----
    This is the first element returned by `split()`.

    Equivalent of `os.path.dirname()`.
    """
    head, _ = self.split()
    return head
def parent(self) -> ResourcePath:
    """Return a `ResourcePath` for the parent directory.

    Returns
    -------
    head : `ResourcePath`
        Everything except the tail of the path attribute, expanded and
        normalized as per `ResourcePath` rules.

    Notes
    -----
    For a file-like URI this is the same as calling `dirname()`.
    """
    if self.dirLike:
        # For a directory step up one level. The path library ignores
        # any trailing separator when computing the parent.
        up_one = self._pathLib(self.path).parent
        return self.replace(path=str(up_one), forceDirectory=True)
    # A file's parent is simply the directory containing it.
    return self.dirname()
def replace(self, forceDirectory: bool = False, isTemporary: bool = False, **kwargs: Any) -> ResourcePath:
    """Return a new `ResourcePath` with selected components replaced.

    Parameters
    ----------
    forceDirectory : `bool`, optional
        Passed to the constructor to force the new URI to be dir-like.
    isTemporary : `bool`, optional
        Passed to the constructor to mark the new URI as a temporary
        resource.
    **kwargs
        Components of a `urllib.parse.ParseResult` to modify in the
        newly-created `ResourcePath`.

    Returns
    -------
    new : `ResourcePath`
        New object of the same class with the updated values.

    Raises
    ------
    ValueError
        Raised if a change of ``scheme`` is requested.

    Notes
    -----
    Does not, for now, allow a change in URI scheme.
    """
    # Changing the scheme is not supported.
    if "scheme" in kwargs:
        raise ValueError(f"Can not use replace() method to change URI scheme for {self}")
    modified = self._uri._replace(**kwargs)
    return self.__class__(modified, forceDirectory=forceDirectory, isTemporary=isTemporary)
def updatedFile(self, newfile: str) -> ResourcePath:
    """Return a new URI with the final path component replaced.

    Parameters
    ----------
    newfile : `str`
        File name with no path component.

    Returns
    -------
    updated : `ResourcePath`
        URI in the same directory as this one but ending in ``newfile``.

    Notes
    -----
    Forces the ``dirLike`` attribute of the result to be false. The new
    file name is quoted when ``quotePaths`` is set.
    """
    filename = urllib.parse.quote(newfile) if self.quotePaths else newfile
    path_module = self._pathModule
    directory = path_module.split(self.path)[0]

    updated = self.replace(path=path_module.join(directory, filename))
    updated.dirLike = False
    return updated
def updatedExtension(self, ext: Optional[str]) -> ResourcePath:
    """Return a new `ResourcePath` with a different file extension.

    The whole existing extension, as reported by `getExtension`, is
    replaced.

    Parameters
    ----------
    ext : `str` or `None`
        New extension. An empty string removes any extension. `None`
        means no change. A leading ``.`` is added if missing.

    Returns
    -------
    updated : `ResourcePath`
        URI with the specified extension. This object itself is returned
        when ``ext`` is `None` or already matches.
    """
    if ext is None:
        return self

    existing = self.getExtension()
    if existing == ext:
        # Already has the requested extension.
        return self

    # Strip the existing extension by length rather than os.path.splitext
    # because .fits.gz counts as a single extension.
    stem = self.path[: -len(existing)] if existing else self.path

    # Guarantee a leading "." but leave the empty string alone.
    if ext and not ext.startswith("."):
        ext = f".{ext}"

    return self.replace(path=stem + ext)
def getExtension(self) -> str:
    """Return the file extension(s) associated with this URI path.

    Returns
    -------
    ext : `str`
        The file extension, including the ``.``, or an empty string if
        there is none. Normally only the last extension is returned, but
        when it is a compression modifier (``.gz``, ``.bz2``, ``.xz``,
        ``.fz``) and another extension precedes it, the two are combined
        (e.g. ``.fits.gz``).
    """
    compression = {".gz", ".bz2", ".xz", ".fz"}

    # Only look at the file name so that "." in directory names cannot
    # confuse the result.
    suffixes = self._pathLib(self.basename()).suffixes
    if not suffixes:
        return ""

    *earlier, last = suffixes
    if earlier and last in compression:
        # Compression suffix on top of a real extension: report both.
        return f"{earlier[-1]}{last}"
    return last
def join(
    self, path: Union[str, ResourcePath], isTemporary: Optional[bool] = None, forceDirectory: bool = False
) -> ResourcePath:
    """Return a new `ResourcePath` with additional path components.

    Parameters
    ----------
    path : `str`, `ResourcePath`
        Additional file components to append to the current URI. Assumed
        to include a file at the end. Will be quoted depending on the
        associated URI scheme. If the path looks like a URI with a scheme
        referring to an absolute location, it will be returned
        directly (matching the behavior of `os.path.join()`). It can
        also be a `ResourcePath`.
    isTemporary : `bool`, optional
        Indicate that the resulting URI represents a temporary resource.
        Default is ``self.isTemporary``.
    forceDirectory : `bool`, optional
        If `True` forces the URI to end with a separator, otherwise the
        given URI is interpreted as is.

    Returns
    -------
    new : `ResourcePath`
        New URI with any file at the end replaced with the new path
        components.

    Notes
    -----
    Schemeless URIs assume local path separator but all other URIs assume
    POSIX separator if the supplied path has directory structure. It
    may be this never becomes a problem but datastore templates assume
    POSIX separator is being used.

    If an absolute `ResourcePath` is given for ``path`` it is assumed that
    this should be returned directly. Giving a ``path`` of an absolute
    scheme-less URI is not allowed for safety reasons as it may indicate
    a mistake in the calling code.

    Raises
    ------
    ValueError
        Raised if the ``path`` is an absolute scheme-less URI. In that
        situation it is unclear whether the intent is to return a
        ``file`` URI or it was a mistake and a relative scheme-less URI
        was meant.
    RuntimeError
        Raised if this attempts to join a temporary URI to a non-temporary
        URI.
    """
    if isTemporary is None:
        isTemporary = self.isTemporary
    elif self.isTemporary and not isTemporary:
        raise RuntimeError("Cannot join temporary URI to non-temporary URI.")

    # Parse the argument without forcing it to be absolute, so that the
    # expected case of a relative path can be told apart from a full URI.
    extra = ResourcePath(
        path, forceAbsolute=False, forceDirectory=forceDirectory, isTemporary=isTemporary
    )
    if extra.scheme:
        # An explicit scheme means a complete URI: use it directly.
        return extra

    if extra.isabs():
        # Absolute scheme-less path.
        raise ValueError(f"Can not join absolute scheme-less {extra!r} to another URI.")

    # A ResourcePath argument contributes its unquoted path. A plain
    # string is used verbatim so that "#" may appear in the file name.
    if not isinstance(path, str):
        path = extra.unquoted_path

    base = self.dirname()  # By definition a directory URI

    # Ask the directory URI, not self, whether to quote: dirname can
    # change the URI scheme for schemeless -> file.
    if base.quotePaths:
        path = urllib.parse.quote(path)

    path_module = self._pathModule
    combined = path_module.normpath(path_module.join(base.path, path))

    # normpath can strip a trailing separator, so force a directory if the
    # supplied path ended with one.
    wants_directory = forceDirectory or path.endswith(path_module.sep)
    return base.replace(path=combined, forceDirectory=wants_directory, isTemporary=isTemporary)
def relative_to(self, other: ResourcePath) -> Optional[str]:
    """Return the relative path from this URI to the other URI.

    Parameters
    ----------
    other : `ResourcePath`
        URI to use to calculate the relative path. Must be a parent
        of this URI.

    Returns
    -------
    subpath : `str`
        The sub path of this URI relative to the supplied other URI,
        unquoted. Returns `None` if there is no parent child relationship.
        Scheme and netloc must match.
    """
    # An absolute scheme-less ``other`` is treated as a file URI. A
    # relative scheme-less ``other`` can only match a scheme-less relative
    # self, and that case is handled specifically in a subclass.
    if not other.scheme and other.isabs():
        other = other.abspath()

    # Scheme-less self is handled elsewhere.
    same_location = self.scheme == other.scheme and self.netloc == other.netloc
    if not same_location:
        return None

    child = self._pathLib(self.relativeToPathRoot)
    ancestor = other.relativeToPathRoot
    try:
        relative = str(child.relative_to(ancestor))
    except ValueError:
        # The path library reports "not a sub-path" with ValueError.
        return None
    return urllib.parse.unquote(relative)
def exists(self) -> bool:
    """Report whether the resource is available.

    Returns
    -------
    exists : `bool`
        `True` if the resource exists.

    Raises
    ------
    NotImplementedError
        Always raised by this base implementation.
    """
    raise NotImplementedError()
@classmethod
def mexists(cls, uris: Iterable[ResourcePath]) -> Dict[ResourcePath, bool]:
    """Check for existence of multiple URIs at once.

    Each URI's ``exists()`` is run in a thread pool of at most
    ``MAX_WORKERS`` threads.

    Parameters
    ----------
    uris : iterable of `ResourcePath`
        The URIs to test.

    Returns
    -------
    existence : `dict` of [`ResourcePath`, `bool`]
        Mapping of original URI to boolean indicating existence. A URI
        whose check raised an exception is reported as not existing.
    """
    results: Dict[ResourcePath, bool] = {}
    # Run the pool as a context manager so the worker threads are always
    # shut down, even if something unexpected escapes the loop.
    with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        future_exists = {executor.submit(uri.exists): uri for uri in uris}
        for future in concurrent.futures.as_completed(future_exists):
            uri = future_exists[future]
            try:
                exists = future.result()
            except Exception:
                # Deliberate best effort: a failed check counts as
                # "does not exist".
                exists = False
            results[uri] = exists
    return results
def remove(self) -> None:
    """Remove the resource.

    Raises
    ------
    NotImplementedError
        Always raised by this base implementation.
    """
    raise NotImplementedError()
def isabs(self) -> bool:
    """Report whether the resource is fully specified.

    Returns
    -------
    isabs : `bool`
        This base implementation always returns `True`; only schemeless
        URIs can be anything else.
    """
    return True
def abspath(self) -> ResourcePath:
    """Return the URI using an absolute path.

    Returns
    -------
    abs : `ResourcePath`
        Absolute URI. This base implementation returns the object
        itself; schemeless URIs are upgraded to file URIs instead.
    """
    return self
759 def _as_local(self) -> Tuple[str, bool]:
760 """Return the location of the (possibly remote) resource as local file.
762 This is a helper function for `as_local` context manager.
764 Returns
765 -------
766 path : `str`
767 If this is a remote resource, it will be a copy of the resource
768 on the local file system, probably in a temporary directory.
769 For a local resource this should be the actual path to the
770 resource.
771 is_temporary : `bool`
772 Indicates if the local path is a temporary file or not.
773 """
774 raise NotImplementedError()
@contextlib.contextmanager
def as_local(self) -> Iterator[ResourcePath]:
    """Return the location of the (possibly remote) resource as local file.

    Yields
    ------
    local : `ResourcePath`
        For a remote resource, a copy of the resource on the local file
        system, probably in a temporary directory. For a local resource,
        the actual path to the resource.

    Raises
    ------
    TypeError
        Raised if this URI is directory-like.

    Notes
    -----
    The context manager will automatically delete any local temporary
    file.

    Examples
    --------
    Should be used as a context manager:

    .. code-block:: py

       with uri.as_local() as local:
           ospath = local.ospath
    """
    if self.dirLike:
        raise TypeError(f"Directory-like URI {self} cannot be fetched as local.")

    location, temporary = self._as_local()
    local = ResourcePath(location, isTemporary=temporary)

    try:
        yield local
    finally:
        # Only temporary copies are cleaned up, and the caller might
        # already have relocated the file.
        if temporary and local.exists():
            local.remove()
@classmethod
@contextlib.contextmanager
def temporary_uri(
    cls, prefix: Optional[ResourcePath] = None, suffix: Optional[str] = None
) -> Iterator[ResourcePath]:
    """Create a temporary file-like URI.

    Parameters
    ----------
    prefix : `ResourcePath`, optional
        Prefix to use. Without this the path will be formed as a local
        file URI in a temporary directory. Ensuring that the prefix
        location exists is the responsibility of the caller.
    suffix : `str`, optional
        A file suffix to be used. The ``.`` should be included in this
        suffix.

    Yields
    ------
    uri : `ResourcePath`
        The temporary URI. Will be removed when the context is completed.

    Raises
    ------
    NotImplementedError
        Raised if the generated URI turns out to be directory-like.
    """
    use_tempdir = False
    if prefix is None:
        prefix = ResourcePath(tempfile.mkdtemp(), forceDirectory=True, isTemporary=True)
        # Record that we need to delete this directory. Can not rely
        # on isTemporary flag since an external prefix may have that
        # set as well.
        use_tempdir = True

    # Need to create a randomized file name. For consistency do not
    # use mkstemp for local and something else for remote. Additionally
    # this method does not create the file to prevent name clashes.
    characters = "abcdefghijklmnopqrstuvwxyz0123456789_"
    rng = Random()
    tempname = "".join(rng.choice(characters) for _ in range(16))
    if suffix:
        tempname += suffix

    try:
        temporary_uri = prefix.join(tempname, isTemporary=True)
        if temporary_uri.dirLike:
            # If we had a safe way to clean up a remote temporary
            # directory, we could support this.
            raise NotImplementedError("temporary_uri cannot be used to create a temporary directory.")
    except Exception:
        # Nothing has been handed to the caller yet, so a directory
        # created above would otherwise be left behind on disk.
        if use_tempdir:
            shutil.rmtree(prefix.ospath, ignore_errors=True)
        raise

    try:
        yield temporary_uri
    finally:
        if use_tempdir:
            shutil.rmtree(prefix.ospath, ignore_errors=True)
        else:
            # It's okay if this does not work because the user removed
            # the file.
            with contextlib.suppress(FileNotFoundError):
                temporary_uri.remove()
def read(self, size: int = -1) -> bytes:
    """Open the resource and return the contents in bytes.

    Parameters
    ----------
    size : `int`, optional
        The number of bytes to read. Negative or omitted indicates
        that all data should be read.

    Raises
    ------
    NotImplementedError
        Always raised by this base implementation.
    """
    raise NotImplementedError()
def write(self, data: bytes, overwrite: bool = True) -> None:
    """Write the supplied bytes to the new resource.

    Parameters
    ----------
    data : `bytes`
        The bytes to write to the resource. The entire contents of the
        resource will be replaced.
    overwrite : `bool`, optional
        If `True` the resource will be overwritten if it exists. Otherwise
        the write will fail.

    Raises
    ------
    NotImplementedError
        Always raised by this base implementation.
    """
    raise NotImplementedError()
def mkdir(self) -> None:
    """For a dir-like URI, create the directory resource if needed.

    Raises
    ------
    NotImplementedError
        Always raised by this base implementation.
    """
    raise NotImplementedError()
def isdir(self) -> bool:
    """Return the ``dirLike`` flag: `True` if this URI looks like a directory."""
    looks_like_directory = self.dirLike
    return looks_like_directory
def size(self) -> int:
    """For non-dir-like URI, return the size of the resource.

    Returns
    -------
    sz : `int`
        The size in bytes of the resource associated with this URI.
        Returns 0 if dir-like.

    Raises
    ------
    NotImplementedError
        Always raised by this base implementation.
    """
    raise NotImplementedError()
914 def __str__(self) -> str:
915 """Convert the URI to its native string form."""
916 return self.geturl()
918 def __repr__(self) -> str:
919 """Return string representation suitable for evaluation."""
920 return f'ResourcePath("{self.geturl()}")'
922 def __eq__(self, other: Any) -> bool:
923 """Compare supplied object with this `ResourcePath`."""
924 if not isinstance(other, ResourcePath):
925 return NotImplemented
926 return self.geturl() == other.geturl()
928 def __hash__(self) -> int:
929 """Return hash of this object."""
930 return hash(str(self))
932 def __copy__(self) -> ResourcePath:
933 """Copy constructor.
935 Object is immutable so copy can return itself.
936 """
937 # Implement here because the __new__ method confuses things
938 return self
940 def __deepcopy__(self, memo: Any) -> ResourcePath:
941 """Deepcopy the object.
943 Object is immutable so copy can return itself.
944 """
945 # Implement here because the __new__ method confuses things
946 return self
948 def __getnewargs__(self) -> Tuple:
949 """Support pickling."""
950 return (str(self),)
952 @classmethod
953 def _fixDirectorySep(
954 cls, parsed: urllib.parse.ParseResult, forceDirectory: bool = False
955 ) -> Tuple[urllib.parse.ParseResult, bool]:
956 """Ensure that a path separator is present on directory paths.
958 Parameters
959 ----------
960 parsed : `~urllib.parse.ParseResult`
961 The result from parsing a URI using `urllib.parse`.
962 forceDirectory : `bool`, optional
963 If `True` forces the URI to end with a separator, otherwise given
964 URI is interpreted as is. Specifying that the URI is conceptually
965 equivalent to a directory can break some ambiguities when
966 interpreting the last element of a path.
968 Returns
969 -------
970 modified : `~urllib.parse.ParseResult`
971 Update result if a URI is being handled.
972 dirLike : `bool`
973 `True` if given parsed URI has a trailing separator or
974 forceDirectory is True. Otherwise `False`.
975 """
976 # assume we are not dealing with a directory like URI
977 dirLike = False
979 # Directory separator
980 sep = cls._pathModule.sep
982 # URI is dir-like if explicitly stated or if it ends on a separator
983 endsOnSep = parsed.path.endswith(sep)
984 if forceDirectory or endsOnSep:
985 dirLike = True
986 # only add the separator if it's not already there
987 if not endsOnSep:
988 parsed = parsed._replace(path=parsed.path + sep)
990 return parsed, dirLike
@classmethod
def _fixupPathUri(
    cls,
    parsed: urllib.parse.ParseResult,
    root: Optional[Union[str, ResourcePath]] = None,
    forceAbsolute: bool = False,
    forceDirectory: bool = False,
) -> Tuple[urllib.parse.ParseResult, bool]:
    """Correct any issues with the supplied URI.

    This implementation only normalizes the trailing directory separator
    by delegating to `_fixDirectorySep`.

    Parameters
    ----------
    parsed : `~urllib.parse.ParseResult`
        The result from parsing a URI using `urllib.parse`.
    root : `str` or `ResourcePath`, ignored
        Not used by this implementation since all URIs are absolute
        except for those representing the local file system.
    forceAbsolute : `bool`, ignored
        Not used by this implementation. URIs are generally always
        absolute.
    forceDirectory : `bool`, optional
        If `True` forces the URI to end with a separator, otherwise the
        given URI is interpreted as is. Specifying that the URI is
        conceptually equivalent to a directory can break some ambiguities
        when interpreting the last element of a path.

    Returns
    -------
    modified : `~urllib.parse.ParseResult`
        Updated result if a URI is being handled.
    dirLike : `bool`
        `True` if the given parsed URI has a trailing separator or
        ``forceDirectory`` is `True`. Otherwise `False`.

    Notes
    -----
    Relative paths are explicitly not supported by RFC8089 but `urllib`
    does accept URIs of the form ``file:relative/path.ext``. They need
    to be turned into absolute paths before they can be used. This is
    always done regardless of the ``forceAbsolute`` parameter.

    AWS S3 differentiates between keys with trailing POSIX separators
    (i.e. ``/dir`` and ``/dir/``) whereas POSIX does not necessarily.

    Scheme-less paths are normalized.
    """
    # ``root`` and ``forceAbsolute`` are accepted but deliberately unused.
    fixed = cls._fixDirectorySep(parsed, forceDirectory)
    return fixed
def transfer_from(
    self,
    src: ResourcePath,
    transfer: str,
    overwrite: bool = False,
    transaction: Optional[TransactionProtocol] = None,
) -> None:
    """Transfer to this URI from another.

    This implementation supports no transfer modes and always raises.

    Parameters
    ----------
    src : `ResourcePath`
        Source URI.
    transfer : `str`
        Mode to use for transferring the resource. Generically there are
        many standard options: copy, link, symlink, hardlink, relsymlink.
        Not all URIs support all modes.
    overwrite : `bool`, optional
        Allow an existing file to be overwritten. Defaults to `False`.
    transaction : `~lsst.resources.utils.TransactionProtocol`, optional
        A transaction object that can (depending on implementation)
        rollback transfers on error. Not guaranteed to be implemented.

    Raises
    ------
    NotImplementedError
        Always raised by this implementation.

    Notes
    -----
    Conceptually this is hard to scale as the number of URI schemes
    grows. The destination URI is more important than the source URI
    since that is where all the transfer modes are relevant (with the
    complication that "move" deletes the source).

    Local file to local file is the fundamental use case but every
    other scheme has to support "copy" to local file (with implicit
    support for "move") and copy from local file.
    All the "link" options tend to be specific to local file systems.

    "move" is a "copy" where the remote resource is deleted at the end.
    Whether this works depends on the source URI rather than the
    destination URI. Reverting a move on transaction rollback is
    expected to be problematic if a remote resource was involved.
    """
    message = f"No transfer modes supported by URI scheme {self.scheme}"
    raise NotImplementedError(message)
def walk(
    self, file_filter: Optional[Union[str, re.Pattern]] = None
) -> Iterator[Union[List, Tuple[ResourcePath, List[str], List[str]]]]:
    """Walk the directory tree returning matching files and directories.

    This implementation provides no traversal and always raises.

    Parameters
    ----------
    file_filter : `str` or `re.Pattern`, optional
        Regex used to filter the files in the list before it is returned.

    Yields
    ------
    dirpath : `ResourcePath`
        Current directory being examined.
    dirnames : `list` of `str`
        Names of subdirectories within dirpath.
    filenames : `list` of `str`
        Names of all the files within dirpath.

    Raises
    ------
    NotImplementedError
        Always raised by this implementation.
    """
    raise NotImplementedError
@classmethod
def findFileResources(
    cls,
    candidates: Iterable[Union[str, ResourcePath]],
    file_filter: Optional[str] = None,
    grouped: bool = False,
) -> Iterator[Union[ResourcePath, Iterator[ResourcePath]]]:
    """Get all the files from a list of values.

    Parameters
    ----------
    candidates : iterable [`str` or `ResourcePath`]
        The files to return and directories in which to look for files to
        return.
    file_filter : `str`, optional
        The regex to use when searching for files within directories.
        By default returns all the found files.
    grouped : `bool`, optional
        If `True` the results will be grouped by directory and each
        yielded value will be an iterator over URIs. If `False` each
        URI will be returned separately.

    Yields
    ------
    found_file : `ResourcePath`
        The passed-in URIs and URIs found in passed-in directories.
        If grouping is enabled, each of the yielded values will be an
        iterator yielding members of the group. Files given explicitly
        will be returned as a single group at the end.

    Notes
    -----
    If a value is a file it is yielded immediately without checking that it
    exists. If a value is a directory, all the files in the directory
    (recursively) that match the regex will be yielded in turn.

    Each per-directory group is a lazy iterator that is bound to its own
    directory when it is created, so groups may safely be collected first
    and consumed later, in any order.
    """
    fileRegex = None if file_filter is None else re.compile(file_filter)

    # Explicitly given files are held back in grouped mode so that they
    # can be returned together as one final group.
    singles = []

    for location in candidates:
        uri = ResourcePath(location)

        if not uri.isdir():
            if grouped:
                singles.append(uri)
            else:
                yield uri
            continue

        for found in uri.walk(fileRegex):
            if not found:
                # This means the uri does not exist and by
                # convention we ignore it
                continue
            root, _, files = found
            if not files:
                continue
            if grouped:
                # ``map`` captures ``root.join`` right now. A generator
                # expression would look ``root`` up only when consumed,
                # by which time the loop may have rebound it to another
                # directory.
                yield map(root.join, files)
            else:
                for name in files:
                    yield root.join(name)

    # Finally, return any explicitly given files in one group
    if grouped and singles:
        yield iter(singles)
@contextlib.contextmanager
def open(
    self,
    mode: str = "r",
    *,
    encoding: Optional[str] = None,
    prefer_file_temporary: bool = False,
) -> Iterator[IO]:
    """Return a context manager that wraps an object that behaves like an
    open file at the location of the URI.

    Parameters
    ----------
    mode : `str`
        String indicating the mode in which to open the file. Values are
        the same as those accepted by `builtins.open`, though intrinsically
        read-only URI types may only support read modes, and
        `io.IOBase.seekable` is not guaranteed to be `True` on the returned
        object.
    encoding : `str`, optional
        Unicode encoding for text IO; ignored for binary IO. Defaults to
        ``locale.getpreferredencoding(False)``, just as `builtins.open`
        does.
    prefer_file_temporary : `bool`, optional
        If `True`, for implementations that require transfers from a remote
        system to temporary local storage and/or back, use a temporary file
        instead of an in-memory buffer; this is generally slower, but it
        may be necessary to avoid excessive memory usage by large files.
        Ignored by implementations that do not require a temporary.

    Returns
    -------
    cm : `contextlib.ContextManager`
        A context manager that wraps a file-like object.

    Raises
    ------
    TypeError
        Raised if this URI is directory-like.
    FileExistsError
        Raised if ``mode`` contains ``"x"`` and the resource already
        exists.

    Notes
    -----
    The default implementation of this method uses a local temporary buffer
    (in-memory or file, depending on ``prefer_file_temporary``) with calls
    to `read`, `write`, `as_local`, and `transfer_from` as necessary to
    read and write from/to remote systems. Remote writes thus occur only
    when the context manager is exited. `ResourcePath` implementations
    that can return a more efficient native buffer should do so whenever
    possible (as is guaranteed for local files). `ResourcePath`
    implementations for which `as_local` does not return a temporary are
    required to reimplement `open`, though they may delegate to `super`
    when `prefer_file_temporary` is `False`.
    """
    if self.dirLike:
        raise TypeError(f"Directory-like URI {self} cannot be opened.")
    if "x" in mode and self.exists():
        raise FileExistsError(f"File at {self} already exists.")

    # Decode the mode string once; both strategies share these flags.
    binary = "b" in mode
    appending = "a" in mode
    needs_existing = "r" in mode or appending
    writes_back = "r" not in mode or "+" in mode
    overwrite = "x" not in mode

    if prefer_file_temporary:
        if needs_existing:
            local_cm = self.as_local()
        else:
            local_cm = self.temporary_uri(suffix=self.getExtension())
        with local_cm as local_uri:
            assert local_uri.isTemporary, (
                "ResourcePath implementations for which as_local is not "
                "a temporary must reimplement `open`."
            )
            # ``encoding`` is documented as ignored for binary IO, but the
            # built-in open raises ValueError if one is supplied together
            # with a binary mode, so drop it here.
            file_encoding = None if binary else encoding
            # ``io.open`` is the built-in ``open``; the qualified name
            # avoids any confusion with this method.
            with io.open(local_uri.ospath, mode=mode, encoding=file_encoding) as file_buffer:
                if appending:
                    file_buffer.seek(0, io.SEEK_END)
                yield file_buffer
            # Only reached if the caller's block did not raise.
            if writes_back:
                self.transfer_from(local_uri, transfer="copy", overwrite=overwrite)
    else:
        in_bytes = self.read() if needs_existing else b""
        if binary:
            bytes_buffer = io.BytesIO(in_bytes)
            if appending:
                bytes_buffer.seek(0, io.SEEK_END)
            yield bytes_buffer
            out_bytes = bytes_buffer.getvalue()
        else:
            if encoding is None:
                encoding = locale.getpreferredencoding(False)
            str_buffer = io.StringIO(in_bytes.decode(encoding))
            if appending:
                str_buffer.seek(0, io.SEEK_END)
            yield str_buffer
            out_bytes = str_buffer.getvalue().encode(encoding)
        # Remote write happens only on clean exit of the caller's block.
        if writes_back:
            self.write(out_bytes, overwrite=overwrite)