Coverage for python/lsst/resources/_resourcePath.py: 23%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of lsst-resources.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# Use of this source code is governed by a 3-clause BSD-style
10# license that can be found in the LICENSE file.
12from __future__ import annotations
14import concurrent.futures
15import contextlib
16import copy
17import logging
18import os
19import posixpath
20import re
21import shutil
22import tempfile
23import urllib.parse
24from pathlib import Path, PurePath, PurePosixPath
25from random import Random
27__all__ = ("ResourcePath",)
29from typing import TYPE_CHECKING, Any, Dict, Iterable, Iterator, List, Optional, Tuple, Type, Union
31if TYPE_CHECKING: 31 ↛ 32line 31 didn't jump to line 32, because the condition on line 31 was never true
32 from .utils import TransactionProtocol
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Regex for looking for URI escapes: a "%" followed by two hex digits.
# Only upper-case hex digits are matched.
ESCAPES_RE = re.compile(r"%[A-F0-9]{2}")

# Precomputed escaped hash: the URL-quoted form of "#".
ESCAPED_HASH = urllib.parse.quote("#")

# Maximum number of worker threads for parallelized operations.
# If greater than 10, be aware that this number has to be consistent
# with connection pool sizing (for example in urllib3).
MAX_WORKERS = 10
class ResourcePath:
    """Convenience wrapper around URI parsers.

    Provides access to URI components and can convert file
    paths into absolute path URIs. Scheme-less URIs are treated as if
    they are local file system paths and are converted to absolute URIs.

    A specialist subclass is created for each supported URI scheme.

    Parameters
    ----------
    uri : `str`, `pathlib.Path`, `urllib.parse.ParseResult` or `ResourcePath`
        URI in string form. Can be scheme-less if referring to a local
        filesystem path. A `ResourcePath` is returned unchanged.
    root : `str` or `ResourcePath`, optional
        When fixing up a relative path in a ``file`` scheme or if scheme-less,
        use this as the root. Must be absolute. If `None` the current
        working directory will be used. Can be a file URI.
    forceAbsolute : `bool`, optional
        If `True`, scheme-less relative URI will be converted to an absolute
        path using a ``file`` scheme. If `False` scheme-less URI will remain
        scheme-less and will not be updated to ``file`` or absolute path.
    forceDirectory : `bool`, optional
        If `True` forces the URI to end with a separator, otherwise given URI
        is interpreted as is.
    isTemporary : `bool`, optional
        If `True` indicates that this URI points to a temporary resource.
    """

    _pathLib: Type[PurePath] = PurePosixPath
    """Path library to use for this scheme."""

    _pathModule = posixpath
    """Path module to use for this scheme."""

    transferModes: Tuple[str, ...] = ("copy", "auto", "move")
    """Transfer modes supported by this implementation.

    Move is special in that it is generally a copy followed by an unlink.
    Whether that unlink works depends critically on whether the source URI
    implements unlink. If it does not the move will be reported as a failure.
    """

    transferDefault: str = "copy"
    """Default mode to use for transferring if ``auto`` is specified."""

    quotePaths = True
    """True if path-like elements modifying a URI should be quoted.

    All non-schemeless URIs have to internally use quoted paths. Therefore
    if a new file name is given (e.g. to updatedFile or join) a decision must
    be made whether to quote it to be consistent.
    """

    isLocal = False
    """If `True` this URI refers to a local file."""

    # This is not an ABC with abstract methods because the __new__ being
    # a factory confuses mypy such that it assumes that every constructor
    # returns a ResourcePath and then determines that all the abstract methods
    # are still abstract. If they are not marked abstract but just raise
    # mypy is fine with it.

    # mypy is confused without these
    _uri: urllib.parse.ParseResult
    isTemporary: bool
    dirLike: bool

    def __new__(
        cls,
        uri: Union[str, urllib.parse.ParseResult, ResourcePath, Path],
        root: Optional[Union[str, ResourcePath]] = None,
        forceAbsolute: bool = True,
        forceDirectory: bool = False,
        isTemporary: bool = False,
    ) -> ResourcePath:
        """Create and return new specialist ResourcePath subclass.

        Raises
        ------
        ValueError
            Raised if ``uri`` is not a string, path, parse result or
            `ResourcePath`.
        NotImplementedError
            Raised if the URI scheme is not supported.
        """
        parsed: urllib.parse.ParseResult
        dirLike: bool = False
        subclass: Optional[Type[ResourcePath]] = None

        if isinstance(uri, os.PathLike):
            uri = str(uri)

        # Record if we need to post process the URI components
        # or if the instance is already fully configured
        if isinstance(uri, str):
            # Since local file names can have special characters in them
            # we need to quote them for the parser but we can unquote
            # later. Assume that all other URI schemes are quoted.
            # Since sometimes people write file:/a/b and not file:///a/b
            # we should not quote in the explicit case of file:
            if "://" not in uri and not uri.startswith("file:"):
                if ESCAPES_RE.search(uri):
                    log.warning("Possible double encoding of %s", uri)
                else:
                    uri = urllib.parse.quote(uri)
                    # Special case hash since we must support fragments
                    # even in schemeless URIs -- although try to only replace
                    # them in file part and not directory part
                    if ESCAPED_HASH in uri:
                        dirpos = uri.rfind("/")
                        # Do replacement after this /
                        uri = uri[: dirpos + 1] + uri[dirpos + 1 :].replace(ESCAPED_HASH, "#")

            parsed = urllib.parse.urlparse(uri)
        elif isinstance(uri, urllib.parse.ParseResult):
            parsed = copy.copy(uri)
            # If we are being instantiated with a subclass, rather than
            # ResourcePath, ensure that that subclass is used directly.
            # This could lead to inconsistencies if this constructor
            # is used externally outside of the ResourcePath.replace() method.
            #   S3ResourcePath(urllib.parse.urlparse("file://a/b.txt"))
            # will be a problem.
            # This is needed to prevent a schemeless absolute URI become
            # a file URI unexpectedly when calling updatedFile or
            # updatedExtension
            if cls is not ResourcePath:
                parsed, dirLike = cls._fixDirectorySep(parsed, forceDirectory)
                subclass = cls

        elif isinstance(uri, ResourcePath):
            # Since ResourcePath is immutable we can return the argument
            # unchanged.
            return uri
        else:
            raise ValueError(
                f"Supplied URI must be string, Path, ResourcePath, or ParseResult but got '{uri!r}'"
            )

        if subclass is None:
            # Work out the subclass from the URI scheme. The imports are
            # deferred to this point because the scheme modules themselves
            # import this module.
            if not parsed.scheme:
                from .schemeless import SchemelessResourcePath

                subclass = SchemelessResourcePath
            elif parsed.scheme == "file":
                from .file import FileResourcePath

                subclass = FileResourcePath
            elif parsed.scheme == "s3":
                from .s3 import S3ResourcePath

                subclass = S3ResourcePath
            elif parsed.scheme.startswith("http"):
                from .http import HttpResourcePath

                subclass = HttpResourcePath
            elif parsed.scheme == "resource":
                # Rules for scheme names disallow pkg_resource
                from .packageresource import PackageResourcePath

                subclass = PackageResourcePath
            elif parsed.scheme == "mem":
                # in-memory datastore object
                from .mem import InMemoryResourcePath

                subclass = InMemoryResourcePath
            else:
                raise NotImplementedError(
                    f"No URI support for scheme: '{parsed.scheme}' in {parsed.geturl()}"
                )

            parsed, dirLike = subclass._fixupPathUri(
                parsed, root=root, forceAbsolute=forceAbsolute, forceDirectory=forceDirectory
            )

            # It is possible for the class to change from schemeless
            # to file so handle that
            if parsed.scheme == "file":
                from .file import FileResourcePath

                subclass = FileResourcePath

        # Now create an instance of the correct subclass and set the
        # attributes directly
        self = object.__new__(subclass)
        self._uri = parsed
        self.dirLike = dirLike
        self.isTemporary = isTemporary
        return self

    @property
    def scheme(self) -> str:
        """Return the URI scheme.

        Notes
        -----
        (``://`` is not part of the scheme).
        """
        return self._uri.scheme

    @property
    def netloc(self) -> str:
        """Return the URI network location."""
        return self._uri.netloc

    @property
    def path(self) -> str:
        """Return the path component of the URI."""
        return self._uri.path

    @property
    def unquoted_path(self) -> str:
        """Return path component of the URI with any URI quoting reversed."""
        return urllib.parse.unquote(self._uri.path)

    @property
    def ospath(self) -> str:
        """Return the path component of the URI localized to current OS.

        Raises
        ------
        AttributeError
            Always raised by this base implementation.
        """
        raise AttributeError(f"Non-file URI ({self}) has no local OS path.")

    @property
    def relativeToPathRoot(self) -> str:
        """Return path relative to network location.

        Effectively, this is the path property with posix separator stripped
        from the left hand side of the path.

        Always unquotes.
        """
        p = self._pathLib(self.path)
        relToRoot = str(p.relative_to(p.root))
        # The path library drops any trailing separator so restore it for
        # directory-like URIs.
        if self.dirLike and not relToRoot.endswith("/"):
            relToRoot += "/"
        return urllib.parse.unquote(relToRoot)

    @property
    def is_root(self) -> bool:
        """Return whether this URI points to the root of the network location.

        This means that the path components refers to the top level.
        """
        return self.relativeToPathRoot == "./"

    @property
    def fragment(self) -> str:
        """Return the fragment component of the URI."""
        return self._uri.fragment

    @property
    def params(self) -> str:
        """Return any parameters included in the URI."""
        return self._uri.params

    @property
    def query(self) -> str:
        """Return any query strings included in the URI."""
        return self._uri.query

    def geturl(self) -> str:
        """Return the URI in string form.

        Returns
        -------
        url : `str`
            String form of URI.
        """
        return self._uri.geturl()

    def root_uri(self) -> ResourcePath:
        """Return the base root URI.

        Returns
        -------
        uri : `ResourcePath`
            Root URI.
        """
        return self.replace(path="", forceDirectory=True)

    def split(self) -> Tuple[ResourcePath, str]:
        """Split URI into head and tail.

        Returns
        -------
        head : `ResourcePath`
            Everything leading up to tail, expanded and normalized as per
            ResourcePath rules.
        tail : `str`
            Last `self.path` component. Tail will be empty if path ends on a
            separator. Tail will never contain separators. It will be
            unquoted.

        Notes
        -----
        Equivalent to `os.path.split()` where head preserves the URI
        components.
        """
        head, tail = self._pathModule.split(self.path)
        headuri = self._uri._replace(path=head)

        # The file part should never include quoted metacharacters
        tail = urllib.parse.unquote(tail)

        # Schemeless is special in that it can be a relative path
        # We need to ensure that it stays that way. All other URIs will
        # be absolute already.
        forceAbsolute = self._pathModule.isabs(self.path)
        return ResourcePath(headuri, forceDirectory=True, forceAbsolute=forceAbsolute), tail

    def basename(self) -> str:
        """Return the base name, last element of path, of the URI.

        Returns
        -------
        tail : `str`
            Last part of the path attribute. Tail will be empty if path ends
            on a separator.

        Notes
        -----
        If URI ends on a slash returns an empty string. This is the second
        element returned by `split()`.

        Equivalent of `os.path.basename()`.
        """
        return self.split()[1]

    def dirname(self) -> ResourcePath:
        """Return the directory component of the path as a new `ResourcePath`.

        Returns
        -------
        head : `ResourcePath`
            Everything except the tail of path attribute, expanded and
            normalized as per ResourcePath rules.

        Notes
        -----
        Equivalent of `os.path.dirname()`.
        """
        return self.split()[0]

    def parent(self) -> ResourcePath:
        """Return a `ResourcePath` of the parent directory.

        Returns
        -------
        head : `ResourcePath`
            Everything except the tail of path attribute, expanded and
            normalized as per `ResourcePath` rules.

        Notes
        -----
        For a file-like URI this will be the same as calling `dirname()`.
        """
        # When self is file-like, return self.dirname()
        if not self.dirLike:
            return self.dirname()
        # When self is dir-like, return its parent directory,
        # regardless of the presence of a trailing separator
        originalPath = self._pathLib(self.path)
        parentPath = originalPath.parent
        return self.replace(path=str(parentPath), forceDirectory=True)

    def replace(self, forceDirectory: bool = False, isTemporary: bool = False, **kwargs: Any) -> ResourcePath:
        """Return new `ResourcePath` with specified components replaced.

        Parameters
        ----------
        forceDirectory : `bool`, optional
            Parameter passed to ResourcePath constructor to force this
            new URI to be dir-like.
        isTemporary : `bool`, optional
            Indicate that the resulting URI is temporary resource.
        **kwargs
            Components of a `urllib.parse.ParseResult` that should be
            modified for the newly-created `ResourcePath`.

        Returns
        -------
        new : `ResourcePath`
            New `ResourcePath` object with updated values.

        Raises
        ------
        ValueError
            Raised if a change of ``scheme`` is requested.

        Notes
        -----
        Does not, for now, allow a change in URI scheme.
        """
        # Disallow a change in scheme
        if "scheme" in kwargs:
            raise ValueError(f"Can not use replace() method to change URI scheme for {self}")
        # Construct via the concrete class so that the subclass is retained.
        return self.__class__(
            self._uri._replace(**kwargs), forceDirectory=forceDirectory, isTemporary=isTemporary
        )

    def updatedFile(self, newfile: str) -> ResourcePath:
        """Return new URI with an updated final component of the path.

        Parameters
        ----------
        newfile : `str`
            File name with no path component.

        Returns
        -------
        updated : `ResourcePath`
            URI whose final path component is ``newfile``.

        Notes
        -----
        Forces the ResourcePath.dirLike attribute to be false. The new file
        path will be quoted if necessary.
        """
        if self.quotePaths:
            newfile = urllib.parse.quote(newfile)
        parent_dir, _ = self._pathModule.split(self.path)
        newpath = self._pathModule.join(parent_dir, newfile)

        updated = self.replace(path=newpath)
        updated.dirLike = False
        return updated

    def updatedExtension(self, ext: Optional[str]) -> ResourcePath:
        """Return a new `ResourcePath` with updated file extension.

        All file extensions are replaced.

        Parameters
        ----------
        ext : `str` or `None`
            New extension. If an empty string is given any extension will
            be removed. If `None` is given there will be no change.

        Returns
        -------
        updated : `ResourcePath`
            URI with the specified extension. Can return itself if
            no extension was specified.
        """
        if ext is None:
            return self

        # Get the extension
        current = self.getExtension()

        # Nothing to do if the extension already matches
        if current == ext:
            return self

        # Remove the current extension from the path
        # .fits.gz counts as one extension do not use os.path.splitext
        path = self.path
        if current:
            path = path[: -len(current)]

        # Ensure that we have a leading "." on file extension (and we do not
        # try to modify the empty string)
        if ext and not ext.startswith("."):
            ext = "." + ext

        return self.replace(path=path + ext)

    def getExtension(self) -> str:
        """Return the file extension(s) associated with this URI path.

        Returns
        -------
        ext : `str`
            The file extension (including the ``.``). Can be empty string
            if there is no file extension. Usually returns only the last
            file extension unless there is a special extension modifier
            indicating file compression, in which case the combined
            extension (e.g. ``.fits.gz``) will be returned.
        """
        special = {".gz", ".bz2", ".xz", ".fz"}

        # Get the file part of the path so as not to be confused by
        # "." in directory names.
        basename = self.basename()
        extensions = self._pathLib(basename).suffixes

        if not extensions:
            return ""

        ext = extensions.pop()

        # Multiple extensions, decide whether to include the final two
        if extensions and ext in special:
            ext = f"{extensions[-1]}{ext}"

        return ext

    def join(self, path: Union[str, ResourcePath], isTemporary: bool = False) -> ResourcePath:
        """Return new `ResourcePath` with additional path components.

        Parameters
        ----------
        path : `str`, `ResourcePath`
            Additional file components to append to the current URI. Assumed
            to include a file at the end. Will be quoted depending on the
            associated URI scheme. If the path looks like a URI with a scheme
            referring to an absolute location, it will be returned
            directly (matching the behavior of `os.path.join()`). It can
            also be a `ResourcePath`.
        isTemporary : `bool`, optional
            Indicate that the resulting URI represents a temporary resource.

        Returns
        -------
        new : `ResourcePath`
            New URI with any file at the end replaced with the new path
            components.

        Notes
        -----
        Schemeless URIs assume local path separator but all other URIs assume
        POSIX separator if the supplied path has directory structure. It
        may be this never becomes a problem but datastore templates assume
        POSIX separator is being used.

        If an absolute `ResourcePath` is given for ``path`` it is assumed that
        this should be returned directly. Giving a ``path`` of an absolute
        scheme-less URI is not allowed for safety reasons as it may indicate
        a mistake in the calling code.

        Raises
        ------
        ValueError
            Raised if the ``path`` is an absolute scheme-less URI. In that
            situation it is unclear whether the intent is to return a
            ``file`` URI or it was a mistake and a relative scheme-less URI
            was meant.
        """
        # If we have a full URI in path we will use it directly
        # but without forcing to absolute so that we can trap the
        # expected option of relative path.
        path_uri = ResourcePath(path, forceAbsolute=False)
        if path_uri.scheme:
            # Check for scheme so can distinguish explicit URIs from
            # absolute scheme-less URIs.
            return path_uri

        if path_uri.isabs():
            # Absolute scheme-less path.
            raise ValueError(f"Can not join absolute scheme-less {path_uri!r} to another URI.")

        # If this was originally a ResourcePath extract the unquoted path from
        # it. Otherwise we use the string we were given to allow "#" to appear
        # in the filename if given as a plain string.
        if not isinstance(path, str):
            path = path_uri.unquoted_path

        new = self.dirname()  # By definition a directory URI

        # new should be asked about quoting, not self, since dirname can
        # change the URI scheme for schemeless -> file
        if new.quotePaths:
            path = urllib.parse.quote(path)

        newpath = self._pathModule.normpath(self._pathModule.join(new.path, path))

        # normpath can strip trailing / so we force directory if the supplied
        # path ended with a /
        return new.replace(
            path=newpath, forceDirectory=path.endswith(self._pathModule.sep), isTemporary=isTemporary
        )

    def relative_to(self, other: ResourcePath) -> Optional[str]:
        """Return the relative path from this URI to the other URI.

        Parameters
        ----------
        other : `ResourcePath`
            URI to use to calculate the relative path. Must be a parent
            of this URI.

        Returns
        -------
        subpath : `str` or `None`
            The sub path of this URI relative to the supplied other URI.
            Returns `None` if there is no parent child relationship.
            Scheme and netloc must match. The returned path is unquoted.
        """
        # Scheme-less absolute other is treated as if it's a file scheme.
        # Scheme-less relative other can only return non-None if self
        # is also scheme-less relative and that is handled specifically
        # in a subclass.
        if not other.scheme and other.isabs():
            other = other.abspath()

        # Scheme-less self is handled elsewhere.
        if self.scheme != other.scheme or self.netloc != other.netloc:
            return None

        # relativeToPathRoot has already reversed any URI quoting so the
        # result must not be unquoted a second time; doing so would corrupt
        # names that legitimately contain a "%XX" sequence.
        enclosed_path = self._pathLib(self.relativeToPathRoot)
        parent_path = other.relativeToPathRoot
        try:
            return str(enclosed_path.relative_to(parent_path))
        except ValueError:
            # Not a child of the other URI.
            return None

    def exists(self) -> bool:
        """Indicate that the resource is available.

        Returns
        -------
        exists : `bool`
            `True` if the resource exists.
        """
        raise NotImplementedError()

    @classmethod
    def mexists(cls, uris: Iterable[ResourcePath]) -> Dict[ResourcePath, bool]:
        """Check for existence of multiple URIs at once.

        Parameters
        ----------
        uris : iterable of `ResourcePath`
            The URIs to test.

        Returns
        -------
        existence : `dict` of [`ResourcePath`, `bool`]
            Mapping of original URI to boolean indicating existence. A URI
            whose existence check raised an exception is reported as not
            existing.
        """
        results: Dict[ResourcePath, bool] = {}
        # Use the executor as a context manager so that the worker threads
        # are always shut down once the checks complete.
        with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as exists_executor:
            future_exists = {exists_executor.submit(uri.exists): uri for uri in uris}

            for future in concurrent.futures.as_completed(future_exists):
                uri = future_exists[future]
                try:
                    exists = future.result()
                except Exception:
                    # Deliberately best effort: any failure to determine
                    # existence is treated as the resource being absent.
                    exists = False
                results[uri] = exists
        return results

    def remove(self) -> None:
        """Remove the resource."""
        raise NotImplementedError()

    def isabs(self) -> bool:
        """Indicate that the resource is fully specified.

        For non-schemeless URIs this is always true.

        Returns
        -------
        isabs : `bool`
            `True` in all cases except schemeless URI.
        """
        return True

    def abspath(self) -> ResourcePath:
        """Return URI using an absolute path.

        Returns
        -------
        abs : `ResourcePath`
            Absolute URI. For non-schemeless URIs this always returns itself.
            Schemeless URIs are upgraded to file URIs.
        """
        return self

    def _as_local(self) -> Tuple[str, bool]:
        """Return the location of the (possibly remote) resource as local file.

        This is a helper function for `as_local` context manager.

        Returns
        -------
        path : `str`
            If this is a remote resource, it will be a copy of the resource
            on the local file system, probably in a temporary directory.
            For a local resource this should be the actual path to the
            resource.
        is_temporary : `bool`
            Indicates if the local path is a temporary file or not.
        """
        raise NotImplementedError()

    @contextlib.contextmanager
    def as_local(self) -> Iterator[ResourcePath]:
        """Return the location of the (possibly remote) resource as local file.

        Yields
        ------
        local : `ResourcePath`
            If this is a remote resource, it will be a copy of the resource
            on the local file system, probably in a temporary directory.
            For a local resource this should be the actual path to the
            resource.

        Notes
        -----
        The context manager will automatically delete any local temporary
        file.

        Examples
        --------
        Should be used as a context manager:

        .. code-block:: py

           with uri.as_local() as local:
               ospath = local.ospath
        """
        local_src, is_temporary = self._as_local()
        local_uri = ResourcePath(local_src, isTemporary=is_temporary)

        try:
            yield local_uri
        finally:
            # The caller might have relocated the temporary file
            if is_temporary and local_uri.exists():
                local_uri.remove()

    @classmethod
    @contextlib.contextmanager
    def temporary_uri(
        cls, prefix: Optional[ResourcePath] = None, suffix: Optional[str] = None
    ) -> Iterator[ResourcePath]:
        """Create a temporary URI.

        Parameters
        ----------
        prefix : `ResourcePath`, optional
            Prefix to use. Without this the path will be formed as a local
            file URI in a temporary directory. Ensuring that the prefix
            location exists is the responsibility of the caller.
        suffix : `str`, optional
            A file suffix to be used. The ``.`` should be included in this
            suffix.

        Yields
        ------
        uri : `ResourcePath`
            The temporary URI. Will be removed when the context is completed.
        """
        use_tempdir = False
        if prefix is None:
            prefix = ResourcePath(tempfile.mkdtemp(), forceDirectory=True, isTemporary=True)
            # Record that we need to delete this directory. Can not rely
            # on isTemporary flag since an external prefix may have that
            # set as well.
            use_tempdir = True

        # Need to create a randomized file name. For consistency do not
        # use mkstemp for local and something else for remote. Additionally
        # this method does not create the file to prevent name clashes.
        characters = "abcdefghijklmnopqrstuvwxyz0123456789_"
        rng = Random()
        tempname = "".join(rng.choice(characters) for _ in range(16))
        if suffix:
            tempname += suffix
        temporary_uri = prefix.join(tempname, isTemporary=True)

        try:
            yield temporary_uri
        finally:
            if use_tempdir:
                shutil.rmtree(prefix.ospath, ignore_errors=True)
            else:
                # It's okay if this does not work because the user removed
                # the file.
                with contextlib.suppress(FileNotFoundError):
                    temporary_uri.remove()

    def read(self, size: int = -1) -> bytes:
        """Open the resource and return the contents in bytes.

        Parameters
        ----------
        size : `int`, optional
            The number of bytes to read. Negative or omitted indicates
            that all data should be read.
        """
        raise NotImplementedError()

    def write(self, data: bytes, overwrite: bool = True) -> None:
        """Write the supplied bytes to the new resource.

        Parameters
        ----------
        data : `bytes`
            The bytes to write to the resource. The entire contents of the
            resource will be replaced.
        overwrite : `bool`, optional
            If `True` the resource will be overwritten if it exists. Otherwise
            the write will fail.
        """
        raise NotImplementedError()

    def mkdir(self) -> None:
        """For a dir-like URI, create the directory resource if needed."""
        raise NotImplementedError()

    def isdir(self) -> bool:
        """Return True if this URI looks like a directory, else False."""
        return self.dirLike

    def size(self) -> int:
        """For non-dir-like URI, return the size of the resource.

        Returns
        -------
        sz : `int`
            The size in bytes of the resource associated with this URI.
            Returns 0 if dir-like.
        """
        raise NotImplementedError()

    def __str__(self) -> str:
        """Convert the URI to its native string form."""
        return self.geturl()

    def __repr__(self) -> str:
        """Return string representation suitable for evaluation."""
        return f'ResourcePath("{self.geturl()}")'

    def __eq__(self, other: Any) -> bool:
        """Compare supplied object with this `ResourcePath`."""
        if not isinstance(other, ResourcePath):
            return NotImplemented
        return self.geturl() == other.geturl()

    def __hash__(self) -> int:
        """Return hash of this object."""
        return hash(str(self))

    def __copy__(self) -> ResourcePath:
        """Copy constructor.

        Object is immutable so copy can return itself.
        """
        # Implement here because the __new__ method confuses things
        return self

    def __deepcopy__(self, memo: Any) -> ResourcePath:
        """Deepcopy the object.

        Object is immutable so copy can return itself.
        """
        # Implement here because the __new__ method confuses things
        return self

    def __getnewargs__(self) -> Tuple:
        """Support pickling."""
        return (str(self),)

    @classmethod
    def _fixDirectorySep(
        cls, parsed: urllib.parse.ParseResult, forceDirectory: bool = False
    ) -> Tuple[urllib.parse.ParseResult, bool]:
        """Ensure that a path separator is present on directory paths.

        Parameters
        ----------
        parsed : `~urllib.parse.ParseResult`
            The result from parsing a URI using `urllib.parse`.
        forceDirectory : `bool`, optional
            If `True` forces the URI to end with a separator, otherwise given
            URI is interpreted as is. Specifying that the URI is conceptually
            equivalent to a directory can break some ambiguities when
            interpreting the last element of a path.

        Returns
        -------
        modified : `~urllib.parse.ParseResult`
            Update result if a URI is being handled.
        dirLike : `bool`
            `True` if given parsed URI has a trailing separator or
            forceDirectory is True. Otherwise `False`.
        """
        # assume we are not dealing with a directory like URI
        dirLike = False

        # Directory separator
        sep = cls._pathModule.sep

        # URI is dir-like if explicitly stated or if it ends on a separator
        endsOnSep = parsed.path.endswith(sep)
        if forceDirectory or endsOnSep:
            dirLike = True
            # only add the separator if it's not already there
            if not endsOnSep:
                parsed = parsed._replace(path=parsed.path + sep)

        return parsed, dirLike

    @classmethod
    def _fixupPathUri(
        cls,
        parsed: urllib.parse.ParseResult,
        root: Optional[Union[str, ResourcePath]] = None,
        forceAbsolute: bool = False,
        forceDirectory: bool = False,
    ) -> Tuple[urllib.parse.ParseResult, bool]:
        """Correct any issues with the supplied URI.

        Parameters
        ----------
        parsed : `~urllib.parse.ParseResult`
            The result from parsing a URI using `urllib.parse`.
        root : `str` or `ResourcePath`, ignored
            Not used by this implementation since all URIs are
            absolute except for those representing the local file system.
        forceAbsolute : `bool`, ignored.
            Not used by this implementation. URIs are generally always
            absolute.
        forceDirectory : `bool`, optional
            If `True` forces the URI to end with a separator, otherwise given
            URI is interpreted as is. Specifying that the URI is conceptually
            equivalent to a directory can break some ambiguities when
            interpreting the last element of a path.

        Returns
        -------
        modified : `~urllib.parse.ParseResult`
            Update result if a URI is being handled.
        dirLike : `bool`
            `True` if given parsed URI has a trailing separator or
            forceDirectory is True. Otherwise `False`.

        Notes
        -----
        Relative paths are explicitly not supported by RFC8089 but `urllib`
        does accept URIs of the form ``file:relative/path.ext``. They need
        to be turned into absolute paths before they can be used. This is
        always done regardless of the ``forceAbsolute`` parameter.

        AWS S3 differentiates between keys with trailing POSIX separators (i.e
        `/dir` and `/dir/`) whereas POSIX does not necessarily.

        Scheme-less paths are normalized.
        """
        return cls._fixDirectorySep(parsed, forceDirectory)

    def transfer_from(
        self,
        src: ResourcePath,
        transfer: str,
        overwrite: bool = False,
        transaction: Optional[TransactionProtocol] = None,
    ) -> None:
        """Transfer to this URI from another.

        Parameters
        ----------
        src : `ResourcePath`
            Source URI.
        transfer : `str`
            Mode to use for transferring the resource. Generically there are
            many standard options: copy, link, symlink, hardlink, relsymlink.
            Not all URIs support all modes.
        overwrite : `bool`, optional
            Allow an existing file to be overwritten. Defaults to `False`.
        transaction : `~lsst.resources.utils.TransactionProtocol`, optional
            A transaction object that can (depending on implementation)
            rollback transfers on error. Not guaranteed to be implemented.

        Notes
        -----
        Conceptually this is hard to scale as the number of URI schemes
        grow. The destination URI is more important than the source URI
        since that is where all the transfer modes are relevant (with the
        complication that "move" deletes the source).

        Local file to local file is the fundamental use case but every
        other scheme has to support "copy" to local file (with implicit
        support for "move") and copy from local file.
        All the "link" options tend to be specific to local file systems.

        "move" is a "copy" where the remote resource is deleted at the end.
        Whether this works depends on the source URI rather than the
        destination URI. Reverting a move on transaction rollback is
        expected to be problematic if a remote resource was involved.
        """
        raise NotImplementedError(f"No transfer modes supported by URI scheme {self.scheme}")

    def walk(
        self, file_filter: Optional[Union[str, re.Pattern]] = None
    ) -> Iterator[Union[List, Tuple[ResourcePath, List[str], List[str]]]]:
        """Walk the directory tree returning matching files and directories.

        Parameters
        ----------
        file_filter : `str` or `re.Pattern`, optional
            Regex to filter out files from the list before it is returned.

        Yields
        ------
        dirpath : `ResourcePath`
            Current directory being examined.
        dirnames : `list` of `str`
            Names of subdirectories within dirpath.
        filenames : `list` of `str`
            Names of all the files within dirpath.
        """
        raise NotImplementedError()

    @classmethod
    def findFileResources(
        cls,
        candidates: Iterable[Union[str, ResourcePath]],
        file_filter: Optional[str] = None,
        grouped: bool = False,
    ) -> Iterator[Union[ResourcePath, Iterator[ResourcePath]]]:
        """Get all the files from a list of values.

        Parameters
        ----------
        candidates : iterable [`str` or `ResourcePath`]
            The files to return and directories in which to look for files to
            return.
        file_filter : `str`, optional
            The regex to use when searching for files within directories.
            By default returns all the found files.
        grouped : `bool`, optional
            If `True` the results will be grouped by directory and each
            yielded value will be an iterator over URIs. If `False` each
            URI will be returned separately.

        Yields
        ------
        found_file : `ResourcePath`
            The passed-in URIs and URIs found in passed-in directories.
            If grouping is enabled, each of the yielded values will be an
            iterator yielding members of the group. Files given explicitly
            will be returned as a single group at the end.

        Notes
        -----
        If a value is a file it is yielded immediately without checking that it
        exists. If a value is a directory, all the files in the directory
        (recursively) that match the regex will be yielded in turn.
        """
        fileRegex = None if file_filter is None else re.compile(file_filter)

        singles: List[ResourcePath] = []

        # Find all the files of interest
        for location in candidates:
            uri = ResourcePath(location)
            if uri.isdir():
                for found in uri.walk(fileRegex):
                    if not found:
                        # This means the uri does not exist and by
                        # convention we ignore it
                        continue
                    root, dirs, files = found
                    if not files:
                        continue
                    if grouped:
                        yield (root.join(name) for name in files)
                    else:
                        for name in files:
                            yield root.join(name)
            else:
                if grouped:
                    singles.append(uri)
                else:
                    yield uri

        # Finally, return any explicitly given files in one group
        if grouped and singles:
            yield iter(singles)