Coverage for python/lsst/daf/butler/core/_butlerUri/_butlerUri.py : 52%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24import contextlib
25import urllib.parse
26import posixpath
27import copy
28import logging
29import re
31from pathlib import Path, PurePath, PurePosixPath
33__all__ = ('ButlerURI',)
35from typing import (
36 TYPE_CHECKING,
37 Any,
38 Iterable,
39 Iterator,
40 List,
41 Optional,
42 Tuple,
43 Type,
44 Union,
45)
47from .utils import NoTransaction
49if TYPE_CHECKING: 49 ↛ 50line 49 didn't jump to line 50, because the condition on line 49 was never true
50 from ..datastore import DatastoreTransaction
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Regex for looking for URI escapes: a "%" followed by two characters drawn
# from 0-9 and upper-case A-F. Lower-case hex digits are not matched.
ESCAPES_RE = re.compile(r"%[A-F0-9]{2}")

# Precomputed escaped hash: the form "#" takes after urllib.parse.quote().
ESCAPED_HASH = urllib.parse.quote("#")
class ButlerURI:
    """Convenience wrapper around URI parsers.

    Provides access to URI components and can convert file
    paths into absolute path URIs. Scheme-less URIs are treated as if
    they are local file system paths and are converted to absolute URIs.

    A specialist subclass is created for each supported URI scheme.

    Parameters
    ----------
    uri : `str`, `pathlib.Path`, `ButlerURI` or `urllib.parse.ParseResult`
        URI in string form. Can be scheme-less if referring to a local
        filesystem path. If a `ButlerURI` is given it is returned unchanged.
    root : `str` or `ButlerURI`, optional
        When fixing up a relative path in a ``file`` scheme or if scheme-less,
        use this as the root. Must be absolute. If `None` the current
        working directory will be used. Can be a file URI.
    forceAbsolute : `bool`, optional
        If `True`, scheme-less relative URI will be converted to an absolute
        path using a ``file`` scheme. If `False` scheme-less URI will remain
        scheme-less and will not be updated to ``file`` or absolute path.
    forceDirectory : `bool`, optional
        If `True` forces the URI to end with a separator, otherwise given URI
        is interpreted as is.
    isTemporary : `bool`, optional
        If `True` indicates that this URI points to a temporary resource.
    """

    _pathLib: Type[PurePath] = PurePosixPath
    """Path library to use for this scheme."""

    _pathModule = posixpath
    """Path module to use for this scheme."""

    transferModes: Tuple[str, ...] = ("copy", "auto", "move")
    """Transfer modes supported by this implementation.

    Move is special in that it is generally a copy followed by an unlink.
    Whether that unlink works depends critically on whether the source URI
    implements unlink. If it does not the move will be reported as a failure.
    """

    transferDefault: str = "copy"
    """Default mode to use for transferring if ``auto`` is specified."""

    quotePaths = True
    """True if path-like elements modifying a URI should be quoted.

    All non-schemeless URIs have to internally use quoted paths. Therefore
    if a new file name is given (e.g. to updatedFile or join) a decision must
    be made whether to quote it to be consistent.
    """

    isLocal = False
    """If `True` this URI refers to a local file."""

    # This is not an ABC with abstract methods because the __new__ being
    # a factory confuses mypy such that it assumes that every constructor
    # returns a ButlerURI and then determines that all the abstract methods
    # are still abstract. If they are not marked abstract but just raise
    # mypy is fine with it.

    # mypy is confused without these
    _uri: urllib.parse.ParseResult
    isTemporary: bool
    dirLike: bool

    def __new__(cls, uri: Union[str, urllib.parse.ParseResult, ButlerURI, Path],
                root: Optional[Union[str, ButlerURI]] = None, forceAbsolute: bool = True,
                forceDirectory: bool = False, isTemporary: bool = False) -> ButlerURI:
        """Create and return new specialist ButlerURI subclass.

        Raises
        ------
        ValueError
            Raised if ``uri`` is not a string, `~pathlib.Path`, `ButlerURI`
            or `urllib.parse.ParseResult`.
        NotImplementedError
            Raised if the URI scheme is not one of the supported schemes.
        """
        parsed: urllib.parse.ParseResult
        dirLike: bool = False
        subclass: Optional[Type[ButlerURI]] = None

        if isinstance(uri, Path):
            uri = str(uri)

        # Record if we need to post process the URI components
        # or if the instance is already fully configured
        if isinstance(uri, str):
            # Since local file names can have special characters in them
            # we need to quote them for the parser but we can unquote
            # later. Assume that all other URI schemes are quoted.
            # Since sometimes people write file:/a/b and not file:///a/b
            # we should not quote in the explicit case of file:
            if "://" not in uri and not uri.startswith("file:"):
                if ESCAPES_RE.search(uri):
                    log.warning("Possible double encoding of %s", uri)
                else:
                    uri = urllib.parse.quote(uri)
                    # Special case hash since we must support fragments
                    # even in schemeless URIs -- although try to only replace
                    # them in file part and not directory part
                    if ESCAPED_HASH in uri:
                        dirpos = uri.rfind("/")
                        # Do replacement after this /
                        uri = uri[:dirpos+1] + uri[dirpos+1:].replace(ESCAPED_HASH, "#")

            parsed = urllib.parse.urlparse(uri)
        elif isinstance(uri, urllib.parse.ParseResult):
            parsed = copy.copy(uri)
            # If we are being instantiated with a subclass, rather than
            # ButlerURI, ensure that that subclass is used directly.
            # This could lead to inconsistencies if this constructor
            # is used externally outside of the ButlerURI.replace() method.
            #   ButlerS3URI(urllib.parse.urlparse("file://a/b.txt"))
            # will be a problem.
            # This is needed to prevent a schemeless absolute URI become
            # a file URI unexpectedly when calling updatedFile or
            # updatedExtension
            if cls is not ButlerURI:
                parsed, dirLike = cls._fixDirectorySep(parsed, forceDirectory)
                subclass = cls

        elif isinstance(uri, ButlerURI):
            # Since ButlerURI is immutable we can return the argument
            # unchanged.
            return uri
        else:
            raise ValueError("Supplied URI must be string, Path, "
                             f"ButlerURI, or ParseResult but got '{uri!r}'")

        if subclass is None:
            # Work out the subclass from the URI scheme. The imports are
            # deferred to here because the subclass modules themselves
            # depend on this class.
            if not parsed.scheme:
                from .schemeless import ButlerSchemelessURI
                subclass = ButlerSchemelessURI
            elif parsed.scheme == "file":
                from .file import ButlerFileURI
                subclass = ButlerFileURI
            elif parsed.scheme == "s3":
                from .s3 import ButlerS3URI
                subclass = ButlerS3URI
            elif parsed.scheme.startswith("http"):
                from .http import ButlerHttpURI
                subclass = ButlerHttpURI
            elif parsed.scheme == "resource":
                # Rules for scheme names disallow pkg_resource
                from .packageresource import ButlerPackageResourceURI
                subclass = ButlerPackageResourceURI
            elif parsed.scheme == "mem":
                # in-memory datastore object
                from .mem import ButlerInMemoryURI
                subclass = ButlerInMemoryURI
            else:
                # Both halves of the message must be f-strings so that the
                # offending URL is actually interpolated.
                raise NotImplementedError(f"No URI support for scheme: '{parsed.scheme}'"
                                          f" in {parsed.geturl()}")

            parsed, dirLike = subclass._fixupPathUri(parsed, root=root,
                                                     forceAbsolute=forceAbsolute,
                                                     forceDirectory=forceDirectory)

            # It is possible for the class to change from schemeless
            # to file so handle that
            if parsed.scheme == "file":
                from .file import ButlerFileURI
                subclass = ButlerFileURI

        # Now create an instance of the correct subclass and set the
        # attributes directly
        self = object.__new__(subclass)
        self._uri = parsed
        self.dirLike = dirLike
        self.isTemporary = isTemporary
        return self

    @property
    def scheme(self) -> str:
        """Return the URI scheme.

        Notes
        -----
        (``://`` is not part of the scheme).
        """
        return self._uri.scheme

    @property
    def netloc(self) -> str:
        """Return the URI network location."""
        return self._uri.netloc

    @property
    def path(self) -> str:
        """Return the path component of the URI."""
        return self._uri.path

    @property
    def unquoted_path(self) -> str:
        """Return path component of the URI with any URI quoting reversed."""
        return urllib.parse.unquote(self._uri.path)

    @property
    def ospath(self) -> str:
        """Return the path component of the URI localized to current OS.

        Raises
        ------
        AttributeError
            Always raised by this base implementation.
        """
        raise AttributeError(f"Non-file URI ({self}) has no local OS path.")

    @property
    def relativeToPathRoot(self) -> str:
        """Return path relative to network location.

        Effectively, this is the path property with posix separator stripped
        from the left hand side of the path.

        Always unquotes.
        """
        p = self._pathLib(self.path)
        relToRoot = str(p.relative_to(p.root))
        # The path library drops trailing separators so restore it for
        # directory-like URIs.
        if self.dirLike and not relToRoot.endswith("/"):
            relToRoot += "/"
        return urllib.parse.unquote(relToRoot)

    @property
    def is_root(self) -> bool:
        """Return whether this URI points to the root of the network location.

        This means that the path components refers to the top level.
        """
        # A dir-like URI at the top level has a relative path of "./".
        return self.relativeToPathRoot == "./"

    @property
    def fragment(self) -> str:
        """Return the fragment component of the URI."""
        return self._uri.fragment

    @property
    def params(self) -> str:
        """Return any parameters included in the URI."""
        return self._uri.params

    @property
    def query(self) -> str:
        """Return any query strings included in the URI."""
        return self._uri.query

    def geturl(self) -> str:
        """Return the URI in string form.

        Returns
        -------
        url : `str`
            String form of URI.
        """
        return self._uri.geturl()

    def split(self) -> Tuple[ButlerURI, str]:
        """Split URI into head and tail.

        Returns
        -------
        head: `ButlerURI`
            Everything leading up to tail, expanded and normalized as per
            ButlerURI rules.
        tail : `str`
            Last `self.path` component. Tail will be empty if path ends on a
            separator. Tail will never contain separators. It will be
            unquoted.

        Notes
        -----
        Equivalent to `os.path.split()` where head preserves the URI
        components.
        """
        head, tail = self._pathModule.split(self.path)
        headuri = self._uri._replace(path=head)

        # The file part should never include quoted metacharacters
        tail = urllib.parse.unquote(tail)

        # Schemeless is special in that it can be a relative path
        # We need to ensure that it stays that way. All other URIs will
        # be absolute already.
        forceAbsolute = self._pathModule.isabs(self.path)
        return ButlerURI(headuri, forceDirectory=True, forceAbsolute=forceAbsolute), tail

    def basename(self) -> str:
        """Return the base name, last element of path, of the URI.

        Returns
        -------
        tail : `str`
            Last part of the path attribute. Tail will be empty if path ends
            on a separator.

        Notes
        -----
        If URI ends on a slash returns an empty string. This is the second
        element returned by `split()`.

        Equivalent of `os.path.basename()`.
        """
        return self.split()[1]

    def dirname(self) -> ButlerURI:
        """Return the directory component of the path as a new `ButlerURI`.

        Returns
        -------
        head : `ButlerURI`
            Everything except the tail of path attribute, expanded and
            normalized as per ButlerURI rules.

        Notes
        -----
        Equivalent of `os.path.dirname()`.
        """
        return self.split()[0]

    def parent(self) -> ButlerURI:
        """Return a `ButlerURI` of the parent directory.

        Returns
        -------
        head : `ButlerURI`
            Everything except the tail of path attribute, expanded and
            normalized as per `ButlerURI` rules.

        Notes
        -----
        For a file-like URI this will be the same as calling `dirname()`.
        """
        # When self is file-like, return self.dirname()
        if not self.dirLike:
            return self.dirname()
        # When self is dir-like, return its parent directory,
        # regardless of the presence of a trailing separator
        originalPath = self._pathLib(self.path)
        parentPath = originalPath.parent
        return self.replace(path=str(parentPath), forceDirectory=True)

    def replace(self, forceDirectory: bool = False, **kwargs: Any) -> ButlerURI:
        """Return new `ButlerURI` with specified components replaced.

        Parameters
        ----------
        forceDirectory : `bool`
            Parameter passed to ButlerURI constructor to force this
            new URI to be dir-like.
        kwargs : `dict`
            Components of a `urllib.parse.ParseResult` that should be
            modified for the newly-created `ButlerURI`.

        Returns
        -------
        new : `ButlerURI`
            New `ButlerURI` object with updated values.

        Raises
        ------
        ValueError
            Raised if a change of scheme is requested.

        Notes
        -----
        Does not, for now, allow a change in URI scheme.
        """
        # Disallow a change in scheme
        if "scheme" in kwargs:
            raise ValueError(f"Can not use replace() method to change URI scheme for {self}")
        return self.__class__(self._uri._replace(**kwargs), forceDirectory=forceDirectory)

    def updatedFile(self, newfile: str) -> ButlerURI:
        """Return new URI with an updated final component of the path.

        Parameters
        ----------
        newfile : `str`
            File name with no path component.

        Returns
        -------
        updated : `ButlerURI`

        Notes
        -----
        Forces the ButlerURI.dirLike attribute to be false. The new file path
        will be quoted if necessary.
        """
        if self.quotePaths:
            newfile = urllib.parse.quote(newfile)
        dir, _ = self._pathModule.split(self.path)
        newpath = self._pathModule.join(dir, newfile)

        # replace() always builds a fresh object so it is safe to adjust
        # the dirLike flag on the result.
        updated = self.replace(path=newpath)
        updated.dirLike = False
        return updated

    def updatedExtension(self, ext: Optional[str]) -> ButlerURI:
        """Return a new `ButlerURI` with updated file extension.

        All file extensions are replaced.

        Parameters
        ----------
        ext : `str` or `None`
            New extension. If an empty string is given any extension will
            be removed. If `None` is given there will be no change.

        Returns
        -------
        updated : `ButlerURI`
            URI with the specified extension. Can return itself if
            no extension was specified.
        """
        if ext is None:
            return self

        # Get the extension
        current = self.getExtension()

        # Nothing to do if the extension already matches
        if current == ext:
            return self

        # Remove the current extension from the path
        # .fits.gz counts as one extension do not use os.path.splitext
        path = self.path
        if current:
            path = path[:-len(current)]

        # Ensure that we have a leading "." on file extension (and we do not
        # try to modify the empty string)
        if ext and not ext.startswith("."):
            ext = "." + ext

        return self.replace(path=path + ext)

    def getExtension(self) -> str:
        """Return the file extension(s) associated with this URI path.

        Returns
        -------
        ext : `str`
            The file extension (including the ``.``). Can be empty string
            if there is no file extension. Usually returns only the last
            file extension unless there is a special extension modifier
            indicating file compression, in which case the combined
            extension (e.g. ``.fits.gz``) will be returned.
        """
        special = {".gz", ".bz2", ".xz", ".fz"}

        extensions = self._pathLib(self.path).suffixes

        if not extensions:
            return ""

        ext = extensions.pop()

        # Multiple extensions, decide whether to include the final two
        if extensions and ext in special:
            ext = f"{extensions[-1]}{ext}"

        return ext

    def join(self, path: Union[str, ButlerURI]) -> ButlerURI:
        """Return new `ButlerURI` with additional path components.

        Parameters
        ----------
        path : `str`, `ButlerURI`
            Additional file components to append to the current URI. Assumed
            to include a file at the end. Will be quoted depending on the
            associated URI scheme. If the path looks like a URI with a scheme
            referring to an absolute location, it will be returned
            directly (matching the behavior of `os.path.join()`). It can
            also be a `ButlerURI`.

        Returns
        -------
        new : `ButlerURI`
            New URI with any file at the end replaced with the new path
            components.

        Notes
        -----
        Schemeless URIs assume local path separator but all other URIs assume
        POSIX separator if the supplied path has directory structure. It
        may be this never becomes a problem but datastore templates assume
        POSIX separator is being used.

        Currently, if the join path is given as an absolute scheme-less
        URI it will be returned as an absolute ``file:`` URI even if the
        URI it is being joined to is non-file.
        """
        # If we have a full URI in path we will use it directly
        # but without forcing to absolute so that we can trap the
        # expected option of relative path.
        path_uri = ButlerURI(path, forceAbsolute=False)
        if path_uri.scheme:
            return path_uri

        # Force back to string
        path = path_uri.path

        new = self.dirname()  # By definition a directory URI

        # new should be asked about quoting, not self, since dirname can
        # change the URI scheme for schemeless -> file
        if new.quotePaths:
            path = urllib.parse.quote(path)

        newpath = self._pathModule.normpath(self._pathModule.join(new.path, path))

        # normpath can strip trailing / so we force directory if the supplied
        # path ended with a /
        return new.replace(path=newpath, forceDirectory=path.endswith(self._pathModule.sep))

    def relative_to(self, other: ButlerURI) -> Optional[str]:
        """Return the relative path from this URI to the other URI.

        Parameters
        ----------
        other : `ButlerURI`
            URI to use to calculate the relative path. Must be a parent
            of this URI.

        Returns
        -------
        subpath : `str`
            The sub path of this URI relative to the supplied other URI.
            Returns `None` if there is no parent child relationship.
            Scheme and netloc must match.
        """
        if self.scheme != other.scheme or self.netloc != other.netloc:
            return None

        enclosed_path = self._pathLib(self.relativeToPathRoot)
        parent_path = other.relativeToPathRoot
        subpath: Optional[str]
        try:
            subpath = str(enclosed_path.relative_to(parent_path))
        except ValueError:
            # The path library signals "not a sub path" with ValueError.
            subpath = None
        else:
            subpath = urllib.parse.unquote(subpath)
        return subpath

    def exists(self) -> bool:
        """Indicate that the resource is available.

        Returns
        -------
        exists : `bool`
            `True` if the resource exists.
        """
        raise NotImplementedError()

    def remove(self) -> None:
        """Remove the resource."""
        raise NotImplementedError()

    def isabs(self) -> bool:
        """Indicate that the resource is fully specified.

        For non-schemeless URIs this is always true.

        Returns
        -------
        isabs : `bool`
            `True` in all cases except schemeless URI.
        """
        return True

    def _as_local(self) -> Tuple[str, bool]:
        """Return the location of the (possibly remote) resource as local file.

        This is a helper function for `as_local` context manager.

        Returns
        -------
        path : `str`
            If this is a remote resource, it will be a copy of the resource
            on the local file system, probably in a temporary directory.
            For a local resource this should be the actual path to the
            resource.
        is_temporary : `bool`
            Indicates if the local path is a temporary file or not.
        """
        raise NotImplementedError()

    @contextlib.contextmanager
    def as_local(self) -> Iterator[ButlerURI]:
        """Return the location of the (possibly remote) resource as local file.

        Yields
        ------
        local : `ButlerURI`
            If this is a remote resource, it will be a copy of the resource
            on the local file system, probably in a temporary directory.
            For a local resource this should be the actual path to the
            resource.

        Notes
        -----
        The context manager will automatically delete any local temporary
        file.

        Examples
        --------
        Should be used as a context manager:

        .. code-block:: py

           with uri.as_local() as local:
               ospath = local.ospath
        """
        local_src, is_temporary = self._as_local()
        local_uri = ButlerURI(local_src, isTemporary=is_temporary)

        try:
            yield local_uri
        finally:
            # The caller might have relocated the temporary file
            if is_temporary and local_uri.exists():
                local_uri.remove()

    def read(self, size: int = -1) -> bytes:
        """Open the resource and return the contents in bytes.

        Parameters
        ----------
        size : `int`, optional
            The number of bytes to read. Negative or omitted indicates
            that all data should be read.
        """
        raise NotImplementedError()

    def write(self, data: bytes, overwrite: bool = True) -> None:
        """Write the supplied bytes to the new resource.

        Parameters
        ----------
        data : `bytes`
            The bytes to write to the resource. The entire contents of the
            resource will be replaced.
        overwrite : `bool`, optional
            If `True` the resource will be overwritten if it exists. Otherwise
            the write will fail.
        """
        raise NotImplementedError()

    def mkdir(self) -> None:
        """For a dir-like URI, create the directory resource if needed."""
        raise NotImplementedError()

    def isdir(self) -> bool:
        """Return True if this URI looks like a directory, else False."""
        return self.dirLike

    def size(self) -> int:
        """For non-dir-like URI, return the size of the resource.

        Returns
        -------
        sz : `int`
            The size in bytes of the resource associated with this URI.
            Returns 0 if dir-like.
        """
        raise NotImplementedError()

    def __str__(self) -> str:
        """Convert the URI to its native string form."""
        return self.geturl()

    def __repr__(self) -> str:
        """Return string representation suitable for evaluation."""
        return f'ButlerURI("{self.geturl()}")'

    def __eq__(self, other: Any) -> bool:
        """Compare supplied object with this `ButlerURI`."""
        if not isinstance(other, ButlerURI):
            return NotImplemented
        return self.geturl() == other.geturl()

    def __hash__(self) -> int:
        """Return hash of this object."""
        return hash(str(self))

    def __copy__(self) -> ButlerURI:
        """Copy constructor.

        Object is immutable so copy can return itself.
        """
        # Implement here because the __new__ method confuses things
        return self

    def __deepcopy__(self, memo: Any) -> ButlerURI:
        """Deepcopy the object.

        Object is immutable so copy can return itself.
        """
        # Implement here because the __new__ method confuses things
        return self

    def __getnewargs__(self) -> Tuple:
        """Support pickling."""
        return (str(self),)

    @classmethod
    def _fixDirectorySep(cls, parsed: urllib.parse.ParseResult,
                         forceDirectory: bool = False) -> Tuple[urllib.parse.ParseResult, bool]:
        """Ensure that a path separator is present on directory paths.

        Parameters
        ----------
        parsed : `~urllib.parse.ParseResult`
            The result from parsing a URI using `urllib.parse`.
        forceDirectory : `bool`, optional
            If `True` forces the URI to end with a separator, otherwise given
            URI is interpreted as is. Specifying that the URI is conceptually
            equivalent to a directory can break some ambiguities when
            interpreting the last element of a path.

        Returns
        -------
        modified : `~urllib.parse.ParseResult`
            Update result if a URI is being handled.
        dirLike : `bool`
            `True` if given parsed URI has a trailing separator or
            forceDirectory is True. Otherwise `False`.
        """
        # assume we are not dealing with a directory like URI
        dirLike = False

        # Directory separator
        sep = cls._pathModule.sep

        # URI is dir-like if explicitly stated or if it ends on a separator
        endsOnSep = parsed.path.endswith(sep)
        if forceDirectory or endsOnSep:
            dirLike = True
            # only add the separator if it's not already there
            if not endsOnSep:
                parsed = parsed._replace(path=parsed.path+sep)

        return parsed, dirLike

    @classmethod
    def _fixupPathUri(cls, parsed: urllib.parse.ParseResult, root: Optional[Union[str, ButlerURI]] = None,
                      forceAbsolute: bool = False,
                      forceDirectory: bool = False) -> Tuple[urllib.parse.ParseResult, bool]:
        """Correct any issues with the supplied URI.

        Parameters
        ----------
        parsed : `~urllib.parse.ParseResult`
            The result from parsing a URI using `urllib.parse`.
        root : `str` or `ButlerURI`, ignored
            Not used by this implementation since all URIs are
            absolute except for those representing the local file system.
        forceAbsolute : `bool`, ignored.
            Not used by this implementation. URIs are generally always
            absolute.
        forceDirectory : `bool`, optional
            If `True` forces the URI to end with a separator, otherwise given
            URI is interpreted as is. Specifying that the URI is conceptually
            equivalent to a directory can break some ambiguities when
            interpreting the last element of a path.

        Returns
        -------
        modified : `~urllib.parse.ParseResult`
            Update result if a URI is being handled.
        dirLike : `bool`
            `True` if given parsed URI has a trailing separator or
            forceDirectory is True. Otherwise `False`.

        Notes
        -----
        Relative paths are explicitly not supported by RFC8089 but `urllib`
        does accept URIs of the form ``file:relative/path.ext``. They need
        to be turned into absolute paths before they can be used. This is
        always done regardless of the ``forceAbsolute`` parameter.

        AWS S3 differentiates between keys with trailing POSIX separators (i.e
        `/dir` and `/dir/`) whereas POSIX does not necessarily.

        Scheme-less paths are normalized.
        """
        return cls._fixDirectorySep(parsed, forceDirectory)

    def transfer_from(self, src: ButlerURI, transfer: str,
                      overwrite: bool = False,
                      transaction: Optional[Union[DatastoreTransaction, NoTransaction]] = None) -> None:
        """Transfer the resource at the source URI to this URI.

        Parameters
        ----------
        src : `ButlerURI`
            Source URI.
        transfer : `str`
            Mode to use for transferring the resource. Generically there are
            many standard options: copy, link, symlink, hardlink, relsymlink.
            Not all URIs support all modes.
        overwrite : `bool`, optional
            Allow an existing file to be overwritten. Defaults to `False`.
        transaction : `DatastoreTransaction`, optional
            A transaction object that can (depending on implementation)
            rollback transfers on error. Not guaranteed to be implemented.

        Raises
        ------
        NotImplementedError
            Always raised by this base implementation.

        Notes
        -----
        Conceptually this is hard to scale as the number of URI schemes
        grow. The destination URI is more important than the source URI
        since that is where all the transfer modes are relevant (with the
        complication that "move" deletes the source).

        Local file to local file is the fundamental use case but every
        other scheme has to support "copy" to local file (with implicit
        support for "move") and copy from local file.
        All the "link" options tend to be specific to local file systems.

        "move" is a "copy" where the remote resource is deleted at the end.
        Whether this works depends on the source URI rather than the
        destination URI. Reverting a move on transaction rollback is
        expected to be problematic if a remote resource was involved.
        """
        raise NotImplementedError(f"No transfer modes supported by URI scheme {self.scheme}")

    def walk(self, file_filter: Optional[Union[str, re.Pattern]] = None
             ) -> Iterator[Union[List, Tuple[ButlerURI, List[str], List[str]]]]:
        """Walk the directory tree returning matching files and directories.

        Parameters
        ----------
        file_filter : `str` or `re.Pattern`, optional
            Regex to filter out files from the list before it is returned.

        Yields
        ------
        dirpath : `ButlerURI`
            Current directory being examined.
        dirnames : `list` of `str`
            Names of subdirectories within dirpath.
        filenames : `list` of `str`
            Names of all the files within dirpath.
        """
        raise NotImplementedError()

    @classmethod
    def findFileResources(cls, candidates: Iterable[Union[str, ButlerURI]],
                          file_filter: Optional[str] = None,
                          grouped: bool = False) -> Iterator[Union[ButlerURI, Iterator[ButlerURI]]]:
        """Get all the files from a list of values.

        Parameters
        ----------
        candidates : iterable [`str` or `ButlerURI`]
            The files to return and directories in which to look for files to
            return.
        file_filter : `str`, optional
            The regex to use when searching for files within directories.
            By default returns all the found files.
        grouped : `bool`, optional
            If `True` the results will be grouped by directory and each
            yielded value will be an iterator over URIs. If `False` each
            URI will be returned separately.

        Yields
        ------
        found_file: `ButlerURI`
            The passed-in URIs and URIs found in passed-in directories.
            If grouping is enabled, each of the yielded values will be an
            iterator yielding members of the group. Files given explicitly
            will be returned as a single group at the end.

        Notes
        -----
        If a value is a file it is yielded immediately. If a value is a
        directory, all the files in the directory (recursively) that match
        the regex will be yielded in turn.
        """
        fileRegex = None if file_filter is None else re.compile(file_filter)

        singles = []

        # Find all the files of interest
        for location in candidates:
            uri = ButlerURI(location)
            if uri.isdir():
                for found in uri.walk(fileRegex):
                    if not found:
                        # This means the uri does not exist and by
                        # convention we ignore it
                        continue
                    root, dirs, files = found
                    if not files:
                        continue
                    if grouped:
                        # Bind root.join now. A generator expression would
                        # look up ``root`` lazily and so would see a later
                        # directory if the caller collects the groups before
                        # iterating over them.
                        yield map(root.join, files)
                    else:
                        for name in files:
                            yield root.join(name)
            else:
                if grouped:
                    singles.append(uri)
                else:
                    yield uri

        # Finally, return any explicitly given files in one group
        if grouped and singles:
            yield iter(singles)