Coverage for python/lsst/daf/butler/core/_butlerUri/_butlerUri.py : 52%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

import contextlib
import urllib.parse
import posixpath
import copy
import logging
import re

from pathlib import Path, PurePath, PurePosixPath

__all__ = ('ButlerURI',)

from typing import (
    TYPE_CHECKING,
    Any,
    Iterable,
    Iterator,
    List,
    Optional,
    Tuple,
    Type,
    Union,
)

from .utils import NoTransaction

# Only needed for type annotations; importing lazily avoids a runtime
# dependency on the datastore package.
if TYPE_CHECKING:
    from ..datastore import DatastoreTransaction


# Module-level logger named after this module.
log = logging.getLogger(__name__)

# Regex for looking for URI escapes: a percent sign followed by two
# upper-case hexadecimal digits (lower-case hex digits do not match).
ESCAPES_RE = re.compile(r"%[A-F0-9]{2}")

# Precomputed escaped hash, i.e. the percent-encoded form of "#".
ESCAPED_HASH = urllib.parse.quote("#")
class ButlerURI:
    """Convenience wrapper around URI parsers.

    Provides access to URI components and can convert file
    paths into absolute path URIs. Scheme-less URIs are treated as if
    they are local file system paths and are converted to absolute URIs.

    A specialist subclass is created for each supported URI scheme.

    Parameters
    ----------
    uri : `str`, `pathlib.Path`, `ButlerURI` or `urllib.parse.ParseResult`
        URI in string form. Can be scheme-less if referring to a local
        filesystem path. A `ButlerURI` is returned unchanged.
    root : `str` or `ButlerURI`, optional
        When fixing up a relative path in a ``file`` scheme or if scheme-less,
        use this as the root. Must be absolute. If `None` the current
        working directory will be used. Can be a file URI.
    forceAbsolute : `bool`, optional
        If `True`, scheme-less relative URI will be converted to an absolute
        path using a ``file`` scheme. If `False` scheme-less URI will remain
        scheme-less and will not be updated to ``file`` or absolute path.
    forceDirectory: `bool`, optional
        If `True` forces the URI to end with a separator, otherwise given URI
        is interpreted as is.
    isTemporary : `bool`, optional
        If `True` indicates that this URI points to a temporary resource.
    """

    _pathLib: Type[PurePath] = PurePosixPath
    """Path library to use for this scheme."""

    _pathModule = posixpath
    """Path module to use for this scheme."""

    transferModes: Tuple[str, ...] = ("copy", "auto", "move")
    """Transfer modes supported by this implementation.

    Move is special in that it is generally a copy followed by an unlink.
    Whether that unlink works depends critically on whether the source URI
    implements unlink. If it does not the move will be reported as a failure.
    """

    transferDefault: str = "copy"
    """Default mode to use for transferring if ``auto`` is specified."""

    quotePaths = True
    """True if path-like elements modifying a URI should be quoted.

    All non-schemeless URIs have to internally use quoted paths. Therefore
    if a new file name is given (e.g. to updatedFile or join) a decision must
    be made whether to quote it to be consistent.
    """

    isLocal = False
    """If `True` this URI refers to a local file."""

    # This is not an ABC with abstract methods because the __new__ being
    # a factory confuses mypy such that it assumes that every constructor
    # returns a ButlerURI and then determines that all the abstract methods
    # are still abstract. If they are not marked abstract but just raise
    # mypy is fine with it.

    # mypy is confused without these
    _uri: urllib.parse.ParseResult
    isTemporary: bool
    dirLike: bool

    def __new__(cls, uri: Union[str, urllib.parse.ParseResult, ButlerURI, Path],
                root: Optional[Union[str, ButlerURI]] = None, forceAbsolute: bool = True,
                forceDirectory: bool = False, isTemporary: bool = False) -> ButlerURI:
        """Create and return new specialist ButlerURI subclass.

        Raises
        ------
        ValueError
            Raised if ``uri`` is not a `str`, `pathlib.Path`, `ButlerURI`
            or `urllib.parse.ParseResult`.
        NotImplementedError
            Raised if the URI scheme is not one of the supported schemes.
        """
        parsed: urllib.parse.ParseResult
        dirLike: bool = False
        subclass: Optional[Type[ButlerURI]] = None

        if isinstance(uri, Path):
            uri = str(uri)

        # Record if we need to post process the URI components
        # or if the instance is already fully configured
        if isinstance(uri, str):
            # Since local file names can have special characters in them
            # we need to quote them for the parser but we can unquote
            # later. Assume that all other URI schemes are quoted.
            # Since sometimes people write file:/a/b and not file:///a/b
            # we should not quote in the explicit case of file:
            if "://" not in uri and not uri.startswith("file:"):
                if ESCAPES_RE.search(uri):
                    log.warning("Possible double encoding of %s", uri)
                else:
                    uri = urllib.parse.quote(uri)
                    # Special case hash since we must support fragments
                    # even in schemeless URIs -- although try to only replace
                    # them in file part and not directory part
                    if ESCAPED_HASH in uri:
                        dirpos = uri.rfind("/")
                        # Do replacement after this /
                        uri = uri[:dirpos+1] + uri[dirpos+1:].replace(ESCAPED_HASH, "#")

            parsed = urllib.parse.urlparse(uri)
        elif isinstance(uri, urllib.parse.ParseResult):
            parsed = copy.copy(uri)
            # If we are being instantiated with a subclass, rather than
            # ButlerURI, ensure that that subclass is used directly.
            # This could lead to inconsistencies if this constructor
            # is used externally outside of the ButlerURI.replace() method.
            #   ButlerS3URI(urllib.parse.urlparse("file://a/b.txt"))
            # will be a problem.
            # This is needed to prevent a schemeless absolute URI become
            # a file URI unexpectedly when calling updatedFile or
            # updatedExtension
            if cls is not ButlerURI:
                parsed, dirLike = cls._fixDirectorySep(parsed, forceDirectory)
                subclass = cls

        elif isinstance(uri, ButlerURI):
            # Since ButlerURI is immutable we can return the argument
            # unchanged.
            # NOTE(review): forceDirectory, isTemporary and the other
            # arguments are not applied on this path -- confirm intended.
            return uri
        else:
            raise ValueError("Supplied URI must be string, Path, "
                             f"ButlerURI, or ParseResult but got '{uri!r}'")

        if subclass is None:
            # Work out the subclass from the URI scheme. The imports are
            # done lazily, only for the scheme actually requested.
            if not parsed.scheme:
                from .schemeless import ButlerSchemelessURI
                subclass = ButlerSchemelessURI
            elif parsed.scheme == "file":
                from .file import ButlerFileURI
                subclass = ButlerFileURI
            elif parsed.scheme == "s3":
                from .s3 import ButlerS3URI
                subclass = ButlerS3URI
            elif parsed.scheme.startswith("http"):
                from .http import ButlerHttpURI
                subclass = ButlerHttpURI
            elif parsed.scheme == "resource":
                # Rules for scheme names disallow pkg_resource
                from .packageresource import ButlerPackageResourceURI
                subclass = ButlerPackageResourceURI
            elif parsed.scheme == "mem":
                # in-memory datastore object
                from .mem import ButlerInMemoryURI
                subclass = ButlerInMemoryURI
            else:
                # Both literals need the f prefix: adjacent string literals
                # are concatenated but each is formatted independently.
                raise NotImplementedError(f"No URI support for scheme: '{parsed.scheme}'"
                                          f" in {parsed.geturl()}")

            parsed, dirLike = subclass._fixupPathUri(parsed, root=root,
                                                     forceAbsolute=forceAbsolute,
                                                     forceDirectory=forceDirectory)

            # It is possible for the class to change from schemeless
            # to file so handle that
            if parsed.scheme == "file":
                from .file import ButlerFileURI
                subclass = ButlerFileURI

        # Now create an instance of the correct subclass and set the
        # attributes directly
        self = object.__new__(subclass)
        self._uri = parsed
        self.dirLike = dirLike
        self.isTemporary = isTemporary
        return self

    @property
    def scheme(self) -> str:
        """Return the URI scheme.

        Notes
        -----
        (``://`` is not part of the scheme).
        """
        return self._uri.scheme

    @property
    def netloc(self) -> str:
        """Return the URI network location."""
        return self._uri.netloc

    @property
    def path(self) -> str:
        """Return the path component of the URI."""
        return self._uri.path

    @property
    def unquoted_path(self) -> str:
        """Return path component of the URI with any URI quoting reversed."""
        return urllib.parse.unquote(self._uri.path)

    @property
    def ospath(self) -> str:
        """Return the path component of the URI localized to current OS.

        Raises
        ------
        AttributeError
            Always raised by this base implementation.
        """
        raise AttributeError(f"Non-file URI ({self}) has no local OS path.")

    @property
    def relativeToPathRoot(self) -> str:
        """Return path relative to network location.

        Effectively, this is the path property with posix separator stripped
        from the left hand side of the path.

        Always unquotes.
        """
        p = self._pathLib(self.path)
        relToRoot = str(p.relative_to(p.root))
        # The path library drops trailing separators so restore one for
        # directory-like URIs.
        if self.dirLike and not relToRoot.endswith("/"):
            relToRoot += "/"
        return urllib.parse.unquote(relToRoot)

    @property
    def is_root(self) -> bool:
        """Return whether this URI points to the root of the network location.

        This means that the path components refers to the top level.
        """
        return self.relativeToPathRoot == "./"

    @property
    def fragment(self) -> str:
        """Return the fragment component of the URI."""
        return self._uri.fragment

    @property
    def params(self) -> str:
        """Return any parameters included in the URI."""
        return self._uri.params

    @property
    def query(self) -> str:
        """Return any query strings included in the URI."""
        return self._uri.query

    def geturl(self) -> str:
        """Return the URI in string form.

        Returns
        -------
        url : `str`
            String form of URI.
        """
        return self._uri.geturl()

    def root_uri(self) -> ButlerURI:
        """Return the base root URI.

        Returns
        -------
        uri : `ButlerURI`
            root URI.
        """
        return self.replace(path="", forceDirectory=True)

    def split(self) -> Tuple[ButlerURI, str]:
        """Split URI into head and tail.

        Returns
        -------
        head: `ButlerURI`
            Everything leading up to tail, expanded and normalized as per
            ButlerURI rules.
        tail : `str`
            Last `self.path` component. Tail will be empty if path ends on a
            separator. Tail will never contain separators. It will be
            unquoted.

        Notes
        -----
        Equivalent to `os.path.split()` where head preserves the URI
        components.
        """
        head, tail = self._pathModule.split(self.path)
        headuri = self._uri._replace(path=head)

        # The file part should never include quoted metacharacters
        tail = urllib.parse.unquote(tail)

        # Schemeless is special in that it can be a relative path
        # We need to ensure that it stays that way. All other URIs will
        # be absolute already.
        forceAbsolute = self._pathModule.isabs(self.path)
        return ButlerURI(headuri, forceDirectory=True, forceAbsolute=forceAbsolute), tail

    def basename(self) -> str:
        """Return the base name, last element of path, of the URI.

        Returns
        -------
        tail : `str`
            Last part of the path attribute. Tail will be empty if path ends
            on a separator.

        Notes
        -----
        If URI ends on a slash returns an empty string. This is the second
        element returned by `split()`.

        Equivalent of `os.path.basename()`.
        """
        return self.split()[1]

    def dirname(self) -> ButlerURI:
        """Return the directory component of the path as a new `ButlerURI`.

        Returns
        -------
        head : `ButlerURI`
            Everything except the tail of path attribute, expanded and
            normalized as per ButlerURI rules.

        Notes
        -----
        Equivalent of `os.path.dirname()`.
        """
        return self.split()[0]

    def parent(self) -> ButlerURI:
        """Return a `ButlerURI` of the parent directory.

        Returns
        -------
        head : `ButlerURI`
            Everything except the tail of path attribute, expanded and
            normalized as per `ButlerURI` rules.

        Notes
        -----
        For a file-like URI this will be the same as calling `dirname()`.
        """
        # When self is file-like, return self.dirname()
        if not self.dirLike:
            return self.dirname()
        # When self is dir-like, return its parent directory,
        # regardless of the presence of a trailing separator
        originalPath = self._pathLib(self.path)
        parentPath = originalPath.parent
        return self.replace(path=str(parentPath), forceDirectory=True)

    def replace(self, forceDirectory: bool = False, **kwargs: Any) -> ButlerURI:
        """Return new `ButlerURI` with specified components replaced.

        Parameters
        ----------
        forceDirectory : `bool`
            Parameter passed to ButlerURI constructor to force this
            new URI to be dir-like.
        **kwargs
            Components of a `urllib.parse.ParseResult` that should be
            modified for the newly-created `ButlerURI`.

        Returns
        -------
        new : `ButlerURI`
            New `ButlerURI` object with updated values.

        Raises
        ------
        ValueError
            Raised if ``scheme`` is one of the components to be replaced.

        Notes
        -----
        Does not, for now, allow a change in URI scheme.
        """
        # Disallow a change in scheme
        if "scheme" in kwargs:
            raise ValueError(f"Can not use replace() method to change URI scheme for {self}")
        # Use the class of this instance so that the subclass is preserved.
        return self.__class__(self._uri._replace(**kwargs), forceDirectory=forceDirectory)

    def updatedFile(self, newfile: str) -> ButlerURI:
        """Return new URI with an updated final component of the path.

        Parameters
        ----------
        newfile : `str`
            File name with no path component.

        Returns
        -------
        updated : `ButlerURI`

        Notes
        -----
        Forces the ButlerURI.dirLike attribute to be false. The new file path
        will be quoted if necessary.
        """
        if self.quotePaths:
            newfile = urllib.parse.quote(newfile)
        directory, _ = self._pathModule.split(self.path)
        newpath = self._pathModule.join(directory, newfile)

        updated = self.replace(path=newpath)
        updated.dirLike = False
        return updated

    def updatedExtension(self, ext: Optional[str]) -> ButlerURI:
        """Return a new `ButlerURI` with updated file extension.

        All file extensions are replaced.

        Parameters
        ----------
        ext : `str` or `None`
            New extension. If an empty string is given any extension will
            be removed. If `None` is given there will be no change.

        Returns
        -------
        updated : `ButlerURI`
            URI with the specified extension. Can return itself if
            no extension was specified.
        """
        if ext is None:
            return self

        # Get the extension
        current = self.getExtension()

        # Nothing to do if the extension already matches
        if current == ext:
            return self

        # Remove the current extension from the path
        # .fits.gz counts as one extension do not use os.path.splitext
        path = self.path
        if current:
            path = path[:-len(current)]

        # Ensure that we have a leading "." on file extension (and we do not
        # try to modify the empty string)
        if ext and not ext.startswith("."):
            ext = "." + ext

        return self.replace(path=path + ext)

    def getExtension(self) -> str:
        """Return the file extension(s) associated with this URI path.

        Returns
        -------
        ext : `str`
            The file extension (including the ``.``). Can be empty string
            if there is no file extension. Usually returns only the last
            file extension unless there is a special extension modifier
            indicating file compression, in which case the combined
            extension (e.g. ``.fits.gz``) will be returned.
        """
        special = {".gz", ".bz2", ".xz", ".fz"}

        # Get the file part of the path so as not to be confused by
        # "." in directory names.
        basename = self.basename()
        extensions = self._pathLib(basename).suffixes

        if not extensions:
            return ""

        ext = extensions.pop()

        # Multiple extensions, decide whether to include the final two
        if extensions and ext in special:
            ext = f"{extensions[-1]}{ext}"

        return ext

    def join(self, path: Union[str, ButlerURI]) -> ButlerURI:
        """Return new `ButlerURI` with additional path components.

        Parameters
        ----------
        path : `str`, `ButlerURI`
            Additional file components to append to the current URI. Assumed
            to include a file at the end. Will be quoted depending on the
            associated URI scheme. If the path looks like a URI with a scheme
            referring to an absolute location, it will be returned
            directly (matching the behavior of `os.path.join()`). It can
            also be a `ButlerURI`.

        Returns
        -------
        new : `ButlerURI`
            New URI with any file at the end replaced with the new path
            components.

        Notes
        -----
        Schemeless URIs assume local path separator but all other URIs assume
        POSIX separator if the supplied path has directory structure. It
        may be this never becomes a problem but datastore templates assume
        POSIX separator is being used.

        If an absolute `ButlerURI` is given for ``path`` it is assumed that
        this should be returned directly. Giving a ``path`` of an absolute
        scheme-less URI is not allowed for safety reasons as it may indicate
        a mistake in the calling code.

        Raises
        ------
        ValueError
            Raised if the ``path`` is an absolute scheme-less URI. In that
            situation it is unclear whether the intent is to return a
            ``file`` URI or it was a mistake and a relative scheme-less URI
            was meant.
        """
        # If we have a full URI in path we will use it directly
        # but without forcing to absolute so that we can trap the
        # expected option of relative path.
        path_uri = ButlerURI(path, forceAbsolute=False)
        if path_uri.scheme:
            # Check for scheme so can distinguish explicit URIs from
            # absolute scheme-less URIs.
            return path_uri

        if path_uri.isabs():
            # Absolute scheme-less path.
            raise ValueError(f"Can not join absolute scheme-less {path_uri!r} to another URI.")

        # If this was originally a ButlerURI extract the unquoted path from it.
        # Otherwise we use the string we were given to allow "#" to appear
        # in the filename if given as a plain string.
        if not isinstance(path, str):
            path = path_uri.unquoted_path

        new = self.dirname()  # By definition a directory URI

        # new should be asked about quoting, not self, since dirname can
        # change the URI scheme for schemeless -> file
        if new.quotePaths:
            path = urllib.parse.quote(path)

        newpath = self._pathModule.normpath(self._pathModule.join(new.path, path))

        # normpath can strip trailing / so we force directory if the supplied
        # path ended with a /
        return new.replace(path=newpath, forceDirectory=path.endswith(self._pathModule.sep))

    def relative_to(self, other: ButlerURI) -> Optional[str]:
        """Return the relative path from this URI to the other URI.

        Parameters
        ----------
        other : `ButlerURI`
            URI to use to calculate the relative path. Must be a parent
            of this URI.

        Returns
        -------
        subpath : `str`
            The sub path of this URI relative to the supplied other URI.
            Returns `None` if there is no parent child relationship.
            Scheme and netloc must match.
        """
        if self.scheme != other.scheme or self.netloc != other.netloc:
            return None

        enclosed_path = self._pathLib(self.relativeToPathRoot)
        parent_path = other.relativeToPathRoot
        subpath: Optional[str]
        try:
            subpath = str(enclosed_path.relative_to(parent_path))
        except ValueError:
            # Path library signals "not a sub path" with ValueError.
            subpath = None
        else:
            # NOTE(review): relativeToPathRoot already unquotes, so this is
            # a second unquote pass -- confirm that is intended.
            subpath = urllib.parse.unquote(subpath)
        return subpath

    def exists(self) -> bool:
        """Indicate that the resource is available.

        Returns
        -------
        exists : `bool`
            `True` if the resource exists.
        """
        raise NotImplementedError()

    def remove(self) -> None:
        """Remove the resource."""
        raise NotImplementedError()

    def isabs(self) -> bool:
        """Indicate that the resource is fully specified.

        For non-schemeless URIs this is always true.

        Returns
        -------
        isabs : `bool`
            `True` in all cases except schemeless URI.
        """
        return True

    def abspath(self) -> ButlerURI:
        """Return URI using an absolute path.

        Returns
        -------
        abs : `ButlerURI`
            Absolute URI. For non-schemeless URIs this always returns itself.
            Schemeless URIs are upgraded to file URIs.
        """
        return self

    def _as_local(self) -> Tuple[str, bool]:
        """Return the location of the (possibly remote) resource as local file.

        This is a helper function for `as_local` context manager.

        Returns
        -------
        path : `str`
            If this is a remote resource, it will be a copy of the resource
            on the local file system, probably in a temporary directory.
            For a local resource this should be the actual path to the
            resource.
        is_temporary : `bool`
            Indicates if the local path is a temporary file or not.
        """
        raise NotImplementedError()

    @contextlib.contextmanager
    def as_local(self) -> Iterator[ButlerURI]:
        """Return the location of the (possibly remote) resource as local file.

        Yields
        ------
        local : `ButlerURI`
            If this is a remote resource, it will be a copy of the resource
            on the local file system, probably in a temporary directory.
            For a local resource this should be the actual path to the
            resource.

        Notes
        -----
        The context manager will automatically delete any local temporary
        file.

        Examples
        --------
        Should be used as a context manager:

        .. code-block:: py

           with uri.as_local() as local:
               ospath = local.ospath
        """
        local_src, is_temporary = self._as_local()
        local_uri = ButlerURI(local_src, isTemporary=is_temporary)

        try:
            yield local_uri
        finally:
            # The caller might have relocated the temporary file
            if is_temporary and local_uri.exists():
                local_uri.remove()

    def read(self, size: int = -1) -> bytes:
        """Open the resource and return the contents in bytes.

        Parameters
        ----------
        size : `int`, optional
            The number of bytes to read. Negative or omitted indicates
            that all data should be read.
        """
        raise NotImplementedError()

    def write(self, data: bytes, overwrite: bool = True) -> None:
        """Write the supplied bytes to the new resource.

        Parameters
        ----------
        data : `bytes`
            The bytes to write to the resource. The entire contents of the
            resource will be replaced.
        overwrite : `bool`, optional
            If `True` the resource will be overwritten if it exists. Otherwise
            the write will fail.
        """
        raise NotImplementedError()

    def mkdir(self) -> None:
        """For a dir-like URI, create the directory resource if needed."""
        raise NotImplementedError()

    def isdir(self) -> bool:
        """Return True if this URI looks like a directory, else False."""
        return self.dirLike

    def size(self) -> int:
        """For non-dir-like URI, return the size of the resource.

        Returns
        -------
        sz : `int`
            The size in bytes of the resource associated with this URI.
            Returns 0 if dir-like.
        """
        raise NotImplementedError()

    def __str__(self) -> str:
        """Convert the URI to its native string form."""
        return self.geturl()

    def __repr__(self) -> str:
        """Return string representation suitable for evaluation."""
        return f'ButlerURI("{self.geturl()}")'

    def __eq__(self, other: Any) -> bool:
        """Compare supplied object with this `ButlerURI`."""
        if not isinstance(other, ButlerURI):
            return NotImplemented
        return self.geturl() == other.geturl()

    def __hash__(self) -> int:
        """Return hash of this object."""
        return hash(str(self))

    def __copy__(self) -> ButlerURI:
        """Copy constructor.

        Object is immutable so copy can return itself.
        """
        # Implement here because the __new__ method confuses things
        return self

    def __deepcopy__(self, memo: Any) -> ButlerURI:
        """Deepcopy the object.

        Object is immutable so copy can return itself.
        """
        # Implement here because the __new__ method confuses things
        return self

    def __getnewargs__(self) -> Tuple:
        """Support pickling."""
        return (str(self),)

    @classmethod
    def _fixDirectorySep(cls, parsed: urllib.parse.ParseResult,
                         forceDirectory: bool = False) -> Tuple[urllib.parse.ParseResult, bool]:
        """Ensure that a path separator is present on directory paths.

        Parameters
        ----------
        parsed : `~urllib.parse.ParseResult`
            The result from parsing a URI using `urllib.parse`.
        forceDirectory : `bool`, optional
            If `True` forces the URI to end with a separator, otherwise given
            URI is interpreted as is. Specifying that the URI is conceptually
            equivalent to a directory can break some ambiguities when
            interpreting the last element of a path.

        Returns
        -------
        modified : `~urllib.parse.ParseResult`
            Update result if a URI is being handled.
        dirLike : `bool`
            `True` if given parsed URI has a trailing separator or
            forceDirectory is True. Otherwise `False`.
        """
        # assume we are not dealing with a directory like URI
        dirLike = False

        # Directory separator
        sep = cls._pathModule.sep

        # URI is dir-like if explicitly stated or if it ends on a separator
        endsOnSep = parsed.path.endswith(sep)
        if forceDirectory or endsOnSep:
            dirLike = True
            # only add the separator if it's not already there
            if not endsOnSep:
                parsed = parsed._replace(path=parsed.path+sep)

        return parsed, dirLike

    @classmethod
    def _fixupPathUri(cls, parsed: urllib.parse.ParseResult, root: Optional[Union[str, ButlerURI]] = None,
                      forceAbsolute: bool = False,
                      forceDirectory: bool = False) -> Tuple[urllib.parse.ParseResult, bool]:
        """Correct any issues with the supplied URI.

        Parameters
        ----------
        parsed : `~urllib.parse.ParseResult`
            The result from parsing a URI using `urllib.parse`.
        root : `str` or `ButlerURI`, ignored
            Not used by this implementation since all URIs are
            absolute except for those representing the local file system.
        forceAbsolute : `bool`, ignored.
            Not used by this implementation. URIs are generally always
            absolute.
        forceDirectory : `bool`, optional
            If `True` forces the URI to end with a separator, otherwise given
            URI is interpreted as is. Specifying that the URI is conceptually
            equivalent to a directory can break some ambiguities when
            interpreting the last element of a path.

        Returns
        -------
        modified : `~urllib.parse.ParseResult`
            Update result if a URI is being handled.
        dirLike : `bool`
            `True` if given parsed URI has a trailing separator or
            forceDirectory is True. Otherwise `False`.

        Notes
        -----
        Relative paths are explicitly not supported by RFC8089 but `urllib`
        does accept URIs of the form ``file:relative/path.ext``. They need
        to be turned into absolute paths before they can be used. This is
        always done regardless of the ``forceAbsolute`` parameter.

        AWS S3 differentiates between keys with trailing POSIX separators (i.e
        `/dir` and `/dir/`) whereas POSIX does not necessarily.

        Scheme-less paths are normalized.
        """
        return cls._fixDirectorySep(parsed, forceDirectory)

    def transfer_from(self, src: ButlerURI, transfer: str,
                      overwrite: bool = False,
                      transaction: Optional[Union[DatastoreTransaction, NoTransaction]] = None) -> None:
        """Transfer the current resource to a new location.

        Parameters
        ----------
        src : `ButlerURI`
            Source URI.
        transfer : `str`
            Mode to use for transferring the resource. Generically there are
            many standard options: copy, link, symlink, hardlink, relsymlink.
            Not all URIs support all modes.
        overwrite : `bool`, optional
            Allow an existing file to be overwritten. Defaults to `False`.
        transaction : `DatastoreTransaction`, optional
            A transaction object that can (depending on implementation)
            rollback transfers on error. Not guaranteed to be implemented.

        Notes
        -----
        Conceptually this is hard to scale as the number of URI schemes
        grow. The destination URI is more important than the source URI
        since that is where all the transfer modes are relevant (with the
        complication that "move" deletes the source).

        Local file to local file is the fundamental use case but every
        other scheme has to support "copy" to local file (with implicit
        support for "move") and copy from local file.
        All the "link" options tend to be specific to local file systems.

        "move" is a "copy" where the remote resource is deleted at the end.
        Whether this works depends on the source URI rather than the
        destination URI. Reverting a move on transaction rollback is
        expected to be problematic if a remote resource was involved.
        """
        raise NotImplementedError(f"No transfer modes supported by URI scheme {self.scheme}")

    def walk(self, file_filter: Optional[Union[str, re.Pattern]] = None) -> Iterator[Union[List,
                                                                                           Tuple[ButlerURI,
                                                                                                 List[str],
                                                                                                 List[str]]]]:
        """Walk the directory tree returning matching files and directories.

        Parameters
        ----------
        file_filter : `str` or `re.Pattern`, optional
            Regex to filter out files from the list before it is returned.

        Yields
        ------
        dirpath : `ButlerURI`
            Current directory being examined.
        dirnames : `list` of `str`
            Names of subdirectories within dirpath.
        filenames : `list` of `str`
            Names of all the files within dirpath.
        """
        raise NotImplementedError()

    @classmethod
    def findFileResources(cls, candidates: Iterable[Union[str, ButlerURI]],
                          file_filter: Optional[str] = None,
                          grouped: bool = False) -> Iterator[Union[ButlerURI, Iterator[ButlerURI]]]:
        """Get all the files from a list of values.

        Parameters
        ----------
        candidates : iterable [`str` or `ButlerURI`]
            The files to return and directories in which to look for files to
            return.
        file_filter : `str`, optional
            The regex to use when searching for files within directories.
            By default returns all the found files.
        grouped : `bool`, optional
            If `True` the results will be grouped by directory and each
            yielded value will be an iterator over URIs. If `False` each
            URI will be returned separately.

        Yields
        ------
        found_file: `ButlerURI`
            The passed-in URIs and URIs found in passed-in directories.
            If grouping is enabled, each of the yielded values will be an
            iterator yielding members of the group. Files given explicitly
            will be returned as a single group at the end.

        Notes
        -----
        If a value is a file it is yielded immediately. If a value is a
        directory, all the files in the directory (recursively) that match
        the regex will be yielded in turn.
        """
        # Compile once so that every walk() call shares the same pattern.
        fileRegex = None if file_filter is None else re.compile(file_filter)

        singles = []

        # Find all the files of interest
        for location in candidates:
            uri = ButlerURI(location)
            if uri.isdir():
                for found in uri.walk(fileRegex):
                    if not found:
                        # This means the uri does not exist and by
                        # convention we ignore it
                        continue
                    root, dirs, files = found
                    if not files:
                        continue
                    if grouped:
                        yield (root.join(name) for name in files)
                    else:
                        for name in files:
                            yield root.join(name)
            else:
                if grouped:
                    singles.append(uri)
                else:
                    yield uri

        # Finally, return any explicitly given files in one group
        if grouped and singles:
            yield iter(singles)