Coverage for python/lsst/daf/butler/core/_butlerUri/_butlerUri.py : 49%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24import contextlib
25import urllib.parse
26import posixpath
27import copy
28import logging
29import re
30import shutil
31import tempfile
33from random import Random
34from pathlib import Path, PurePath, PurePosixPath
36__all__ = ('ButlerURI',)
38from typing import (
39 TYPE_CHECKING,
40 Any,
41 Iterable,
42 Iterator,
43 List,
44 Optional,
45 Tuple,
46 Type,
47 Union,
48)
50from .utils import NoTransaction
52if TYPE_CHECKING: 52 ↛ 53line 52 didn't jump to line 53, because the condition on line 52 was never true
53 from ..datastore import DatastoreTransaction
# Module-level logger named after this module.
log = logging.getLogger(__name__)

# Regex for looking for URI escapes: a percent sign followed by exactly two
# characters drawn from 0-9 and upper-case A-F. Lower-case hex digits are
# not matched by this pattern.
ESCAPES_RE = re.compile(r"%[A-F0-9]{2}")

# Precomputed escaped hash, i.e. the result of URI-quoting "#".
ESCAPED_HASH = urllib.parse.quote("#")
65class ButlerURI:
66 """Convenience wrapper around URI parsers.
68 Provides access to URI components and can convert file
69 paths into absolute path URIs. Scheme-less URIs are treated as if
70 they are local file system paths and are converted to absolute URIs.
72 A specialist subclass is created for each supported URI scheme.
74 Parameters
75 ----------
76 uri : `str` or `urllib.parse.ParseResult`
77 URI in string form. Can be scheme-less if referring to a local
78 filesystem path.
79 root : `str` or `ButlerURI`, optional
80 When fixing up a relative path in a ``file`` scheme or if scheme-less,
81 use this as the root. Must be absolute. If `None` the current
82 working directory will be used. Can be a file URI.
83 forceAbsolute : `bool`, optional
84 If `True`, scheme-less relative URI will be converted to an absolute
85 path using a ``file`` scheme. If `False` scheme-less URI will remain
86 scheme-less and will not be updated to ``file`` or absolute path.
87 forceDirectory: `bool`, optional
88 If `True` forces the URI to end with a separator, otherwise given URI
89 is interpreted as is.
90 isTemporary : `bool`, optional
91 If `True` indicates that this URI points to a temporary resource.
92 """
94 _pathLib: Type[PurePath] = PurePosixPath
95 """Path library to use for this scheme."""
97 _pathModule = posixpath
98 """Path module to use for this scheme."""
100 transferModes: Tuple[str, ...] = ("copy", "auto", "move")
101 """Transfer modes supported by this implementation.
103 Move is special in that it is generally a copy followed by an unlink.
104 Whether that unlink works depends critically on whether the source URI
105 implements unlink. If it does not the move will be reported as a failure.
106 """
108 transferDefault: str = "copy"
109 """Default mode to use for transferring if ``auto`` is specified."""
111 quotePaths = True
112 """True if path-like elements modifying a URI should be quoted.
114 All non-schemeless URIs have to internally use quoted paths. Therefore
115 if a new file name is given (e.g. to updatedFile or join) a decision must
116 be made whether to quote it to be consistent.
117 """
119 isLocal = False
120 """If `True` this URI refers to a local file."""
122 # This is not an ABC with abstract methods because the __new__ being
123 # a factory confuses mypy such that it assumes that every constructor
124 # returns a ButlerURI and then determines that all the abstract methods
125 # are still abstract. If they are not marked abstract but just raise
126 # mypy is fine with it.
128 # mypy is confused without these
129 _uri: urllib.parse.ParseResult
130 isTemporary: bool
131 dirLike: bool
def __new__(cls, uri: Union[str, urllib.parse.ParseResult, ButlerURI, Path],
            root: Optional[Union[str, ButlerURI]] = None, forceAbsolute: bool = True,
            forceDirectory: bool = False, isTemporary: bool = False) -> ButlerURI:
    """Create and return new specialist ButlerURI subclass.

    Parameters
    ----------
    uri : `str`, `urllib.parse.ParseResult`, `ButlerURI` or `pathlib.Path`
        URI to wrap. A `ButlerURI` is returned unchanged.
    root : `str` or `ButlerURI`, optional
        Passed through to the subclass ``_fixupPathUri`` implementation.
    forceAbsolute : `bool`, optional
        Passed through to the subclass ``_fixupPathUri`` implementation.
    forceDirectory : `bool`, optional
        If `True` the resulting URI is forced to be dir-like.
    isTemporary : `bool`, optional
        Stored on the new instance as ``isTemporary``.

    Returns
    -------
    uri : `ButlerURI`
        Instance of the subclass selected from the URI scheme.

    Raises
    ------
    ValueError
        Raised if ``uri`` is not one of the supported types.
    NotImplementedError
        Raised if the URI scheme has no associated subclass.
    """
    parsed: urllib.parse.ParseResult
    dirLike: bool = False
    subclass: Optional[Type[ButlerURI]] = None

    if isinstance(uri, Path):
        uri = str(uri)

    # Record if we need to post process the URI components
    # or if the instance is already fully configured
    if isinstance(uri, str):
        # Since local file names can have special characters in them
        # we need to quote them for the parser but we can unquote
        # later. Assume that all other URI schemes are quoted.
        # Since sometimes people write file:/a/b and not file:///a/b
        # we should not quote in the explicit case of file:
        if "://" not in uri and not uri.startswith("file:"):
            if ESCAPES_RE.search(uri):
                log.warning("Possible double encoding of %s", uri)
            else:
                uri = urllib.parse.quote(uri)
                # Special case hash since we must support fragments
                # even in schemeless URIs -- although try to only replace
                # them in file part and not directory part
                if ESCAPED_HASH in uri:
                    dirpos = uri.rfind("/")
                    # Do replacement after this /
                    uri = uri[:dirpos+1] + uri[dirpos+1:].replace(ESCAPED_HASH, "#")

        parsed = urllib.parse.urlparse(uri)
    elif isinstance(uri, urllib.parse.ParseResult):
        parsed = copy.copy(uri)
        # If we are being instantiated with a subclass, rather than
        # ButlerURI, ensure that that subclass is used directly.
        # This could lead to inconsistencies if this constructor
        # is used externally outside of the ButlerURI.replace() method.
        #   ButlerS3URI(urllib.parse.urlparse("file://a/b.txt"))
        # will be a problem.
        # This is needed to prevent a schemeless absolute URI become
        # a file URI unexpectedly when calling updatedFile or
        # updatedExtension
        if cls is not ButlerURI:
            parsed, dirLike = cls._fixDirectorySep(parsed, forceDirectory)
            subclass = cls

    elif isinstance(uri, ButlerURI):
        # Since ButlerURI is immutable we can return the argument
        # unchanged.
        return uri
    else:
        raise ValueError("Supplied URI must be string, Path, "
                         f"ButlerURI, or ParseResult but got '{uri!r}'")

    if subclass is None:
        # Work out the subclass from the URI scheme. The imports are
        # deferred to this point and only the matching module is loaded.
        if not parsed.scheme:
            from .schemeless import ButlerSchemelessURI
            subclass = ButlerSchemelessURI
        elif parsed.scheme == "file":
            from .file import ButlerFileURI
            subclass = ButlerFileURI
        elif parsed.scheme == "s3":
            from .s3 import ButlerS3URI
            subclass = ButlerS3URI
        elif parsed.scheme.startswith("http"):
            from .http import ButlerHttpURI
            subclass = ButlerHttpURI
        elif parsed.scheme == "resource":
            # Rules for scheme names disallow pkg_resource
            from .packageresource import ButlerPackageResourceURI
            subclass = ButlerPackageResourceURI
        elif parsed.scheme == "mem":
            # in-memory datastore object
            from .mem import ButlerInMemoryURI
            subclass = ButlerInMemoryURI
        else:
            # Both fragments must be f-strings so that the offending URI
            # is interpolated into the message.
            raise NotImplementedError(f"No URI support for scheme: '{parsed.scheme}'"
                                      f" in {parsed.geturl()}")

        parsed, dirLike = subclass._fixupPathUri(parsed, root=root,
                                                 forceAbsolute=forceAbsolute,
                                                 forceDirectory=forceDirectory)

        # It is possible for the class to change from schemeless
        # to file so handle that
        if parsed.scheme == "file":
            from .file import ButlerFileURI
            subclass = ButlerFileURI

    # Now create an instance of the correct subclass and set the
    # attributes directly
    self = object.__new__(subclass)
    self._uri = parsed
    self.dirLike = dirLike
    self.isTemporary = isTemporary
    return self
@property
def scheme(self) -> str:
    """Scheme component of the URI.

    Notes
    -----
    The ``://`` separator is not included in the returned value.
    """
    parsed = self._uri
    return parsed.scheme
@property
def netloc(self) -> str:
    """Network location component of the URI."""
    parsed = self._uri
    return parsed.netloc
@property
def path(self) -> str:
    """Path component of the URI, exactly as stored (no unquoting)."""
    parsed = self._uri
    return parsed.path
@property
def unquoted_path(self) -> str:
    """Path component of the URI with URI quoting reversed."""
    quoted = self._uri.path
    return urllib.parse.unquote(quoted)
@property
def ospath(self) -> str:
    """Path component of the URI localized to the current OS.

    This base implementation always raises `AttributeError`.
    """
    message = f"Non-file URI ({self}) has no local OS path."
    raise AttributeError(message)
@property
def relativeToPathRoot(self) -> str:
    """Path of this URI expressed relative to the root of its path.

    The path is parsed with the scheme's path library and made relative
    to that path's root. A trailing ``/`` is appended for dir-like URIs
    if not already present.

    The returned string is always unquoted.
    """
    pure = self._pathLib(self.path)
    relative = str(pure.relative_to(pure.root))
    needs_separator = self.dirLike and not relative.endswith("/")
    if needs_separator:
        relative = f"{relative}/"
    return urllib.parse.unquote(relative)
@property
def is_root(self) -> bool:
    """Whether this URI points to the root of the network location.

    This is `True` exactly when ``relativeToPathRoot`` is ``"./"``.
    """
    return self.relativeToPathRoot == "./"
@property
def fragment(self) -> str:
    """Fragment component of the URI."""
    parsed = self._uri
    return parsed.fragment
@property
def params(self) -> str:
    """Parameters included in the URI, if any."""
    parsed = self._uri
    return parsed.params
@property
def query(self) -> str:
    """Query string included in the URI, if any."""
    parsed = self._uri
    return parsed.query
def geturl(self) -> str:
    """Render the URI as a string.

    Returns
    -------
    url : `str`
        String form of the URI, as produced by the underlying
        parse result.
    """
    parsed = self._uri
    return parsed.geturl()
def root_uri(self) -> ButlerURI:
    """Return the base root URI.

    Returns
    -------
    uri : `ButlerURI`
        Copy of this URI with an empty path, forced to be dir-like.
    """
    overrides = {"path": "", "forceDirectory": True}
    return self.replace(**overrides)
def split(self) -> Tuple[ButlerURI, str]:
    """Split the URI into a directory URI and a final path component.

    Returns
    -------
    head : `ButlerURI`
        Everything leading up to tail, expanded and normalized as per
        ButlerURI rules.
    tail : `str`
        Last component of ``self.path``, unquoted. Empty if the path
        ends on a separator. Never contains separators.

    Notes
    -----
    Equivalent to `os.path.split()` where head preserves the URI
    components.
    """
    pathmod = self._pathModule
    head_path, quoted_tail = pathmod.split(self.path)

    # Schemeless URIs can be relative and must stay that way, so only
    # force an absolute result when the current path is itself absolute.
    # All other URIs will be absolute already.
    keep_absolute = pathmod.isabs(self.path)

    head_uri = ButlerURI(self._uri._replace(path=head_path),
                         forceDirectory=True, forceAbsolute=keep_absolute)

    # The file part should never include quoted metacharacters
    return head_uri, urllib.parse.unquote(quoted_tail)
def basename(self) -> str:
    """Return the last element of the URI path.

    Returns
    -------
    tail : `str`
        Last part of the path attribute. Empty if the path ends on a
        separator.

    Notes
    -----
    This is the second element returned by `split()`.

    Equivalent of `os.path.basename()`.
    """
    parts = self.split()
    return parts[1]
def dirname(self) -> ButlerURI:
    """Return the directory part of the path as a new `ButlerURI`.

    Returns
    -------
    head : `ButlerURI`
        Everything except the tail of the path attribute. This is the
        first element returned by `split()`.

    Notes
    -----
    Equivalent of `os.path.dirname()`.
    """
    parts = self.split()
    return parts[0]
def parent(self) -> ButlerURI:
    """Return a `ButlerURI` of the parent directory.

    Returns
    -------
    head : `ButlerURI`
        For a file-like URI, the result of `dirname()`. For a dir-like
        URI, a dir-like URI whose path is the parent of this path.
    """
    if self.dirLike:
        # Go through the path library so that the result does not depend
        # on whether the path carries a trailing separator.
        parent_path = self._pathLib(self.path).parent
        return self.replace(path=str(parent_path), forceDirectory=True)

    # File-like: the parent is simply the containing directory.
    return self.dirname()
def replace(self, forceDirectory: bool = False, isTemporary: bool = False, **kwargs: Any) -> ButlerURI:
    """Return a new `ButlerURI` with the given components replaced.

    Parameters
    ----------
    forceDirectory : `bool`, optional
        Forwarded to the constructor to force the new URI to be dir-like.
    isTemporary : `bool`, optional
        Forwarded to the constructor to mark the new URI as temporary.
    **kwargs
        Components of a `urllib.parse.ParseResult` that should be
        modified for the newly-created `ButlerURI`.

    Returns
    -------
    new : `ButlerURI`
        New object of the same class as ``self`` with updated values.

    Raises
    ------
    ValueError
        Raised if a change of ``scheme`` is requested.
    """
    if "scheme" in kwargs:
        raise ValueError(f"Can not use replace() method to change URI scheme for {self}")

    updated = self._uri._replace(**kwargs)
    factory = self.__class__
    return factory(updated, forceDirectory=forceDirectory, isTemporary=isTemporary)
def updatedFile(self, newfile: str) -> ButlerURI:
    """Return a new URI whose final path component is replaced.

    Parameters
    ----------
    newfile : `str`
        File name with no path component.

    Returns
    -------
    updated : `ButlerURI`
        New URI with ``newfile`` in place of the last path component.

    Notes
    -----
    The ``dirLike`` attribute of the result is forced to `False`. The new
    file name is quoted when ``quotePaths`` is set for this scheme.
    """
    pathmod = self._pathModule
    filename = urllib.parse.quote(newfile) if self.quotePaths else newfile
    parent_dir = pathmod.split(self.path)[0]

    updated = self.replace(path=pathmod.join(parent_dir, filename))
    updated.dirLike = False
    return updated
def updatedExtension(self, ext: Optional[str]) -> ButlerURI:
    """Return a new `ButlerURI` with the file extension replaced.

    The whole extension reported by `getExtension()` is replaced.

    Parameters
    ----------
    ext : `str` or `None`
        New extension. An empty string removes any extension. `None`
        means no change is requested.

    Returns
    -------
    updated : `ButlerURI`
        URI with the specified extension. This is ``self`` when ``ext``
        is `None` or already matches the current extension.
    """
    if ext is None:
        return self

    existing = self.getExtension()
    if existing == ext:
        # Nothing to do if the extension already matches
        return self

    # Strip the existing extension by length rather than with
    # os.path.splitext so that .fits.gz counts as one extension.
    stem = self.path[:-len(existing)] if existing else self.path

    # Ensure a leading "." on a non-empty extension; the empty string
    # is left untouched so that it removes the extension.
    if ext and not ext.startswith("."):
        ext = "." + ext

    return self.replace(path=stem + ext)
def getExtension(self) -> str:
    """Return the file extension(s) associated with this URI path.

    Returns
    -------
    ext : `str`
        The file extension (including the ``.``), or an empty string if
        there is none. Only the last extension is returned, unless it is
        one of the compression suffixes ``.gz``, ``.bz2``, ``.xz`` or
        ``.fz`` and is preceded by another extension, in which case the
        two are combined (e.g. ``.fits.gz``).
    """
    compression = {".gz", ".bz2", ".xz", ".fz"}

    # Only look at the file part of the path so that a "." in a
    # directory name cannot be mistaken for an extension.
    suffixes = self._pathLib(self.basename()).suffixes
    if not suffixes:
        return ""

    *leading, final = suffixes

    # Multiple extensions, decide whether to include the final two
    if leading and final in compression:
        return f"{leading[-1]}{final}"
    return final
def join(self, path: Union[str, ButlerURI], isTemporary: bool = False) -> ButlerURI:
    """Return a new `ButlerURI` with additional path components.

    Parameters
    ----------
    path : `str`, `ButlerURI`
        Additional file components to append to the current URI. Assumed
        to include a file at the end. Will be quoted depending on the
        associated URI scheme. If the path looks like a URI with a scheme
        referring to an absolute location, it will be returned
        directly (matching the behavior of `os.path.join()`). It can
        also be a `ButlerURI`.
    isTemporary : `bool`, optional
        Indicate that the resulting URI represents a temporary resource.

    Returns
    -------
    new : `ButlerURI`
        New URI with any file at the end replaced with the new path
        components.

    Raises
    ------
    ValueError
        Raised if the ``path`` is an absolute scheme-less URI. In that
        situation it is unclear whether the intent is to return a
        ``file`` URI or it was a mistake and a relative scheme-less URI
        was meant.

    Notes
    -----
    Schemeless URIs assume local path separator but all other URIs assume
    POSIX separator if the supplied path has directory structure. It
    may be this never becomes a problem but datastore templates assume
    POSIX separator is being used.

    If an absolute `ButlerURI` is given for ``path`` it is assumed that
    this should be returned directly. Giving a ``path`` of an absolute
    scheme-less URI is not allowed for safety reasons as it may indicate
    a mistake in the calling code.
    """
    # Parse without forcing to absolute so that the expected case of a
    # relative path can be told apart from the special cases below.
    candidate = ButlerURI(path, forceAbsolute=False)

    # An explicit scheme marks a full URI, which is used directly. This
    # check comes first so explicit URIs are distinguished from absolute
    # scheme-less ones.
    if candidate.scheme:
        return candidate

    # Absolute scheme-less path.
    if candidate.isabs():
        raise ValueError(f"Can not join absolute scheme-less {candidate!r} to another URI.")

    # A ButlerURI argument contributes its unquoted path. A plain string
    # is used as given so that "#" can appear in the file name.
    if not isinstance(path, str):
        path = candidate.unquoted_path

    base = self.dirname()  # By definition a directory URI

    # Ask the directory URI, not self, about quoting since dirname can
    # change the URI scheme for schemeless -> file
    if base.quotePaths:
        path = urllib.parse.quote(path)

    pathmod = self._pathModule
    combined = pathmod.normpath(pathmod.join(base.path, path))

    # normpath can strip a trailing separator, so force a directory if
    # the supplied path ended with one.
    ends_on_sep = path.endswith(pathmod.sep)
    return base.replace(path=combined, forceDirectory=ends_on_sep, isTemporary=isTemporary)
def relative_to(self, other: ButlerURI) -> Optional[str]:
    """Return the path of this URI relative to the other URI.

    Parameters
    ----------
    other : `ButlerURI`
        URI to use to calculate the relative path. Must be a parent
        of this URI.

    Returns
    -------
    subpath : `str` or `None`
        The unquoted sub path of this URI relative to the supplied other
        URI. `None` if there is no parent child relationship or if the
        scheme or netloc differ.
    """
    # Scheme-less absolute other is treated as if it's a file scheme.
    # Scheme-less relative other can only return non-None if self
    # is also scheme-less relative and that is handled specifically
    # in a subclass.
    if not other.scheme and other.isabs():
        other = other.abspath()

    # Scheme-less self is handled elsewhere.
    if self.scheme != other.scheme or self.netloc != other.netloc:
        return None

    enclosed_path = self._pathLib(self.relativeToPathRoot)
    parent_path = other.relativeToPathRoot
    try:
        # relativeToPathRoot is already unquoted, so the result must not
        # be unquoted a second time: doing so would corrupt file names
        # that contain a literal "%XX" sequence.
        return str(enclosed_path.relative_to(parent_path))
    except ValueError:
        # Raised by the path library when other is not a parent of self.
        return None
def exists(self) -> bool:
    """Report whether the resource is available.

    This base implementation always raises `NotImplementedError`.

    Returns
    -------
    exists : `bool`
        `True` if the resource exists.
    """
    raise NotImplementedError()
def remove(self) -> None:
    """Remove the resource.

    This base implementation always raises `NotImplementedError`.
    """
    raise NotImplementedError()
def isabs(self) -> bool:
    """Report whether the resource is fully specified.

    Returns
    -------
    isabs : `bool`
        Always `True` in this base implementation. Only schemeless URIs
        are expected to report otherwise.
    """
    return True
def abspath(self) -> ButlerURI:
    """Return this URI in a form that uses an absolute path.

    Returns
    -------
    abs : `ButlerURI`
        Absolute URI. This base implementation returns ``self``
        unchanged. Schemeless URIs are upgraded to file URIs.
    """
    return self
def _as_local(self) -> Tuple[str, bool]:
    """Return the location of the (possibly remote) resource as local file.

    Helper for the `as_local` context manager. This base implementation
    always raises `NotImplementedError`.

    Returns
    -------
    path : `str`
        For a remote resource, a copy of the resource on the local file
        system, probably in a temporary directory. For a local resource
        this should be the actual path to the resource.
    is_temporary : `bool`
        Whether the local path is a temporary file.
    """
    raise NotImplementedError()
@contextlib.contextmanager
def as_local(self) -> Iterator[ButlerURI]:
    """Provide the (possibly remote) resource as a local file.

    Yields
    ------
    local : `ButlerURI`
        URI built from the location reported by ``_as_local()``. For a
        remote resource this will be a copy on the local file system,
        probably in a temporary directory. For a local resource this
        should be the actual path to the resource.

    Notes
    -----
    On exit, a local file that was reported as temporary is removed if
    it still exists.

    Examples
    --------
    Should be used as a context manager:

    .. code-block:: py

       with uri.as_local() as local:
           ospath = local.ospath
    """
    location, temporary = self._as_local()
    local = ButlerURI(location, isTemporary=temporary)

    try:
        yield local
    finally:
        # Check for existence first since the caller might have
        # relocated the temporary file.
        if temporary and local.exists():
            local.remove()
@classmethod
@contextlib.contextmanager
def temporary_uri(cls, prefix: Optional[ButlerURI] = None,
                  suffix: Optional[str] = None) -> Iterator[ButlerURI]:
    """Create a temporary URI.

    Parameters
    ----------
    prefix : `ButlerURI`, optional
        Prefix to use. Without this the path will be formed as a local
        file URI in a newly-created temporary directory. Ensuring that
        the prefix location exists is the responsibility of the caller.
    suffix : `str`, optional
        A file suffix to be used. The ``.`` should be included in this
        suffix.

    Yields
    ------
    uri : `ButlerURI`
        The temporary URI. Will be removed when the context is completed.
    """
    # Remember whether the directory was created here. The isTemporary
    # flag can not be used for this since an external prefix may have
    # that set as well.
    owns_directory = prefix is None
    if owns_directory:
        prefix = ButlerURI(tempfile.mkdtemp(), forceDirectory=True, isTemporary=True)

    # Build a randomized file name by hand. For consistency the same
    # approach is used for local and remote locations rather than
    # mkstemp, and the file itself is not created here.
    alphabet = "abcdefghijklmnopqrstuvwxyz0123456789_"
    generator = Random()
    letters = [generator.choice(alphabet) for _ in range(16)]
    tempname = "".join(letters)
    if suffix:
        tempname += suffix
    temporary = prefix.join(tempname, isTemporary=True)

    try:
        yield temporary
    finally:
        if owns_directory:
            shutil.rmtree(prefix.ospath, ignore_errors=True)
        else:
            # It's okay if this does not work because the user removed
            # the file.
            with contextlib.suppress(FileNotFoundError):
                temporary.remove()
def read(self, size: int = -1) -> bytes:
    """Open the resource and return its contents as bytes.

    This base implementation always raises `NotImplementedError`.

    Parameters
    ----------
    size : `int`, optional
        The number of bytes to read. Negative or omitted indicates
        that all data should be read.
    """
    raise NotImplementedError()
def write(self, data: bytes, overwrite: bool = True) -> None:
    """Write the supplied bytes to the new resource.

    This base implementation always raises `NotImplementedError`.

    Parameters
    ----------
    data : `bytes`
        The bytes to write to the resource. The entire contents of the
        resource will be replaced.
    overwrite : `bool`, optional
        If `True` the resource will be overwritten if it exists. Otherwise
        the write will fail.
    """
    raise NotImplementedError()
def mkdir(self) -> None:
    """Create the directory resource for a dir-like URI, if needed.

    This base implementation always raises `NotImplementedError`.
    """
    raise NotImplementedError()
def isdir(self) -> bool:
    """Return the ``dirLike`` flag: True if this URI looks like a directory."""
    looks_like_directory = self.dirLike
    return looks_like_directory
def size(self) -> int:
    """Return the size of the resource for a non-dir-like URI.

    This base implementation always raises `NotImplementedError`.

    Returns
    -------
    sz : `int`
        The size in bytes of the resource associated with this URI.
        Returns 0 if dir-like.
    """
    raise NotImplementedError()
def __str__(self) -> str:
    """Return the string form of the URI, as given by ``geturl()``."""
    url = self.geturl()
    return url
def __repr__(self) -> str:
    """Return a ``ButlerURI("...")`` representation of this URI."""
    url = self.geturl()
    return f'ButlerURI("{url}")'
def __eq__(self, other: Any) -> bool:
    """Compare by string form; defer to ``other`` if it is not a `ButlerURI`."""
    if isinstance(other, ButlerURI):
        return self.geturl() == other.geturl()
    return NotImplemented
def __hash__(self) -> int:
    """Return the hash of the string form of this object."""
    text = str(self)
    return hash(text)
def __copy__(self) -> ButlerURI:
    """Support `copy.copy`.

    The object is immutable so the copy is the object itself.
    """
    # Defined explicitly because the __new__ method confuses things
    return self
def __deepcopy__(self, memo: Any) -> ButlerURI:
    """Support `copy.deepcopy`.

    The object is immutable so the copy is the object itself; ``memo``
    is not used.
    """
    # Defined explicitly because the __new__ method confuses things
    return self
def __getnewargs__(self) -> Tuple:
    """Support pickling by returning the string form as the only argument."""
    text = str(self)
    return (text,)
@classmethod
def _fixDirectorySep(cls, parsed: urllib.parse.ParseResult,
                     forceDirectory: bool = False) -> Tuple[urllib.parse.ParseResult, bool]:
    """Ensure that a path separator is present on directory paths.

    Parameters
    ----------
    parsed : `~urllib.parse.ParseResult`
        The result from parsing a URI using `urllib.parse`.
    forceDirectory : `bool`, optional
        If `True` forces the URI to end with a separator, otherwise given
        URI is interpreted as is. Specifying that the URI is conceptually
        equivalent to a directory can break some ambiguities when
        interpreting the last element of a path.

    Returns
    -------
    modified : `~urllib.parse.ParseResult`
        The parse result, with a trailing separator appended to the path
        when a directory was forced and none was present.
    dirLike : `bool`
        `True` if given parsed URI has a trailing separator or
        forceDirectory is True. Otherwise `False`.
    """
    separator = cls._pathModule.sep
    has_trailing_sep = parsed.path.endswith(separator)

    # Neither explicitly a directory nor ending on a separator: the URI
    # is returned untouched and is not dir-like.
    if not (forceDirectory or has_trailing_sep):
        return parsed, False

    # Only add the separator if it's not already there
    if not has_trailing_sep:
        parsed = parsed._replace(path=parsed.path + separator)
    return parsed, True
@classmethod
def _fixupPathUri(cls, parsed: urllib.parse.ParseResult, root: Optional[Union[str, ButlerURI]] = None,
                  forceAbsolute: bool = False,
                  forceDirectory: bool = False) -> Tuple[urllib.parse.ParseResult, bool]:
    """Correct any issues with the supplied URI.

    This base implementation only delegates to ``_fixDirectorySep``.

    Parameters
    ----------
    parsed : `~urllib.parse.ParseResult`
        The result from parsing a URI using `urllib.parse`.
    root : `str` or `ButlerURI`, ignored
        Not used by this implementation since all URIs are
        absolute except for those representing the local file system.
    forceAbsolute : `bool`, ignored.
        Not used by this implementation. URIs are generally always
        absolute.
    forceDirectory : `bool`, optional
        If `True` forces the URI to end with a separator, otherwise given
        URI is interpreted as is. Specifying that the URI is conceptually
        equivalent to a directory can break some ambiguities when
        interpreting the last element of a path.

    Returns
    -------
    modified : `~urllib.parse.ParseResult`
        Update result if a URI is being handled.
    dirLike : `bool`
        `True` if given parsed URI has a trailing separator or
        forceDirectory is True. Otherwise `False`.

    Notes
    -----
    Relative paths are explicitly not supported by RFC8089 but `urllib`
    does accept URIs of the form ``file:relative/path.ext``. They need
    to be turned into absolute paths before they can be used. This is
    always done regardless of the ``forceAbsolute`` parameter.

    AWS S3 differentiates between keys with trailing POSIX separators (i.e
    `/dir` and `/dir/`) whereas POSIX does not necessarily.

    Scheme-less paths are normalized.
    """
    fixed = cls._fixDirectorySep(parsed, forceDirectory)
    return fixed
def transfer_from(self, src: ButlerURI, transfer: str,
                  overwrite: bool = False,
                  transaction: Optional[Union[DatastoreTransaction, NoTransaction]] = None) -> None:
    """Transfer the resource identified by ``src`` to this URI.

    This base implementation supports no transfer modes and always
    raises `NotImplementedError`.

    Parameters
    ----------
    src : `ButlerURI`
        Source URI.
    transfer : `str`
        Mode to use for transferring the resource. Generically there are
        many standard options: copy, link, symlink, hardlink, relsymlink.
        Not all URIs support all modes.
    overwrite : `bool`, optional
        Allow an existing file to be overwritten. Defaults to `False`.
    transaction : `DatastoreTransaction`, optional
        A transaction object that can (depending on implementation)
        rollback transfers on error. Not guaranteed to be implemented.

    Raises
    ------
    NotImplementedError
        Always raised by this base implementation.

    Notes
    -----
    Conceptually this is hard to scale as the number of URI schemes
    grow. The destination URI is more important than the source URI
    since that is where all the transfer modes are relevant (with the
    complication that "move" deletes the source).

    Local file to local file is the fundamental use case but every
    other scheme has to support "copy" to local file (with implicit
    support for "move") and copy from local file.
    All the "link" options tend to be specific to local file systems.

    "move" is a "copy" where the remote resource is deleted at the end.
    Whether this works depends on the source URI rather than the
    destination URI. Reverting a move on transaction rollback is
    expected to be problematic if a remote resource was involved.
    """
    message = f"No transfer modes supported by URI scheme {self.scheme}"
    raise NotImplementedError(message)
def walk(
    self, file_filter: Optional[Union[str, re.Pattern]] = None
) -> Iterator[Union[List, Tuple[ButlerURI, List[str], List[str]]]]:
    """Walk the directory tree, reporting matching files and directories.

    Parameters
    ----------
    file_filter : `str` or `re.Pattern`, optional
        Regex to filter out files from the list before it is returned.

    Yields
    ------
    dirpath : `ButlerURI`
        Current directory being examined.
    dirnames : `list` of `str`
        Names of subdirectories within dirpath.
    filenames : `list` of `str`
        Names of all the files within dirpath.

    Raises
    ------
    NotImplementedError
        Always raised by this implementation, which provides no
        directory walking.
    """
    raise NotImplementedError
@classmethod
def findFileResources(cls, candidates: Iterable[Union[str, ButlerURI]],
                      file_filter: Optional[str] = None,
                      grouped: bool = False) -> Iterator[Union[ButlerURI, Iterator[ButlerURI]]]:
    """Get all the files from a list of values.

    Parameters
    ----------
    candidates : iterable [`str` or `ButlerURI`]
        The files to return and directories in which to look for files to
        return.
    file_filter : `str`, optional
        The regex to use when searching for files within directories.
        By default returns all the found files.
    grouped : `bool`, optional
        If `True` the results will be grouped by directory and each
        yielded value will be an iterator over URIs. If `False` each
        URI will be returned separately.

    Yields
    ------
    found_file: `ButlerURI`
        The passed-in URIs and URIs found in passed-in directories.
        If grouping is enabled, each of the yielded values will be an
        iterator yielding members of the group. Files given explicitly
        will be returned as a single group at the end.

    Notes
    -----
    If a value is a file it is yielded immediately (unless grouping is
    enabled, in which case explicit files are collected and yielded as
    the final group). If a value is a directory, all the files in the
    directory (recursively) that match the regex will be yielded in turn.

    Each per-directory group is bound to its own directory when it is
    created, so groups can be collected first and iterated later.
    """
    fileRegex = None if file_filter is None else re.compile(file_filter)

    # Explicitly given files, only accumulated when grouping is requested.
    singles = []

    # Find all the files of interest
    for location in candidates:
        uri = ButlerURI(location)

        if not uri.isdir():
            if grouped:
                singles.append(uri)
            else:
                yield uri
            continue

        for found in uri.walk(fileRegex):
            if not found:
                # This means the uri does not exist and by
                # convention we ignore it
                continue
            root, dirs, files = found
            if not files:
                continue
            if grouped:
                # Bind ``root.join`` now rather than inside a generator
                # expression: a generator expression would look up
                # ``root`` lazily and, if the caller consumes the group
                # after this loop has advanced, would join against a
                # later directory.
                yield map(root.join, files)
            else:
                for name in files:
                    yield root.join(name)

    # Finally, return any explicitly given files in one group
    if grouped and singles:
        yield iter(singles)