Coverage for python/lsst/daf/butler/core/_butlerUri/_butlerUri.py: 48%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24import contextlib
25import concurrent.futures
26import urllib.parse
27import posixpath
28import copy
29import logging
30import re
31import shutil
32import tempfile
34from random import Random
35from pathlib import Path, PurePath, PurePosixPath
37__all__ = ('ButlerURI',)
39from typing import (
40 TYPE_CHECKING,
41 Any,
42 Iterable,
43 Iterator,
44 List,
45 Dict,
46 Optional,
47 Tuple,
48 Type,
49 Union,
50)
52from .utils import NoTransaction
54if TYPE_CHECKING: 54 ↛ 55line 54 didn't jump to line 55, because the condition on line 54 was never true
55 from ..datastore import DatastoreTransaction
# Module-level logger named after this module.
log = logging.getLogger(__name__)

# Regex for looking for URI escapes: a percent sign followed by two
# upper-case hexadecimal digits.
ESCAPES_RE = re.compile(r"%[A-F0-9]{2}")

# Precomputed escaped hash, i.e. the result of URI-quoting "#".
ESCAPED_HASH = urllib.parse.quote("#")

# Maximum number of worker threads for parallelized operations.
# If greater than 10, be aware that this number has to be consistent
# with connection pool sizing (for example in urllib3).
MAX_WORKERS = 10
72class ButlerURI:
73 """Convenience wrapper around URI parsers.
75 Provides access to URI components and can convert file
76 paths into absolute path URIs. Scheme-less URIs are treated as if
77 they are local file system paths and are converted to absolute URIs.
79 A specialist subclass is created for each supported URI scheme.
81 Parameters
82 ----------
83 uri : `str` or `urllib.parse.ParseResult`
84 URI in string form. Can be scheme-less if referring to a local
85 filesystem path.
86 root : `str` or `ButlerURI`, optional
87 When fixing up a relative path in a ``file`` scheme or if scheme-less,
88 use this as the root. Must be absolute. If `None` the current
89 working directory will be used. Can be a file URI.
90 forceAbsolute : `bool`, optional
91 If `True`, scheme-less relative URI will be converted to an absolute
92 path using a ``file`` scheme. If `False` scheme-less URI will remain
93 scheme-less and will not be updated to ``file`` or absolute path.
94 forceDirectory: `bool`, optional
95 If `True` forces the URI to end with a separator, otherwise given URI
96 is interpreted as is.
97 isTemporary : `bool`, optional
98 If `True` indicates that this URI points to a temporary resource.
99 """
101 _pathLib: Type[PurePath] = PurePosixPath
102 """Path library to use for this scheme."""
104 _pathModule = posixpath
105 """Path module to use for this scheme."""
107 transferModes: Tuple[str, ...] = ("copy", "auto", "move")
108 """Transfer modes supported by this implementation.
110 Move is special in that it is generally a copy followed by an unlink.
111 Whether that unlink works depends critically on whether the source URI
112 implements unlink. If it does not the move will be reported as a failure.
113 """
115 transferDefault: str = "copy"
116 """Default mode to use for transferring if ``auto`` is specified."""
118 quotePaths = True
119 """True if path-like elements modifying a URI should be quoted.
121 All non-schemeless URIs have to internally use quoted paths. Therefore
122 if a new file name is given (e.g. to updatedFile or join) a decision must
123 be made whether to quote it to be consistent.
124 """
126 isLocal = False
127 """If `True` this URI refers to a local file."""
129 # This is not an ABC with abstract methods because the __new__ being
130 # a factory confuses mypy such that it assumes that every constructor
131 # returns a ButlerURI and then determines that all the abstract methods
132 # are still abstract. If they are not marked abstract but just raise
133 # mypy is fine with it.
135 # mypy is confused without these
136 _uri: urllib.parse.ParseResult
137 isTemporary: bool
138 dirLike: bool
def __new__(cls, uri: Union[str, urllib.parse.ParseResult, ButlerURI, Path],
            root: Optional[Union[str, ButlerURI]] = None, forceAbsolute: bool = True,
            forceDirectory: bool = False, isTemporary: bool = False) -> ButlerURI:
    """Create and return new specialist ButlerURI subclass.

    Parameters
    ----------
    uri : `str`, `pathlib.Path`, `urllib.parse.ParseResult` or `ButlerURI`
        URI to wrap. Can be scheme-less if referring to a local
        filesystem path. A `ButlerURI` is returned unchanged.
    root : `str` or `ButlerURI`, optional
        Root forwarded to the ``_fixupPathUri`` method of the selected
        subclass.
    forceAbsolute : `bool`, optional
        Forwarded to the ``_fixupPathUri`` method of the selected
        subclass.
    forceDirectory : `bool`, optional
        If `True` forces the URI to end with a separator, otherwise given
        URI is interpreted as is.
    isTemporary : `bool`, optional
        If `True` indicates that this URI points to a temporary resource.

    Returns
    -------
    uri : `ButlerURI`
        Instance of the subclass selected from the URI scheme.

    Raises
    ------
    ValueError
        Raised if ``uri`` is not a string, `~pathlib.Path`, `ButlerURI`
        or `~urllib.parse.ParseResult`.
    NotImplementedError
        Raised if the URI scheme has no supporting subclass.
    """
    parsed: urllib.parse.ParseResult
    dirLike: bool = False
    subclass: Optional[Type[ButlerURI]] = None

    if isinstance(uri, Path):
        uri = str(uri)

    # Record if we need to post process the URI components
    # or if the instance is already fully configured
    if isinstance(uri, str):
        # Since local file names can have special characters in them
        # we need to quote them for the parser but we can unquote
        # later. Assume that all other URI schemes are quoted.
        # Since sometimes people write file:/a/b and not file:///a/b
        # we should not quote in the explicit case of file:
        if "://" not in uri and not uri.startswith("file:"):
            if ESCAPES_RE.search(uri):
                log.warning("Possible double encoding of %s", uri)
            else:
                uri = urllib.parse.quote(uri)
                # Special case hash since we must support fragments
                # even in schemeless URIs -- although try to only replace
                # them in file part and not directory part
                if ESCAPED_HASH in uri:
                    dirpos = uri.rfind("/")
                    # Do replacement after this /
                    uri = uri[:dirpos+1] + uri[dirpos+1:].replace(ESCAPED_HASH, "#")

        parsed = urllib.parse.urlparse(uri)
    elif isinstance(uri, urllib.parse.ParseResult):
        parsed = copy.copy(uri)
        # If we are being instantiated with a subclass, rather than
        # ButlerURI, ensure that that subclass is used directly.
        # This could lead to inconsistencies if this constructor
        # is used externally outside of the ButlerURI.replace() method.
        #   ButlerS3URI(urllib.parse.urlparse("file://a/b.txt"))
        # will be a problem.
        # This is needed to prevent a schemeless absolute URI become
        # a file URI unexpectedly when calling updatedFile or
        # updatedExtension
        if cls is not ButlerURI:
            parsed, dirLike = cls._fixDirectorySep(parsed, forceDirectory)
            subclass = cls

    elif isinstance(uri, ButlerURI):
        # Since ButlerURI is immutable we can return the argument
        # unchanged.
        return uri
    else:
        raise ValueError("Supplied URI must be string, Path, "
                         f"ButlerURI, or ParseResult but got '{uri!r}'")

    if subclass is None:
        # Work out the subclass from the URI scheme. The scheme-specific
        # modules are imported lazily, only for the scheme in use.
        if not parsed.scheme:
            from .schemeless import ButlerSchemelessURI
            subclass = ButlerSchemelessURI
        elif parsed.scheme == "file":
            from .file import ButlerFileURI
            subclass = ButlerFileURI
        elif parsed.scheme == "s3":
            from .s3 import ButlerS3URI
            subclass = ButlerS3URI
        elif parsed.scheme.startswith("http"):
            from .http import ButlerHttpURI
            subclass = ButlerHttpURI
        elif parsed.scheme == "resource":
            # Rules for scheme names disallow pkg_resource
            from .packageresource import ButlerPackageResourceURI
            subclass = ButlerPackageResourceURI
        elif parsed.scheme == "mem":
            # in-memory datastore object
            from .mem import ButlerInMemoryURI
            subclass = ButlerInMemoryURI
        else:
            # Both fragments must be f-strings so that the offending URL
            # is interpolated into the message.
            raise NotImplementedError(f"No URI support for scheme: '{parsed.scheme}'"
                                      f" in {parsed.geturl()}")

        parsed, dirLike = subclass._fixupPathUri(parsed, root=root,
                                                 forceAbsolute=forceAbsolute,
                                                 forceDirectory=forceDirectory)

        # It is possible for the class to change from schemeless
        # to file so handle that
        if parsed.scheme == "file":
            from .file import ButlerFileURI
            subclass = ButlerFileURI

    # Now create an instance of the correct subclass and set the
    # attributes directly
    self = object.__new__(subclass)
    self._uri = parsed
    self.dirLike = dirLike
    self.isTemporary = isTemporary
    return self
@property
def scheme(self) -> str:
    """Return the scheme component of the wrapped URI.

    Notes
    -----
    The ``://`` separator is not part of the scheme.
    """
    parsed = self._uri
    return parsed.scheme
@property
def netloc(self) -> str:
    """Return the network location component of the wrapped URI."""
    parsed = self._uri
    return parsed.netloc
@property
def path(self) -> str:
    """Return the path component of the wrapped URI, as stored."""
    parsed = self._uri
    return parsed.path
@property
def unquoted_path(self) -> str:
    """Return the URI path component with URI quoting reversed."""
    quoted = self._uri.path
    return urllib.parse.unquote(quoted)
@property
def ospath(self) -> str:
    """Return the path component of the URI localized to current OS.

    Raises
    ------
    AttributeError
        Always raised by this implementation.
    """
    message = f"Non-file URI ({self}) has no local OS path."
    raise AttributeError(message)
@property
def relativeToPathRoot(self) -> str:
    """Return the path relative to the root of the path.

    The path is parsed with the path library of this class and the
    root is stripped from the left hand side. A dir-like URI always
    gets a trailing ``/``.

    Always unquotes.
    """
    full = self._pathLib(self.path)
    relative = str(full.relative_to(full.root))
    # Converting to a path object drops a trailing separator, so put it
    # back for dir-like URIs.
    needs_sep = self.dirLike and not relative.endswith("/")
    trailer = "/" if needs_sep else ""
    return urllib.parse.unquote(relative + trailer)
@property
def is_root(self) -> bool:
    """Return whether this URI points to the root of the network location.

    This is the case when the path relative to the path root is
    ``./``.
    """
    return self.relativeToPathRoot == "./"
@property
def fragment(self) -> str:
    """Return the fragment component of the wrapped URI."""
    parsed = self._uri
    return parsed.fragment
@property
def params(self) -> str:
    """Return the parameters component of the wrapped URI."""
    parsed = self._uri
    return parsed.params
@property
def query(self) -> str:
    """Return the query component of the wrapped URI."""
    parsed = self._uri
    return parsed.query
def geturl(self) -> str:
    """Return the string form of the wrapped URI.

    Returns
    -------
    url : `str`
        String form of URI.
    """
    parsed = self._uri
    return parsed.geturl()
def root_uri(self) -> ButlerURI:
    """Return the base root URI.

    Returns
    -------
    uri : `ButlerURI`
        This URI with the path replaced by an empty string and forced
        to be a directory.
    """
    return self.replace(forceDirectory=True, path="")
def split(self) -> Tuple[ButlerURI, str]:
    """Split URI into head and tail.

    Returns
    -------
    head: `ButlerURI`
        Everything leading up to tail, expanded and normalized as per
        ButlerURI rules.
    tail : `str`
        Last `self.path` component. Tail will be empty if path ends on a
        separator. Tail will never contain separators. It will be
        unquoted.

    Notes
    -----
    Equivalent to `os.path.split()` where head preserves the URI
    components.
    """
    pathmod = self._pathModule
    quoted_path = self.path
    dir_part, file_part = pathmod.split(quoted_path)
    parsed_head = self._uri._replace(path=dir_part)

    # Schemeless is special in that it can be a relative path
    # We need to ensure that it stays that way. All other URIs will
    # be absolute already.
    keep_absolute = pathmod.isabs(quoted_path)
    head_uri = ButlerURI(parsed_head, forceDirectory=True, forceAbsolute=keep_absolute)

    # The file part should never include quoted metacharacters
    return head_uri, urllib.parse.unquote(file_part)
def basename(self) -> str:
    """Return the base name, last element of path, of the URI.

    Returns
    -------
    tail : `str`
        Last part of the path attribute. Tail will be empty if path ends
        on a separator.

    Notes
    -----
    If URI ends on a slash returns an empty string. This is the second
    element returned by `split()`.

    Equivalent of `os.path.basename()`.
    """
    _, tail = self.split()
    return tail
def dirname(self) -> ButlerURI:
    """Return the directory component of the path as a new `ButlerURI`.

    Returns
    -------
    head : `ButlerURI`
        Everything except the tail of path attribute, expanded and
        normalized as per ButlerURI rules. This is the first element
        returned by `split()`.

    Notes
    -----
    Equivalent of `os.path.dirname()`.
    """
    head, _ = self.split()
    return head
def parent(self) -> ButlerURI:
    """Return a `ButlerURI` of the parent directory.

    Returns
    -------
    head : `ButlerURI`
        Everything except the tail of path attribute, expanded and
        normalized as per `ButlerURI` rules.

    Notes
    -----
    For a file-like URI this will be the same as calling `dirname()`.
    """
    if self.dirLike:
        # Dir-like: go up one level, regardless of the presence of a
        # trailing separator.
        parent_path = self._pathLib(self.path).parent
        return self.replace(path=str(parent_path), forceDirectory=True)
    # File-like: the parent is simply the containing directory.
    return self.dirname()
def replace(self, forceDirectory: bool = False, isTemporary: bool = False, **kwargs: Any) -> ButlerURI:
    """Return new `ButlerURI` with specified components replaced.

    Parameters
    ----------
    forceDirectory : `bool`, optional
        Parameter passed to ButlerURI constructor to force this
        new URI to be dir-like.
    isTemporary : `bool`, optional
        Indicate that the resulting URI is temporary resource.
    **kwargs
        Components of a `urllib.parse.ParseResult` that should be
        modified for the newly-created `ButlerURI`.

    Returns
    -------
    new : `ButlerURI`
        New `ButlerURI` object with updated values.

    Raises
    ------
    ValueError
        Raised if a change of ``scheme`` is requested.

    Notes
    -----
    Does not, for now, allow a change in URI scheme.
    """
    # Disallow a change in scheme
    if "scheme" in kwargs:
        raise ValueError(f"Can not use replace() method to change URI scheme for {self}")

    # Construct through the class of this instance so that the result
    # is built by the same class as the original.
    updated_parts = self._uri._replace(**kwargs)
    constructor = self.__class__
    return constructor(updated_parts, forceDirectory=forceDirectory, isTemporary=isTemporary)
def updatedFile(self, newfile: str) -> ButlerURI:
    """Return new URI with an updated final component of the path.

    Parameters
    ----------
    newfile : `str`
        File name with no path component.

    Returns
    -------
    updated : `ButlerURI`
        New URI whose last path component is ``newfile``.

    Notes
    -----
    Forces the ButlerURI.dirLike attribute to be false. The new file path
    will be quoted if necessary.
    """
    pathmod = self._pathModule
    filename = urllib.parse.quote(newfile) if self.quotePaths else newfile
    directory = pathmod.split(self.path)[0]

    updated = self.replace(path=pathmod.join(directory, filename))
    updated.dirLike = False
    return updated
def updatedExtension(self, ext: Optional[str]) -> ButlerURI:
    """Return a new `ButlerURI` with updated file extension.

    All file extensions are replaced.

    Parameters
    ----------
    ext : `str` or `None`
        New extension. If an empty string is given any extension will
        be removed. If `None` is given there will be no change.

    Returns
    -------
    updated : `ButlerURI`
        URI with the specified extension. Can return itself if
        no extension was specified or if it already matches.
    """
    if ext is None:
        return self

    existing = self.getExtension()
    if existing == ext:
        # Nothing to do if the extension already matches
        return self

    # Strip the current extension as reported by getExtension();
    # .fits.gz counts as one extension so do not use os.path.splitext
    stem = self.path[:-len(existing)] if existing else self.path

    # Ensure a leading "." on a non-empty extension; the empty string
    # is left alone so that it removes the extension.
    suffix = ext if (not ext or ext.startswith(".")) else "." + ext

    return self.replace(path=stem + suffix)
def getExtension(self) -> str:
    """Return the file extension(s) associated with this URI path.

    Returns
    -------
    ext : `str`
        The file extension (including the ``.``). Can be empty string
        if there is no file extension. Usually returns only the last
        file extension unless there is a special extension modifier
        indicating file compression, in which case the combined
        extension (e.g. ``.fits.gz``) will be returned.
    """
    compression = (".gz", ".bz2", ".xz", ".fz")

    # Only look at the file part of the path so as not to be confused by
    # "." in directory names.
    suffixes = self._pathLib(self.basename()).suffixes
    if not suffixes:
        return ""

    *leading, last = suffixes

    # Multiple extensions: a compression suffix is combined with the
    # extension immediately before it.
    if leading and last in compression:
        return f"{leading[-1]}{last}"
    return last
def join(self, path: Union[str, ButlerURI], isTemporary: bool = False) -> ButlerURI:
    """Return new `ButlerURI` with additional path components.

    Parameters
    ----------
    path : `str`, `ButlerURI`
        Additional file components to append to the current URI. Assumed
        to include a file at the end. Will be quoted depending on the
        associated URI scheme. If the path looks like a URI with a scheme
        referring to an absolute location, it will be returned
        directly (matching the behavior of `os.path.join()`). It can
        also be a `ButlerURI`.
    isTemporary : `bool`, optional
        Indicate that the resulting URI represents a temporary resource.

    Returns
    -------
    new : `ButlerURI`
        New URI with any file at the end replaced with the new path
        components.

    Notes
    -----
    Schemeless URIs assume local path separator but all other URIs assume
    POSIX separator if the supplied path has directory structure. It
    may be this never becomes a problem but datastore templates assume
    POSIX separator is being used.

    If an absolute `ButlerURI` is given for ``path`` it is assumed that
    this should be returned directly. Giving a ``path`` of an absolute
    scheme-less URI is not allowed for safety reasons as it may indicate
    a mistake in the calling code.

    Raises
    ------
    ValueError
        Raised if the ``path`` is an absolute scheme-less URI. In that
        situation it is unclear whether the intent is to return a
        ``file`` URI or it was a mistake and a relative scheme-less URI
        was meant.
    """
    # Parse without forcing to absolute so that the expected case of a
    # relative path can be told apart from a full URI.
    candidate = ButlerURI(path, forceAbsolute=False)
    if candidate.scheme:
        # An explicit scheme means a full URI: use it directly.
        return candidate

    if candidate.isabs():
        # Absolute scheme-less path.
        raise ValueError(f"Can not join absolute scheme-less {candidate!r} to another URI.")

    # A plain string is used as given so that "#" can appear in the
    # filename; for a ButlerURI use its unquoted path.
    extra = path if isinstance(path, str) else candidate.unquoted_path

    base = self.dirname()  # By definition a directory URI

    # base should be asked about quoting, not self, since dirname can
    # change the URI scheme for schemeless -> file
    if base.quotePaths:
        extra = urllib.parse.quote(extra)

    pathmod = self._pathModule
    joined = pathmod.normpath(pathmod.join(base.path, extra))

    # normpath can strip trailing / so we force directory if the supplied
    # path ended with a /
    as_directory = extra.endswith(pathmod.sep)
    return base.replace(path=joined, forceDirectory=as_directory, isTemporary=isTemporary)
def relative_to(self, other: ButlerURI) -> Optional[str]:
    """Return the relative path from this URI to the other URI.

    Parameters
    ----------
    other : `ButlerURI`
        URI to use to calculate the relative path. Must be a parent
        of this URI.

    Returns
    -------
    subpath : `str`
        The sub path of this URI relative to the supplied other URI.
        Returns `None` if there is no parent child relationship.
        Scheme and netloc must match.
    """
    # Scheme-less absolute other is treated as if it's a file scheme.
    # Scheme-less relative other can only return non-None if self
    # is also scheme-less relative and that is handled specifically
    # in a subclass.
    if not other.scheme and other.isabs():
        other = other.abspath()

    # Scheme-less self is handled elsewhere.
    same_location = self.scheme == other.scheme and self.netloc == other.netloc
    if not same_location:
        return None

    child = self._pathLib(self.relativeToPathRoot)
    ancestor = other.relativeToPathRoot
    try:
        relative = str(child.relative_to(ancestor))
    except ValueError:
        # Not a descendant of the other URI.
        return None
    return urllib.parse.unquote(relative)
def exists(self) -> bool:
    """Indicate that the resource is available.

    This implementation always raises `NotImplementedError`.

    Returns
    -------
    exists : `bool`
        `True` if the resource exists.
    """
    raise NotImplementedError
@classmethod
def mexists(cls, uris: Iterable[ButlerURI]) -> Dict[ButlerURI, bool]:
    """Check for existence of multiple URIs at once.

    The ``exists()`` method of each URI is run in a thread pool of at
    most ``MAX_WORKERS`` threads.

    Parameters
    ----------
    uris : iterable of `ButlerURI`
        The URIs to test.

    Returns
    -------
    existence : `dict` of [`ButlerURI`, `bool`]
        Mapping of original URI to boolean indicating existence. A URI
        whose check raised an exception is reported as not existing.
    """
    results: Dict[ButlerURI, bool] = {}

    # Use the executor as a context manager so that its worker threads
    # are always shut down, even if submitting a check fails part way.
    with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as exists_executor:
        future_exists = {exists_executor.submit(uri.exists): uri for uri in uris}

        for future in concurrent.futures.as_completed(future_exists):
            uri = future_exists[future]
            try:
                exists = future.result()
            except Exception:
                # Deliberate best effort: a failed check is reported as
                # the resource not existing.
                exists = False
            results[uri] = exists
    return results
def remove(self) -> None:
    """Remove the resource.

    This implementation always raises `NotImplementedError`.
    """
    raise NotImplementedError
def isabs(self) -> bool:
    """Indicate that the resource is fully specified.

    This implementation always reports an absolute URI.

    Returns
    -------
    isabs : `bool`
        `True` in all cases except schemeless URI.
    """
    return True
def abspath(self) -> ButlerURI:
    """Return URI using an absolute path.

    This implementation returns the object itself.

    Returns
    -------
    abs : `ButlerURI`
        Absolute URI. For non-schemeless URIs this always returns itself.
        Schemeless URIs are upgraded to file URIs.
    """
    return self
def _as_local(self) -> Tuple[str, bool]:
    """Return the location of the (possibly remote) resource as local file.

    This is a helper function for the `as_local` context manager. This
    implementation always raises `NotImplementedError`.

    Returns
    -------
    path : `str`
        If this is a remote resource, it will be a copy of the resource
        on the local file system, probably in a temporary directory.
        For a local resource this should be the actual path to the
        resource.
    is_temporary : `bool`
        Indicates if the local path is a temporary file or not.
    """
    raise NotImplementedError
@contextlib.contextmanager
def as_local(self) -> Iterator[ButlerURI]:
    """Return the location of the (possibly remote) resource as local file.

    Yields
    ------
    local : `ButlerURI`
        If this is a remote resource, it will be a copy of the resource
        on the local file system, probably in a temporary directory.
        For a local resource this should be the actual path to the
        resource.

    Notes
    -----
    The context manager will automatically delete any local temporary
    file.

    Examples
    --------
    Should be used as a context manager:

    .. code-block:: py

       with uri.as_local() as local:
           ospath = local.ospath
    """
    location, temporary = self._as_local()
    local = ButlerURI(location, isTemporary=temporary)

    try:
        yield local
    finally:
        # Only temporary copies are cleaned up, and the caller might
        # have relocated the temporary file in the meantime.
        if temporary:
            if local.exists():
                local.remove()
@classmethod
@contextlib.contextmanager
def temporary_uri(cls, prefix: Optional[ButlerURI] = None,
                  suffix: Optional[str] = None) -> Iterator[ButlerURI]:
    """Create a temporary URI.

    Parameters
    ----------
    prefix : `ButlerURI`, optional
        Prefix to use. Without this the path will be formed as a local
        file URI in a temporary directory. Ensuring that the prefix
        location exists is the responsibility of the caller.
    suffix : `str`, optional
        A file suffix to be used. The ``.`` should be included in this
        suffix.

    Yields
    ------
    uri : `ButlerURI`
        The temporary URI. Will be removed when the context is completed.
    """
    # Remember whether the directory was created here and so must be
    # deleted here. Can not rely on the isTemporary flag since an
    # external prefix may have that set as well.
    owns_directory = prefix is None
    if owns_directory:
        prefix = ButlerURI(tempfile.mkdtemp(), forceDirectory=True, isTemporary=True)

    # Need to create a randomized file name. For consistency do not
    # use mkstemp for local and something else for remote. Additionally
    # this method does not create the file to prevent name clashes.
    alphabet = "abcdefghijklmnopqrstuvwxyz0123456789_"
    generator = Random()
    picks = [generator.choice(alphabet) for _ in range(16)]
    name = "".join(picks) + (suffix if suffix else "")
    temporary = prefix.join(name, isTemporary=True)

    try:
        yield temporary
    finally:
        if owns_directory:
            shutil.rmtree(prefix.ospath, ignore_errors=True)
        else:
            # It's okay if this does not work because the user removed
            # the file.
            with contextlib.suppress(FileNotFoundError):
                temporary.remove()
def read(self, size: int = -1) -> bytes:
    """Open the resource and return the contents in bytes.

    This implementation always raises `NotImplementedError`.

    Parameters
    ----------
    size : `int`, optional
        The number of bytes to read. Negative or omitted indicates
        that all data should be read.
    """
    raise NotImplementedError
def write(self, data: bytes, overwrite: bool = True) -> None:
    """Write the supplied bytes to the new resource.

    This implementation always raises `NotImplementedError`.

    Parameters
    ----------
    data : `bytes`
        The bytes to write to the resource. The entire contents of the
        resource will be replaced.
    overwrite : `bool`, optional
        If `True` the resource will be overwritten if it exists. Otherwise
        the write will fail.
    """
    raise NotImplementedError
def mkdir(self) -> None:
    """For a dir-like URI, create the directory resource if needed.

    This implementation always raises `NotImplementedError`.
    """
    raise NotImplementedError
def isdir(self) -> bool:
    """Return True if this URI looks like a directory, else False.

    The answer is the value of the ``dirLike`` attribute.
    """
    looks_like_directory = self.dirLike
    return looks_like_directory
def size(self) -> int:
    """For non-dir-like URI, return the size of the resource.

    This implementation always raises `NotImplementedError`.

    Returns
    -------
    sz : `int`
        The size in bytes of the resource associated with this URI.
        Returns 0 if dir-like.
    """
    raise NotImplementedError
def __str__(self) -> str:
    """Convert the URI to its native string form, as given by `geturl`."""
    url = self.geturl()
    return url
def __repr__(self) -> str:
    """Return string representation suitable for evaluation.

    The string form of the URI is wrapped in a ``ButlerURI("...")``
    call.
    """
    url = self.geturl()
    return 'ButlerURI("' + url + '")'
def __eq__(self, other: Any) -> bool:
    """Compare supplied object with this `ButlerURI`.

    Two `ButlerURI` objects are equal if their string forms from
    `geturl` are equal. Comparison with any other type returns
    `NotImplemented`.
    """
    if isinstance(other, ButlerURI):
        return self.geturl() == other.geturl()
    return NotImplemented
def __hash__(self) -> int:
    """Return hash of this object, computed from its string form."""
    text = str(self)
    return hash(text)
def __copy__(self) -> ButlerURI:
    """Return this object as its own shallow copy.

    Object is immutable so copy can return itself.
    """
    # Implemented explicitly because the __new__ method confuses things
    return self
def __deepcopy__(self, memo: Any) -> ButlerURI:
    """Return this object as its own deep copy.

    Object is immutable so copy can return itself. The ``memo``
    argument is not used.
    """
    # Implemented explicitly because the __new__ method confuses things
    return self
def __getnewargs__(self) -> Tuple:
    """Support pickling.

    Returns a one-element tuple holding the string form of the URI.
    """
    text = str(self)
    return (text,)
@classmethod
def _fixDirectorySep(cls, parsed: urllib.parse.ParseResult,
                     forceDirectory: bool = False) -> Tuple[urllib.parse.ParseResult, bool]:
    """Ensure that a path separator is present on directory paths.

    Parameters
    ----------
    parsed : `~urllib.parse.ParseResult`
        The result from parsing a URI using `urllib.parse`.
    forceDirectory : `bool`, optional
        If `True` forces the URI to end with a separator, otherwise given
        URI is interpreted as is. Specifying that the URI is conceptually
        equivalent to a directory can break some ambiguities when
        interpreting the last element of a path.

    Returns
    -------
    modified : `~urllib.parse.ParseResult`
        The parsed URI, with a separator appended to the path if one
        had to be added.
    dirLike : `bool`
        `True` if given parsed URI has a trailing separator or
        forceDirectory is True. Otherwise `False`.
    """
    separator = cls._pathModule.sep

    # A path already ending on a separator is dir-like as it stands.
    if parsed.path.endswith(separator):
        return parsed, True

    # Explicitly declared a directory: add the missing separator.
    if forceDirectory:
        return parsed._replace(path=parsed.path + separator), True

    # Otherwise this is not a directory-like URI.
    return parsed, False
@classmethod
def _fixupPathUri(cls, parsed: urllib.parse.ParseResult, root: Optional[Union[str, ButlerURI]] = None,
                  forceAbsolute: bool = False,
                  forceDirectory: bool = False) -> Tuple[urllib.parse.ParseResult, bool]:
    """Correct any issues with the supplied URI.

    Parameters
    ----------
    parsed : `~urllib.parse.ParseResult`
        The result from parsing a URI using `urllib.parse`.
    root : `str` or `ButlerURI`, ignored
        Not used by this implementation since all URIs are
        absolute except for those representing the local file system.
    forceAbsolute : `bool`, ignored.
        Not used by this implementation. URIs are generally always
        absolute.
    forceDirectory : `bool`, optional
        If `True` forces the URI to end with a separator, otherwise given
        URI is interpreted as is. Specifying that the URI is conceptually
        equivalent to a directory can break some ambiguities when
        interpreting the last element of a path.

    Returns
    -------
    modified : `~urllib.parse.ParseResult`
        Update result if a URI is being handled.
    dirLike : `bool`
        `True` if given parsed URI has a trailing separator or
        forceDirectory is True. Otherwise `False`.

    Notes
    -----
    Relative paths are explicitly not supported by RFC8089 but `urllib`
    does accept URIs of the form ``file:relative/path.ext``. They need
    to be turned into absolute paths before they can be used. This is
    always done regardless of the ``forceAbsolute`` parameter.

    AWS S3 differentiates between keys with trailing POSIX separators (i.e.
    `/dir` and `/dir/`) whereas POSIX does not necessarily.

    Scheme-less paths are normalized.
    """
    # This implementation only fixes the directory separator.
    fixed = cls._fixDirectorySep(parsed, forceDirectory)
    return fixed
def transfer_from(
    self,
    src: ButlerURI,
    transfer: str,
    overwrite: bool = False,
    transaction: Optional[Union[DatastoreTransaction, NoTransaction]] = None,
) -> None:
    """Transfer a resource from another URI to this URI.

    Parameters
    ----------
    src : `ButlerURI`
        Source URI.
    transfer : `str`
        Mode to use for transferring the resource. Generically there are
        many standard options: copy, link, symlink, hardlink, relsymlink.
        Not all URIs support all modes.
    overwrite : `bool`, optional
        Allow an existing file to be overwritten. Defaults to `False`.
    transaction : `DatastoreTransaction`, optional
        A transaction object that can (depending on implementation)
        rollback transfers on error. Not guaranteed to be implemented.

    Raises
    ------
    NotImplementedError
        Always raised by this implementation, which supports no transfer
        modes. The message names the scheme of this URI.

    Notes
    -----
    Conceptually this is hard to scale as the number of URI schemes
    grows. The destination URI is more important than the source URI
    since that is where all the transfer modes are relevant (with the
    complication that "move" deletes the source).

    Local file to local file is the fundamental use case but every
    other scheme has to support "copy" to local file (with implicit
    support for "move") and copy from local file.
    All the "link" options tend to be specific to local file systems.

    "move" is a "copy" where the remote resource is deleted at the end.
    Whether this works depends on the source URI rather than the
    destination URI. Reverting a move on transaction rollback is
    expected to be problematic if a remote resource was involved.
    """
    message = f"No transfer modes supported by URI scheme {self.scheme}"
    raise NotImplementedError(message)
def walk(
    self, file_filter: Optional[Union[str, re.Pattern]] = None
) -> Iterator[Union[List, Tuple[ButlerURI, List[str], List[str]]]]:
    """Walk the directory tree returning matching files and directories.

    Parameters
    ----------
    file_filter : `str` or `re.Pattern`, optional
        Regex to filter out files from the list before it is returned.

    Yields
    ------
    dirpath : `ButlerURI`
        Current directory being examined.
    dirnames : `list` of `str`
        Names of subdirectories within dirpath.
    filenames : `list` of `str`
        Names of all the files within dirpath.

    Raises
    ------
    NotImplementedError
        Always raised by this implementation.

    Notes
    -----
    The return annotation also allows a bare `list` to be yielded in
    place of the three-element tuple. ``findFileResources`` treats such
    a falsy value as meaning the URI does not exist and skips it.
    """
    raise NotImplementedError()
@classmethod
def findFileResources(
    cls,
    candidates: Iterable[Union[str, ButlerURI]],
    file_filter: Optional[str] = None,
    grouped: bool = False,
) -> Iterator[Union[ButlerURI, Iterator[ButlerURI]]]:
    """Get all the files from a list of values.

    Parameters
    ----------
    candidates : iterable [`str` or `ButlerURI`]
        The files to return and directories in which to look for files to
        return.
    file_filter : `str`, optional
        The regex to use when searching for files within directories.
        By default returns all the found files.
    grouped : `bool`, optional
        If `True` the results will be grouped by directory and each
        yielded value will be an iterator over URIs. If `False` each
        URI will be returned separately.

    Yields
    ------
    found_file : `ButlerURI`
        The passed-in URIs and URIs found in passed-in directories.
        If grouping is enabled, each of the yielded values will be an
        iterator yielding members of the group. Files given explicitly
        will be returned as a single group at the end.

    Notes
    -----
    Without grouping, a value that is not a directory is yielded as soon
    as it is encountered. If a value is a directory, all the files in
    the directory (recursively) that match the regex will be yielded in
    turn.

    With grouping, every yielded group is bound to its own directory at
    the moment it is created, so groups can safely be collected first
    and consumed later, in any order.
    """
    fileRegex = None if file_filter is None else re.compile(file_filter)

    # Explicitly given (non-directory) URIs, only collected when grouping.
    singles = []

    # Find all the files of interest
    for location in candidates:
        uri = ButlerURI(location)

        if not uri.isdir():
            if grouped:
                singles.append(uri)
            else:
                yield uri
            continue

        for found in uri.walk(fileRegex):
            if not found:
                # This means the uri does not exist and by
                # convention we ignore it
                continue
            root, _, files = found
            if not files:
                continue
            if grouped:
                # ``map`` captures ``root.join`` right now. A generator
                # expression would look ``root`` up only when consumed,
                # by which time this loop may have rebound it to a
                # different directory.
                yield map(root.join, files)
            else:
                for name in files:
                    yield root.join(name)

    # Finally, return any explicitly given files in one group
    if grouped and singles:
        yield iter(singles)