Coverage for python/lsst/daf/butler/core/location.py : 18%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ("Location", "LocationFactory", "ButlerURI")
import copy
import os
import os.path
import posixpath
import types
import urllib
import urllib.parse
from pathlib import Path, PurePath, PurePosixPath
from typing import (
    Any,
    Optional,
    Tuple,
    Union,
)
# True when the path separator of the running OS matches the POSIX one.
IS_POSIX = posixpath.sep == os.sep

# Root component of a resolved path on this operating system.
OS_ROOT_PATH = Path().resolve().root
def os2posix(ospath: str) -> str:
    """Convert a local path description to a POSIX path description.

    Parameters
    ----------
    ospath : `str`
        Path using the local path separator.

    Returns
    -------
    posix : `str`
        Path using POSIX path separator.
    """
    if not IS_POSIX:
        converted = PurePath(ospath).as_posix()

        # PurePath drops a trailing separator, which would lose the hint
        # that the path refers to a directory, so put it back when the
        # input had one.
        hadTrailingSep = ospath.endswith(os.sep)
        if hadTrailingSep and not converted.endswith(posixpath.sep):
            converted += posixpath.sep

        return converted

    # Local separator is already the POSIX one; nothing to convert.
    return ospath
def posix2os(posix: Union[PurePath, str]) -> str:
    """Convert a POSIX path description to a local path description.

    Parameters
    ----------
    posix : `str` or `pathlib.PurePath`
        Path using the POSIX path separator.

    Returns
    -------
    ospath : `str`
        Path using OS path separator.
    """
    if IS_POSIX:
        return str(posix)

    posixPath = PurePosixPath(posix)
    paths = list(posixPath.parts)

    # An empty path has no parts at all; indexing or joining an empty
    # list would raise, so hand back the same string form that the
    # POSIX branch above returns.
    if not paths:
        return str(posix)

    # Have to convert the root directory after splitting
    if paths[0] == posixPath.root:
        paths[0] = OS_ROOT_PATH

    # Trailing "/" is stripped so we need to add back an empty path
    # for consistency
    if str(posix).endswith(posixpath.sep):
        paths.append("")

    return os.path.join(*paths)
class ButlerURI:
    """Convenience wrapper around URI parsers.

    Provides access to URI components and can convert file
    paths into absolute path URIs. Scheme-less URIs are treated as if
    they are local file system paths and are converted to absolute URIs.

    Parameters
    ----------
    uri : `str`, `urllib.parse.ParseResult` or `ButlerURI`
        URI in string form. Can be scheme-less if referring to a local
        filesystem path. If a `ButlerURI` is given its parsed URI and
        ``dirLike`` flag are copied and the remaining parameters are not
        consulted.
    root : `str`, optional
        When fixing up a relative path in a ``file`` scheme or if scheme-less,
        use this as the root. Must be absolute. If `None` the current
        working directory will be used.
    forceAbsolute : `bool`, optional
        If `True`, scheme-less relative URI will be converted to an absolute
        path using a ``file`` scheme. If `False` scheme-less URI will remain
        scheme-less and will not be updated to ``file`` or absolute path.
    forceDirectory : `bool`, optional
        If `True` forces the URI to end with a separator, otherwise given URI
        is interpreted as is.

    Raises
    ------
    ValueError
        Raised if ``uri`` is not a string, `ButlerURI` or
        `urllib.parse.ParseResult`.
    """

    def __init__(self, uri: Union[str, urllib.parse.ParseResult, "ButlerURI"],
                 root: Optional[str] = None, forceAbsolute: bool = True, forceDirectory: bool = False):
        # Parsed form of the URI.
        self._uri: urllib.parse.ParseResult
        # True if the URI is known to refer to a directory-like entity.
        self.dirLike: bool

        if isinstance(uri, ButlerURI):
            # Already fully configured; copy the state, no parsing required.
            self._uri = copy.copy(uri._uri)
            self.dirLike = uri.dirLike
            return

        if isinstance(uri, str):
            parsed = urllib.parse.urlparse(uri)
        elif isinstance(uri, urllib.parse.ParseResult):
            parsed = copy.copy(uri)
        else:
            raise ValueError(f"Supplied URI must be string, ButlerURI, or ParseResult but got '{uri!r}'")

        parsed, dirLike = self._fixupPathUri(parsed, root=root,
                                             forceAbsolute=forceAbsolute,
                                             forceDirectory=forceDirectory)
        self.dirLike = dirLike
        self._uri = parsed

    @property
    def scheme(self) -> str:
        """The URI scheme (``://`` is not part of the scheme)."""
        return self._uri.scheme

    @property
    def netloc(self) -> str:
        """The URI network location."""
        return self._uri.netloc

    @property
    def path(self) -> str:
        """The path component of the URI."""
        return self._uri.path

    @property
    def ospath(self) -> str:
        """Path component of the URI localized to current OS.

        Raises `AttributeError` for URIs using the ``s3`` scheme.
        """
        if self.scheme == 's3':
            raise AttributeError('S3 URIs have no OS path.')
        return posix2os(self._uri.path)

    @property
    def relativeToPathRoot(self) -> str:
        """Returns path relative to network location.

        Effectively, this is the path property with posix separator stripped
        from the left hand side of the path. A trailing ``/`` is present
        if the URI is directory-like.
        """
        # Scheme-less paths are in local OS form, everything else is POSIX.
        if not self.scheme:
            p = PurePath(self.path)
        else:
            p = PurePosixPath(self.path)
        relToRoot = str(p.relative_to(p.root))
        # The pure path classes drop the trailing separator; restore it so
        # directory-like URIs remain recognizable.
        if self.dirLike and not relToRoot.endswith("/"):
            relToRoot += "/"
        return relToRoot

    @property
    def fragment(self) -> str:
        """The fragment component of the URI."""
        return self._uri.fragment

    @property
    def params(self) -> str:
        """Any parameters included in the URI."""
        return self._uri.params

    @property
    def query(self) -> str:
        """Any query strings included in the URI."""
        return self._uri.query

    def geturl(self) -> str:
        """Return the URI in string form.

        Returns
        -------
        url : `str`
            String form of URI.
        """
        return self._uri.geturl()

    def split(self) -> Tuple["ButlerURI", str]:
        """Splits URI into head and tail. Equivalent to os.path.split where
        head preserves the URI components.

        Returns
        -------
        head : `ButlerURI`
            Everything leading up to tail, expanded and normalized as per
            ButlerURI rules. A relative scheme-less URI yields a relative
            head.
        tail : `str`
            Last `self.path` component. Tail will be empty if path ends on a
            separator. Tail will never contain separators.
        """
        if self.scheme:
            head, tail = posixpath.split(self.path)
            # URIs with a scheme are unaffected by forceAbsolute.
            forceAbsolute = True
        else:
            head, tail = os.path.split(self.path)
            # A scheme-less URI may be a relative path and must stay that
            # way; the constructor default would make the head absolute.
            forceAbsolute = os.path.isabs(self.path)
        headuri = self._uri._replace(path=head)
        return self.__class__(headuri, forceDirectory=True, forceAbsolute=forceAbsolute), tail

    def basename(self) -> str:
        """Returns the base name, last element of path, of the URI. If URI ends
        on a slash returns an empty string. This is the second element returned
        by split().

        Equivalent of os.path.basename().

        Returns
        -------
        tail : `str`
            Last part of the path attribute. Tail will be empty if path ends
            on a separator.
        """
        return self.split()[1]

    def dirname(self) -> "ButlerURI":
        """Returns a ButlerURI containing all the directories of the path
        attribute.

        Equivalent of os.path.dirname()

        Returns
        -------
        head : `ButlerURI`
            Everything except the tail of path attribute, expanded and
            normalized as per ButlerURI rules.
        """
        return self.split()[0]

    def replace(self, **kwargs: Any) -> "ButlerURI":
        """Replace components in a URI with new values and return a new
        instance.

        Parameters
        ----------
        **kwargs
            Components to replace, forwarded to the ``_replace`` method of
            the underlying `urllib.parse.ParseResult`.

        Returns
        -------
        new : `ButlerURI`
            New `ButlerURI` object with updated values.
        """
        return self.__class__(self._uri._replace(**kwargs))

    def updateFile(self, newfile: str) -> None:
        """Update in place the final component of the path with the supplied
        file name.

        Parameters
        ----------
        newfile : `str`
            File name with no path component.

        Notes
        -----
        Updates the URI in place.
        Updates the ButlerURI.dirLike attribute.
        """
        pathclass = posixpath if self.scheme else os.path

        # Mypy can't work out that these specific modules support split
        # and join
        directory, _ = pathclass.split(self.path)  # type: ignore
        newpath = pathclass.join(directory, newfile)  # type: ignore

        self.dirLike = False
        self._uri = self._uri._replace(path=newpath)

    def getExtension(self) -> str:
        """Return the file extension(s) associated with this URI path.

        Returns
        -------
        ext : `str`
            The file extension (including the ``.``). Can be empty string
            if there is no file extension. Will return all file extensions
            as a single extension such that ``file.fits.gz`` will return
            a value of ``.fits.gz``.
        """
        if not self.scheme:
            extensions = PurePath(self.path).suffixes
        else:
            extensions = PurePosixPath(self.path).suffixes
        return "".join(extensions)

    def __str__(self) -> str:
        return self.geturl()

    def __repr__(self) -> str:
        return f'ButlerURI("{self.geturl()}")'

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, ButlerURI):
            # Let Python try the reflected comparison before deciding the
            # objects differ.
            return NotImplemented
        return self.geturl() == other.geturl()

    @staticmethod
    def _fixupPathUri(parsed: urllib.parse.ParseResult, root: Optional[str] = None,
                      forceAbsolute: bool = False,
                      forceDirectory: bool = False) -> Tuple[urllib.parse.ParseResult, bool]:
        """Fix up relative paths in URI instances.

        Parameters
        ----------
        parsed : `~urllib.parse.ParseResult`
            The result from parsing a URI using `urllib.parse`.
        root : `str`, optional
            Path to use as root when converting relative to absolute.
            If `None`, it will be the current working directory. This
            is a local file system path, not a URI.
        forceAbsolute : `bool`, optional
            If `True`, scheme-less relative URI will be converted to an
            absolute path using a ``file`` scheme. If `False` scheme-less URI
            will remain scheme-less and will not be updated to ``file`` or
            absolute path. URIs with a defined scheme will not be affected
            by this parameter.
        forceDirectory : `bool`, optional
            If `True` forces the URI to end with a separator, otherwise given
            URI is interpreted as is.

        Returns
        -------
        modified : `~urllib.parse.ParseResult`
            Update result if a URI is being handled.
        dirLike : `bool`
            `True` if given parsed URI has a trailing separator or
            forceDirectory is True. Otherwise `False`.

        Notes
        -----
        Relative paths are explicitly not supported by RFC8089 but `urllib`
        does accept URIs of the form ``file:relative/path.ext``. They need
        to be turned into absolute paths before they can be used. This is
        always done regardless of the ``forceAbsolute`` parameter.

        AWS S3 differentiates between keys with trailing POSIX separators (i.e
        `/dir` and `/dir/`) whereas POSIX does not necessarily.

        Scheme-less paths are normalized.

        A separator is only ever appended when the path does not already
        end with one.
        """
        # assume we are not dealing with a directory like URI
        dirLike = False
        if not parsed.scheme or parsed.scheme == "file":

            # Replacement values for the URI
            replacements = {}

            if root is None:
                root = os.path.abspath(os.path.curdir)

            if not parsed.scheme:
                # if there was no scheme this is a local OS file path
                # which can support tilde expansion.
                expandedPath = os.path.expanduser(parsed.path)

                # Ensure that this is a file URI if it is already absolute
                if os.path.isabs(expandedPath):
                    replacements["scheme"] = "file"
                    replacements["path"] = os2posix(os.path.normpath(expandedPath))
                elif forceAbsolute:
                    # This can stay in OS path form, do not change to file
                    # scheme.
                    replacements["path"] = os.path.normpath(os.path.join(root, expandedPath))
                else:
                    # No change needed for relative local path staying relative
                    # except normalization
                    replacements["path"] = os.path.normpath(expandedPath)
                    # normalization of empty path returns "." so we are dirLike
                    if expandedPath == "":
                        dirLike = True

                # normpath strips trailing "/" which makes it hard to keep
                # track of directory vs file when calling replaceFile
                # find the appropriate separator
                if "scheme" in replacements:
                    sep = posixpath.sep
                else:
                    sep = os.sep

                # Directory-like if explicitly requested, if already flagged
                # above, or if the supplied path ended on a separator. Only
                # add the separator back when normpath stripped it, so a
                # root path never ends up with a doubled separator.
                if forceDirectory or dirLike or expandedPath.endswith(os.sep):
                    dirLike = True
                    if not replacements["path"].endswith(sep):
                        replacements["path"] += sep

            elif parsed.scheme == "file":
                # file URI implies POSIX path separators so split as POSIX,
                # then join as os, and convert to abspath. Do not handle
                # home directories since "file" scheme is explicitly documented
                # to not do tilde expansion.
                sep = posixpath.sep
                if posixpath.isabs(parsed.path):
                    # Absolute paths need no fix up beyond directory handling.
                    endsOnSep = parsed.path.endswith(sep)
                    if forceDirectory or endsOnSep:
                        dirLike = True
                        # only add the separator if it's not already there
                        if not endsOnSep:
                            parsed = parsed._replace(path=parsed.path + sep)
                    return copy.copy(parsed), dirLike

                replacements["path"] = posixpath.normpath(posixpath.join(os2posix(root), parsed.path))

                # normpath strips trailing "/" so put it back if necessary
                # Acknowledge that trailing separator exists.
                if forceDirectory or parsed.path.endswith(sep):
                    dirLike = True
                    if not replacements["path"].endswith(sep):
                        replacements["path"] += sep
            else:
                raise RuntimeError("Unexpectedly got confused by URI scheme")

            # ParseResult is a NamedTuple so _replace is standard API
            parsed = parsed._replace(**replacements)

        # URI is dir-like if explicitly stated or if it ends on a separator
        endsOnSep = parsed.path.endswith(posixpath.sep)
        if forceDirectory or endsOnSep:
            dirLike = True
            # only add the separator if it's not already there
            if not endsOnSep:
                parsed = parsed._replace(path=parsed.path + posixpath.sep)

        return parsed, dirLike
class Location:
    """Identifies a location within the `Datastore`.

    Parameters
    ----------
    datastoreRootUri : `ButlerURI` or `str`
        Base URI for this datastore, must include an absolute path.
    path : `str`
        Relative path within datastore. Assumed to be using the local
        path separator if a ``file`` scheme (or no scheme) is being used
        for the URI, else a POSIX separator.

    Raises
    ------
    ValueError
        Raised if the root is neither a string nor a `ButlerURI`, if the
        root URI does not have an absolute path, or if ``path`` is
        absolute.
    """

    __slots__ = ("_datastoreRootUri", "_path")

    def __init__(self, datastoreRootUri: Union["ButlerURI", str], path: str):
        if isinstance(datastoreRootUri, str):
            datastoreRootUri = ButlerURI(datastoreRootUri, forceDirectory=True)
        elif not isinstance(datastoreRootUri, ButlerURI):
            raise ValueError("Datastore root must be a ButlerURI instance")

        if not posixpath.isabs(datastoreRootUri.path):
            raise ValueError(f"Supplied URI must be an absolute path (given {datastoreRootUri}).")

        self._datastoreRootUri = datastoreRootUri

        # Local roots are joined with os.path by the path property, so the
        # relative-path check has to use the same module.
        pathModule: types.ModuleType
        if self._isLocal():
            pathModule = os.path
        else:
            pathModule = posixpath

        # mypy can not work out that these modules support isabs
        if pathModule.isabs(path):  # type: ignore
            raise ValueError("Path within datastore must be relative not absolute")

        self._path = path

    def _isLocal(self) -> bool:
        """Return `True` if the root URI is scheme-less or uses ``file``."""
        scheme = self._datastoreRootUri.scheme
        return not scheme or scheme == "file"

    def __str__(self) -> str:
        return self.uri

    def __repr__(self) -> str:
        uri = self._datastoreRootUri.geturl()
        path = self._path
        return f"{self.__class__.__name__}({uri!r}, {path!r})"

    @property
    def uri(self) -> str:
        """URI string corresponding to fully-specified location in datastore.
        """
        # Only local paths are in OS form and need converting; paths of
        # other schemes already use POSIX separators.
        uriPath = os2posix(self.path) if self._isLocal() else self.path
        return self._datastoreRootUri.replace(path=uriPath).geturl()

    @property
    def path(self) -> str:
        """Path corresponding to location.

        This path includes the root of the `Datastore`, but does not include
        non-path components of the root URI. If a file URI scheme is being
        used the path will be returned with the local OS path separator.
        """
        if not self._datastoreRootUri.scheme:
            # Entirely local file system
            return os.path.normpath(os.path.join(self._datastoreRootUri.path, self.pathInStore))
        elif self._datastoreRootUri.scheme == "file":
            return os.path.normpath(os.path.join(posix2os(self._datastoreRootUri.path), self.pathInStore))
        else:
            return posixpath.join(self._datastoreRootUri.path, self.pathInStore)

    @property
    def pathInStore(self) -> str:
        """Path corresponding to location relative to `Datastore` root.

        Uses the same path separator as supplied to the object constructor.
        """
        return self._path

    @property
    def netloc(self) -> str:
        """The URI network location."""
        return self._datastoreRootUri.netloc

    @property
    def relativeToPathRoot(self) -> str:
        """Returns the path component of the URI relative to the network
        location.

        Effectively, this is the path property with the root separator
        stripped from the left hand side of the path. Local paths are
        returned using the OS separator; paths of other schemes keep
        POSIX separators.
        """
        if self._isLocal():
            p = PurePath(os2posix(self.path))
            return str(posix2os(p.relative_to(p.root)))

        # Non-file URI paths are POSIX by definition; do not localize them.
        posixPath = PurePosixPath(self.path)
        return str(posixPath.relative_to(posixPath.root))

    def updateExtension(self, ext: Optional[str]) -> None:
        """Update the file extension associated with this `Location`.

        All file extensions are replaced.

        Parameters
        ----------
        ext : `str`
            New extension. If an empty string is given any extension will
            be removed. If `None` is given there will be no change.
        """
        if ext is None:
            return

        # Get the extension and remove it from the path if one is found
        # .fits.gz counts as one extension do not use os.path.splitext
        current = self.getExtension()
        path = self.pathInStore
        if current:
            path = path[:-len(current)]

        # Ensure that we have a leading "." on file extension (and we do not
        # try to modify the empty string)
        if ext and not ext.startswith("."):
            ext = "." + ext

        self._path = path + ext

    def getExtension(self) -> str:
        """Return the file extension(s) associated with this location.

        Returns
        -------
        ext : `str`
            The file extension (including the ``.``). Can be empty string
            if there is no file extension. Will return all file extensions
            as a single extension such that ``file.fits.gz`` will return
            a value of ``.fits.gz``.
        """
        # The path property returns an OS path for local roots and a POSIX
        # path otherwise, so parse it with the matching flavour.
        if self._isLocal():
            extensions = PurePath(self.path).suffixes
        else:
            extensions = PurePosixPath(self.path).suffixes
        return "".join(extensions)
class LocationFactory:
    """Factory for `Location` instances.

    The factory is built from the root location of the datastore, which
    may be given as a file system path (absolute or relative) or as a URI.

    Parameters
    ----------
    datastoreRoot : `str`
        Root location of the `Datastore` either as a path in the local
        filesystem or as a URI. File scheme URIs can be used. If a local
        filesystem path is used without URI scheme, it will be converted
        to an absolute path and any home directory indicators expanded.
        If a file scheme is used with a relative path, the path will
        be treated as a posixpath but then converted to an absolute path.
    """

    def __init__(self, datastoreRoot: str):
        # The root is always absolute and always directory-like.
        rootUri = ButlerURI(datastoreRoot, forceAbsolute=True, forceDirectory=True)
        self._datastoreRootUri = rootUri

    def __str__(self) -> str:
        return "{}@{}".format(self.__class__.__name__, self._datastoreRootUri)

    @property
    def netloc(self) -> str:
        """Returns the network location of root location of the `Datastore`."""
        rootUri = self._datastoreRootUri
        return rootUri.netloc

    def fromPath(self, path: str) -> "Location":
        """Factory function to create a `Location` from a POSIX path.

        Parameters
        ----------
        path : `str`
            A standard POSIX path, relative to the `Datastore` root.

        Returns
        -------
        location : `Location`
            The equivalent `Location`.

        Raises
        ------
        ValueError
            Raised if ``path`` is absolute.
        """
        if not os.path.isabs(path):
            return Location(self._datastoreRootUri, path)
        raise ValueError("LocationFactory path must be relative to datastore, not absolute.")