Coverage for python/lsst/daf/butler/core/location.py : 18%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("Location", "LocationFactory", "ButlerURI")

import copy
import os
import os.path
import posixpath
import types
import urllib
import urllib.parse
from pathlib import Path, PurePath, PurePosixPath
from typing import (
    Any,
    Optional,
    Tuple,
    Union,
)

# Determine if the path separator for the OS looks like POSIX.
# When this is True the os <-> posix conversions below are no-ops.
IS_POSIX = os.sep == posixpath.sep

# Root path for this operating system, taken from the resolved current
# working directory (``/`` on POSIX systems).
OS_ROOT_PATH = Path().resolve().root


def os2posix(ospath: str) -> str:
    """Convert a local path description to a POSIX path description.

    Parameters
    ----------
    ospath : `str`
        Path using the local path separator.

    Returns
    -------
    posix : `str`
        Path using POSIX path separator.
    """
    # Nothing to convert when the local separator is already POSIX.
    if IS_POSIX:
        return ospath

    posix = PurePath(ospath).as_posix()

    # PurePath strips trailing "/" from paths such that you can no
    # longer tell if a path is meant to be referring to a directory.
    # Put the separator back if the input had one.
    if ospath.endswith(os.sep) and not posix.endswith(posixpath.sep):
        posix += posixpath.sep

    return posix


def posix2os(posix: Union[PurePath, str]) -> str:
    """Convert a POSIX path description to a local path description.

    Parameters
    ----------
    posix : `str` or `PurePath`
        Path using the POSIX path separator.

    Returns
    -------
    ospath : `str`
        Path using OS path separator.
    """
    # Nothing to convert when the local separator is already POSIX.
    if IS_POSIX:
        return str(posix)

    posixPath = PurePosixPath(posix)
    paths = list(posixPath.parts)

    # An empty or "." path has no components at all; there is nothing to
    # convert and indexing or joining an empty list would fail.
    if not paths:
        return str(posix)

    # Have to convert the root directory after splitting
    if paths[0] == posixPath.root:
        paths[0] = OS_ROOT_PATH

    # Trailing "/" is stripped so we need to add back an empty path
    # for consistency
    if str(posix).endswith(posixpath.sep):
        paths.append("")

    return os.path.join(*paths)


class ButlerURI:
    """Convenience wrapper around URI parsers.

    Provides access to URI components and can convert file
    paths into absolute path URIs. Scheme-less URIs are treated as if
    they are local file system paths and are converted to absolute URIs.

    Parameters
    ----------
    uri : `str` or `urllib.parse.ParseResult`
        URI in string form.  Can be scheme-less if referring to a local
        filesystem path.
    root : `str`, optional
        When fixing up a relative path in a ``file`` scheme or if scheme-less,
        use this as the root. Must be absolute.  If `None` the current
        working directory will be used.
    forceAbsolute : `bool`, optional
        If `True`, scheme-less relative URI will be converted to an absolute
        path using a ``file`` scheme. If `False` scheme-less URI will remain
        scheme-less and will not be updated to ``file`` or absolute path.
    forceDirectory : `bool`, optional
        If `True` forces the URI to end with a separator, otherwise given URI
        is interpreted as is.

    Raises
    ------
    ValueError
        Raised if ``uri`` is neither a string nor a
        `urllib.parse.ParseResult`.
    """

    def __init__(self, uri: Union[str, urllib.parse.ParseResult],
                 root: Optional[str] = None, forceAbsolute: bool = True, forceDirectory: bool = False):
        if isinstance(uri, str):
            parsed = urllib.parse.urlparse(uri)
        elif isinstance(uri, urllib.parse.ParseResult):
            parsed = copy.copy(uri)
        else:
            raise ValueError("Supplied URI must be either string or ParseResult")

        parsed, dirLike = self._fixupPathUri(parsed, root=root,
                                             forceAbsolute=forceAbsolute,
                                             forceDirectory=forceDirectory)

        # True if the URI refers to something directory-like (ends on a
        # separator or was forced to be a directory).
        self.dirLike = dirLike
        self._uri = parsed

    @property
    def scheme(self) -> str:
        """The URI scheme (``://`` is not part of the scheme)."""
        return self._uri.scheme

    @property
    def netloc(self) -> str:
        """The URI network location."""
        return self._uri.netloc

    @property
    def path(self) -> str:
        """The path component of the URI."""
        return self._uri.path

    @property
    def ospath(self) -> str:
        """Path component of the URI localized to current OS.

        Raises `AttributeError` for ``s3`` URIs since they have no
        local file system representation.
        """
        if self.scheme == 's3':
            raise AttributeError('S3 URIs have no OS path.')
        return posix2os(self._uri.path)

    @property
    def relativeToPathRoot(self) -> str:
        """Returns path relative to network location.

        Effectively, this is the path property with posix separator stripped
        from the left hand side of the path.
        """
        # Scheme-less paths are in local OS form, everything else is POSIX.
        p: PurePath
        if not self.scheme:
            p = PurePath(self.path)
        else:
            p = PurePosixPath(self.path)
        relToRoot = str(p.relative_to(p.root))
        # The pure path classes drop the trailing separator so restore it
        # for directory-like URIs.
        if self.dirLike and not relToRoot.endswith("/"):
            relToRoot += "/"
        return relToRoot

    @property
    def fragment(self) -> str:
        """The fragment component of the URI."""
        return self._uri.fragment

    @property
    def params(self) -> str:
        """Any parameters included in the URI."""
        return self._uri.params

    @property
    def query(self) -> str:
        """Any query strings included in the URI."""
        return self._uri.query

    def geturl(self) -> str:
        """Return the URI in string form.

        Returns
        -------
        url : `str`
            String form of URI.
        """
        return self._uri.geturl()

    def split(self) -> Tuple["ButlerURI", str]:
        """Splits URI into head and tail. Equivalent to os.path.split where
        head preserves the URI components.

        Returns
        -------
        head : `ButlerURI`
            Everything leading up to tail, expanded and normalized as per
            ButlerURI rules.
        tail : `str`
            Last `self.path` component. Tail will be empty if path ends on a
            separator. Tail will never contain separators.
        """
        if self.scheme:
            head, tail = posixpath.split(self.path)
        else:
            head, tail = os.path.split(self.path)
        headuri = self._uri._replace(path=head)
        # The head is by construction a directory.
        return self.__class__(headuri, forceDirectory=True), tail

    def basename(self) -> str:
        """Returns the base name, last element of path, of the URI. If URI ends
        on a slash returns an empty string. This is the second element returned
        by split().

        Equivalent of os.path.basename().

        Returns
        -------
        tail : `str`
            Last part of the path attribute. Tail will be empty if path ends
            on a separator.
        """
        return self.split()[1]

    def dirname(self) -> "ButlerURI":
        """Returns a ButlerURI containing all the directories of the path
        attribute.

        Equivalent of os.path.dirname()

        Returns
        -------
        head : `ButlerURI`
            Everything except the tail of path attribute, expanded and
            normalized as per ButlerURI rules.
        """
        return self.split()[0]

    def replace(self, **kwargs: Any) -> "ButlerURI":
        """Replace components in a URI with new values and return a new
        instance.

        Parameters
        ----------
        **kwargs
            Components to replace, forwarded to the ``_replace`` method of
            the underlying `urllib.parse.ParseResult`.

        Returns
        -------
        new : `ButlerURI`
            New `ButlerURI` object with updated values.
        """
        return self.__class__(self._uri._replace(**kwargs))

    def updateFile(self, newfile: str) -> None:
        """Update in place the final component of the path with the supplied
        file name.

        Parameters
        ----------
        newfile : `str`
            File name with no path component.

        Notes
        -----
        Updates the URI in place.
        Updates the ButlerURI.dirLike attribute.
        """
        pathclass = posixpath if self.scheme else os.path

        # Mypy can't work out that these specific modules support split
        # and join
        dirpath, _ = pathclass.split(self.path)  # type: ignore
        newpath = pathclass.join(dirpath, newfile)  # type: ignore

        # The URI now refers to a file.
        self.dirLike = False
        self._uri = self._uri._replace(path=newpath)

    def getExtension(self) -> str:
        """Return the file extension(s) associated with this URI path.

        Returns
        -------
        ext : `str`
            The file extension (including the ``.``). Can be empty string
            if there is no file extension. Will return all file extensions
            as a single extension such that ``file.fits.gz`` will return
            a value of ``.fits.gz``.
        """
        if not self.scheme:
            extensions = PurePath(self.path).suffixes
        else:
            extensions = PurePosixPath(self.path).suffixes
        return "".join(extensions)

    def __str__(self) -> str:
        return self.geturl()

    def __repr__(self) -> str:
        return f'ButlerURI("{self.geturl()}")'

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, ButlerURI):
            return False
        return self.geturl() == other.geturl()

    @staticmethod
    def _fixupPathUri(parsed: urllib.parse.ParseResult, root: Optional[str] = None,
                      forceAbsolute: bool = False,
                      forceDirectory: bool = False) -> Tuple[urllib.parse.ParseResult, bool]:
        """Fix up relative paths in URI instances.

        Parameters
        ----------
        parsed : `~urllib.parse.ParseResult`
            The result from parsing a URI using `urllib.parse`.
        root : `str`, optional
            Path to use as root when converting relative to absolute.
            If `None`, it will be the current working directory. This
            is a local file system path, not a URI.
        forceAbsolute : `bool`, optional
            If `True`, scheme-less relative URI will be converted to an
            absolute path. If `False` scheme-less relative URI will remain
            relative. URIs with a defined scheme will not be affected
            by this parameter.
        forceDirectory : `bool`, optional
            If `True` forces the URI to end with a separator, otherwise given
            URI is interpreted as is.

        Returns
        -------
        modified : `~urllib.parse.ParseResult`
            Update result if a URI is being handled.
        dirLike : `bool`
            `True` if given parsed URI has a trailing separator or
            forceDirectory is True. Otherwise `False`.

        Notes
        -----
        Relative paths are explicitly not supported by RFC8089 but `urllib`
        does accept URIs of the form ``file:relative/path.ext``. They need
        to be turned into absolute paths before they can be used.  This is
        always done regardless of the ``forceAbsolute`` parameter.

        AWS S3 differentiates between keys with trailing POSIX separators (i.e
        `/dir` and `/dir/`) whereas POSIX does not necessarily.

        Scheme-less paths are normalized.

        A separator is only ever appended if the path does not already end
        with one, so forcing a directory never produces a doubled separator.
        """
        # assume we are not dealing with a directory like URI
        dirLike = False
        if not parsed.scheme or parsed.scheme == "file":

            # Replacement values for the URI
            replacements = {}

            if root is None:
                root = os.path.abspath(os.path.curdir)

            if not parsed.scheme:
                # if there was no scheme this is a local OS file path
                # which can support tilde expansion.
                expandedPath = os.path.expanduser(parsed.path)

                # Ensure that this is a file URI if it is already absolute
                if os.path.isabs(expandedPath):
                    replacements["scheme"] = "file"
                    replacements["path"] = os2posix(os.path.normpath(expandedPath))
                elif forceAbsolute:
                    # This can stay in OS path form, do not change to file
                    # scheme.
                    replacements["path"] = os.path.normpath(os.path.join(root, expandedPath))
                else:
                    # No change needed for relative local path staying relative
                    # except normalization
                    replacements["path"] = os.path.normpath(expandedPath)
                    # normalization of empty path returns "." so we are dirLike
                    if expandedPath == "":
                        dirLike = True

                # normpath strips trailing "/" which makes it hard to keep
                # track of directory vs file when calling replaceFile
                # find the appropriate separator
                sep = posixpath.sep if "scheme" in replacements else os.sep

                # Directory-like if explicitly required or if the supplied
                # path ended on a separator. Only put the separator back
                # if normpath stripped it (it is retained for the root).
                if forceDirectory or dirLike or expandedPath.endswith(os.sep):
                    dirLike = True
                    if not replacements["path"].endswith(sep):
                        replacements["path"] += sep

            else:
                # file URI implies POSIX path separators so split as POSIX,
                # then join as os, and convert to abspath. Do not handle
                # home directories since "file" scheme is explicitly documented
                # to not do tilde expansion.
                sep = posixpath.sep
                endsOnSep = parsed.path.endswith(sep)

                # An absolute path needs no fix up beyond the directory
                # separator handling.
                if posixpath.isabs(parsed.path):
                    if forceDirectory and not endsOnSep:
                        parsed = parsed._replace(path=parsed.path + sep)
                    return copy.copy(parsed), forceDirectory or endsOnSep

                replacements["path"] = posixpath.normpath(posixpath.join(os2posix(root), parsed.path))

                # normpath strips trailing "/" so put it back if necessary
                # Acknowledge that trailing separator exists.
                if forceDirectory or endsOnSep:
                    dirLike = True
                    if not replacements["path"].endswith(sep):
                        replacements["path"] += sep

            # ParseResult is a NamedTuple so _replace is standard API
            parsed = parsed._replace(**replacements)

        # URI is dir-like if explicitly stated or if it ends on a separator
        endsOnSep = parsed.path.endswith(posixpath.sep)
        if forceDirectory or endsOnSep:
            dirLike = True
            # only add the separator if it's not already there
            if not endsOnSep:
                parsed = parsed._replace(path=parsed.path + posixpath.sep)

        return parsed, dirLike


class Location:
    """Identifies a location within the `Datastore`.

    Parameters
    ----------
    datastoreRootUri : `ButlerURI` or `str`
        Base URI for this datastore, must include an absolute path.
    path : `str`
        Relative path within datastore.  Assumed to be using the local
        path separator if a ``file`` scheme is being used for the URI,
        else a POSIX separator.

    Raises
    ------
    ValueError
        Raised if the root is not a `ButlerURI` or `str`, if the root URI
        does not have an absolute path, or if ``path`` is absolute.
    """

    __slots__ = ("_datastoreRootUri", "_path")

    def __init__(self, datastoreRootUri: Union["ButlerURI", str], path: str):
        if isinstance(datastoreRootUri, str):
            datastoreRootUri = ButlerURI(datastoreRootUri, forceDirectory=True)
        elif not isinstance(datastoreRootUri, ButlerURI):
            raise ValueError("Datastore root must be a ButlerURI instance")

        if not posixpath.isabs(datastoreRootUri.path):
            raise ValueError(f"Supplied URI must be an absolute path (given {datastoreRootUri}).")

        self._datastoreRootUri = datastoreRootUri

        # Paths within a file-scheme datastore use the local OS separator,
        # everything else is POSIX.
        pathModule: types.ModuleType
        if self._datastoreRootUri.scheme == "file":
            pathModule = os.path
        else:
            pathModule = posixpath

        # mypy can not work out that these modules support isabs
        if pathModule.isabs(path):  # type: ignore
            raise ValueError("Path within datastore must be relative not absolute")

        self._path = path

    def __str__(self) -> str:
        return self.uri

    def __repr__(self) -> str:
        uri = self._datastoreRootUri.geturl()
        path = self._path
        return f"{self.__class__.__name__}({uri!r}, {path!r})"

    @property
    def uri(self) -> str:
        """URI string corresponding to fully-specified location in datastore.
        """
        uriPath = os2posix(self.path)
        return self._datastoreRootUri.replace(path=uriPath).geturl()

    @property
    def path(self) -> str:
        """Path corresponding to location.

        This path includes the root of the `Datastore`, but does not include
        non-path components of the root URI.  If a file URI scheme is being
        used the path will be returned with the local OS path separator.
        """
        if not self._datastoreRootUri.scheme:
            # Entirely local file system
            return os.path.normpath(os.path.join(self._datastoreRootUri.path, self.pathInStore))
        elif self._datastoreRootUri.scheme == "file":
            return os.path.normpath(os.path.join(posix2os(self._datastoreRootUri.path), self.pathInStore))
        else:
            return posixpath.join(self._datastoreRootUri.path, self.pathInStore)

    @property
    def pathInStore(self) -> str:
        """Path corresponding to location relative to `Datastore` root.

        Uses the same path separator as supplied to the object constructor.
        """
        return self._path

    @property
    def netloc(self) -> str:
        """The URI network location."""
        return self._datastoreRootUri.netloc

    @property
    def relativeToPathRoot(self) -> str:
        """Returns the path component of the URI relative to the network
        location.

        Effectively, this is the path property with POSIX separator stripped
        from the left hand side of the path.
        """
        p: PurePath
        if self._datastoreRootUri.scheme == 'file' or not self._datastoreRootUri.scheme:
            p = PurePath(os2posix(self.path))
        else:
            p = PurePosixPath(self.path)
        stripped = p.relative_to(p.root)
        return str(posix2os(stripped))

    def updateExtension(self, ext: Optional[str]) -> None:
        """Update the file extension associated with this `Location`.

        All file extensions are replaced.

        Parameters
        ----------
        ext : `str`
            New extension. If an empty string is given any extension will
            be removed. If `None` is given there will be no change.
        """
        if ext is None:
            return

        # Get the extension and remove it from the path if one is found
        # .fits.gz counts as one extension do not use os.path.splitext
        current = self.getExtension()
        path = self.pathInStore
        if current:
            path = path[:-len(current)]

        # Ensure that we have a leading "." on file extension (and we do not
        # try to modify the empty string)
        if ext and not ext.startswith("."):
            ext = "." + ext

        self._path = path + ext

    def getExtension(self) -> str:
        """Return the file extension(s) associated with this location.

        Returns
        -------
        ext : `str`
            The file extension (including the ``.``). Can be empty string
            if there is no file extension. Will return all file extensions
            as a single extension such that ``file.fits.gz`` will return
            a value of ``.fits.gz``.
        """
        # The path property is in local OS form for scheme-less and file
        # URIs but is always a POSIX path for every other scheme.
        if not self._datastoreRootUri.scheme or self._datastoreRootUri.scheme == "file":
            extensions = PurePath(self.path).suffixes
        else:
            extensions = PurePosixPath(self.path).suffixes
        return "".join(extensions)


class LocationFactory:
    """Factory for `Location` instances.

    The factory is constructed from the root location of the datastore.
    This location can be a path on the file system (absolute or relative)
    or as a URI.

    Parameters
    ----------
    datastoreRoot : `str`
        Root location of the `Datastore` either as a path in the local
        filesystem or as a URI.  File scheme URIs can be used. If a local
        filesystem path is used without URI scheme, it will be converted
        to an absolute path and any home directory indicators expanded.
        If a file scheme is used with a relative path, the path will
        be treated as a posixpath but then converted to an absolute path.
    """

    def __init__(self, datastoreRoot: str):
        self._datastoreRootUri = ButlerURI(datastoreRoot, forceAbsolute=True,
                                           forceDirectory=True)

    def __str__(self) -> str:
        return f"{self.__class__.__name__}@{self._datastoreRootUri}"

    @property
    def netloc(self) -> str:
        """Returns the network location of root location of the `Datastore`."""
        return self._datastoreRootUri.netloc

    def fromPath(self, path: str) -> "Location":
        """Factory function to create a `Location` from a POSIX path.

        Parameters
        ----------
        path : `str`
            A standard POSIX path, relative to the `Datastore` root.

        Returns
        -------
        location : `Location`
            The equivalent `Location`.

        Raises
        ------
        ValueError
            Raised if the supplied path is absolute.
        """
        # The path is documented as POSIX so reject POSIX-absolute paths
        # even on systems where the local path rules differ.
        if os.path.isabs(path) or posixpath.isabs(path):
            raise ValueError("LocationFactory path must be relative to datastore, not absolute.")
        return Location(self._datastoreRootUri, path)