Coverage for python/lsst/daf/butler/core/location.py : 17%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22__all__ = ("Location", "LocationFactory", "ButlerURI")
24import os
25import os.path
26import urllib
27import posixpath
28from pathlib import Path, PurePath, PurePosixPath
29import copy
# True when the native path separator of this OS is the POSIX one.
IS_POSIX = posixpath.sep == os.sep

# Root component of a resolved absolute path on this operating system.
OS_ROOT_PATH = Path().resolve().root
def os2posix(ospath):
    """Translate a path written with the local separator into POSIX form.

    Parameters
    ----------
    ospath : `str`
        Path using the local path separator.

    Returns
    -------
    posix : `str`
        Path using POSIX path separator. Returned unchanged when the local
        separator already is the POSIX one.
    """
    if not IS_POSIX:
        converted = PurePath(ospath).as_posix()

        # PurePath drops a trailing separator, which would lose the hint
        # that the path refers to a directory, so reinstate it.
        lostTrailingSep = ospath.endswith(os.sep) and not converted.endswith(posixpath.sep)
        return converted + posixpath.sep if lostTrailingSep else converted

    return ospath
def posix2os(posix):
    """Convert a POSIX path description to a local path description.

    Parameters
    ----------
    posix : `str`
        Path using the POSIX path separator.

    Returns
    -------
    ospath : `str`
        Path using OS path separator. Returned unchanged when the local
        separator already is the POSIX one.
    """
    if IS_POSIX:
        return posix

    posixPath = PurePosixPath(posix)
    paths = list(posixPath.parts)
    endsOnSep = posix.endswith(posixpath.sep)

    # "", "." and "./" have no components at all; indexing the parts or
    # joining an empty list would fail, so handle them explicitly.
    if not paths:
        return os.curdir + os.sep if endsOnSep else posix

    # Have to convert the root directory after splitting
    if paths[0] == posixPath.root:
        paths[0] = OS_ROOT_PATH

    # Trailing "/" is stripped so we need to add back an empty path
    # for consistency
    if endsOnSep:
        paths.append("")

    return os.path.join(*paths)
class ButlerURI:
    """Convenience wrapper around URI parsers.

    Provides access to URI components and can convert file
    paths into absolute path URIs. Scheme-less URIs are treated as if
    they are local file system paths and are converted to absolute URIs.

    Parameters
    ----------
    uri : `str` or `urllib.parse.ParseResult`
        URI in string form. Can be scheme-less if referring to a local
        filesystem path.
    root : `str`, optional
        When fixing up a relative path in a ``file`` scheme or if scheme-less,
        use this as the root. Must be absolute. If `None` the current
        working directory will be used.
    forceAbsolute : `bool`, optional
        If `True`, scheme-less relative URI will be converted to an absolute
        path using a ``file`` scheme. If `False` scheme-less URI will remain
        scheme-less and will not be updated to ``file`` or absolute path.
    forceDirectory : `bool`, optional
        If `True` forces the URI to end with a separator, otherwise given URI
        is interpreted as is.

    Raises
    ------
    ValueError
        Raised if ``uri`` is neither a `str` nor a
        `urllib.parse.ParseResult`.
    """

    def __init__(self, uri, root=None, forceAbsolute=True, forceDirectory=False):
        if isinstance(uri, str):
            parsed = urllib.parse.urlparse(uri)
        elif isinstance(uri, urllib.parse.ParseResult):
            parsed = copy.copy(uri)
        else:
            raise ValueError("Supplied URI must be either string or ParseResult")

        parsed, dirLike = self._fixupPathUri(parsed, root=root,
                                             forceAbsolute=forceAbsolute,
                                             forceDirectory=forceDirectory)

        # `True` if the URI is known to refer to a directory-like location.
        self.dirLike = dirLike
        self._uri = parsed

    @property
    def scheme(self):
        """The URI scheme (``://`` is not part of the scheme)."""
        return self._uri.scheme

    @property
    def netloc(self):
        """The URI network location."""
        return self._uri.netloc

    @property
    def path(self):
        """The path component of the URI."""
        return self._uri.path

    @property
    def ospath(self):
        """Path component of the URI localized to current OS."""
        if self.scheme == 's3':
            raise AttributeError('S3 URIs have no OS path.')
        return posix2os(self._uri.path)

    @property
    def relativeToPathRoot(self):
        """Returns path relative to network location.

        Effectively, this is the path property with posix separator stripped
        from the left hand side of the path.
        """
        # Scheme-less paths are local OS paths; anything else is POSIX.
        if not self.scheme:
            p = PurePath(self.path)
        else:
            p = PurePosixPath(self.path)
        relToRoot = str(p.relative_to(p.root))
        # The pure path classes drop the trailing separator; restore it for
        # directory-like URIs.
        if self.dirLike and not relToRoot.endswith("/"):
            relToRoot += "/"
        return relToRoot

    @property
    def fragment(self):
        """The fragment component of the URI."""
        return self._uri.fragment

    @property
    def params(self):
        """Any parameters included in the URI."""
        return self._uri.params

    @property
    def query(self):
        """Any query strings included in the URI."""
        return self._uri.query

    def geturl(self):
        """Return the URI in string form.

        Returns
        -------
        url : `str`
            String form of URI.
        """
        return self._uri.geturl()

    def split(self):
        """Splits URI into head and tail. Equivalent to os.path.split where
        head preserves the URI components.

        Returns
        -------
        head : `ButlerURI`
            Everything leading up to tail, expanded and normalized as per
            ButlerURI rules.
        tail : `str`
            Last `self.path` component. Tail will be empty if path ends on a
            separator. Tail will never contain separators.
        """
        if self.scheme:
            head, tail = posixpath.split(self.path)
        else:
            head, tail = os.path.split(self.path)
        headuri = self._uri._replace(path=head)
        # The head is a directory by construction.
        return self.__class__(headuri, forceDirectory=True), tail

    def basename(self):
        """Returns the base name, last element of path, of the URI. If URI ends
        on a slash returns an empty string. This is the second element returned
        by split().

        Equivalent of os.path.basename().

        Returns
        -------
        tail : `str`
            Last part of the path attribute. Tail will be empty if path ends
            on a separator.
        """
        return self.split()[1]

    def dirname(self):
        """Returns a ButlerURI containing all the directories of the path
        attribute.

        Equivalent of os.path.dirname()

        Returns
        -------
        head : `ButlerURI`
            Everything except the tail of path attribute, expanded and
            normalized as per ButlerURI rules.
        """
        return self.split()[0]

    def replace(self, **kwargs):
        """Replace components in a URI with new values and return a new
        instance.

        Parameters
        ----------
        **kwargs
            Components to replace, forwarded to the ``_replace`` method of
            the underlying `urllib.parse.ParseResult`.

        Returns
        -------
        new : `ButlerURI`
            New `ButlerURI` object with updated values.
        """
        return self.__class__(self._uri._replace(**kwargs))

    def updateFile(self, newfile):
        """Update in place the final component of the path with the supplied
        file name.

        Parameters
        ----------
        newfile : `str`
            File name with no path component.

        Notes
        -----
        Updates the URI in place.
        Updates the ButlerURI.dirLike attribute.
        """
        if self.scheme:
            # POSIX
            pathclass = posixpath
        else:
            pathclass = os.path

        directory, _ = pathclass.split(self.path)
        newpath = pathclass.join(directory, newfile)

        self.dirLike = False
        self._uri = self._uri._replace(path=newpath)

    def __str__(self):
        return self.geturl()

    @staticmethod
    def _fixupPathUri(parsed, root=None, forceAbsolute=False, forceDirectory=False):
        """Fix up relative paths in URI instances.

        Parameters
        ----------
        parsed : `~urllib.parse.ParseResult`
            The result from parsing a URI using `urllib.parse`.
        root : `str`, optional
            Path to use as root when converting relative to absolute.
            If `None`, it will be the current working directory. This
            is a local file system path, not a URI.
        forceAbsolute : `bool`, optional
            If `True`, scheme-less relative URI will be converted to an
            absolute path using a ``file`` scheme. If `False` scheme-less URI
            will remain scheme-less and will not be updated to ``file`` or
            absolute path. URIs with a defined scheme will not be affected
            by this parameter.
        forceDirectory : `bool`, optional
            If `True` forces the URI to end with a separator, otherwise given
            URI is interpreted as is.

        Returns
        -------
        modified : `~urllib.parse.ParseResult`
            Update result if a URI is being handled.
        dirLike : `bool`
            `True` if given parsed URI has a trailing separator or
            forceDirectory is True. Otherwise `False`.

        Notes
        -----
        Relative paths are explicitly not supported by RFC8089 but `urllib`
        does accept URIs of the form ``file:relative/path.ext``. They need
        to be turned into absolute paths before they can be used. This is
        always done regardless of the ``forceAbsolute`` parameter.

        AWS S3 differentiates between keys with trailing POSIX separators (i.e
        `/dir` and `/dir/`) whereas POSIX does not necessarily.

        Scheme-less paths are normalized.

        A trailing separator is only ever appended when the path does not
        already end with one.
        """
        # assume we are not dealing with a directory like URI
        dirLike = False

        if not parsed.scheme or parsed.scheme == "file":

            # Replacement values for the URI
            replacements = {}

            if root is None:
                root = os.path.abspath(os.path.curdir)

            if not parsed.scheme:
                # if there was no scheme this is a local OS file path
                # which can support tilde expansion.
                expandedPath = os.path.expanduser(parsed.path)

                # Ensure that this is a file URI if it is already absolute
                if os.path.isabs(expandedPath):
                    replacements["scheme"] = "file"
                    replacements["path"] = os2posix(os.path.normpath(expandedPath))
                elif forceAbsolute:
                    # This can stay in OS path form, do not change to file
                    # scheme.
                    replacements["path"] = os.path.normpath(os.path.join(root, expandedPath))
                else:
                    # No change needed for relative local path staying relative
                    # except normalization
                    replacements["path"] = os.path.normpath(expandedPath)
                    # normalization of empty path returns "." so we are dirLike
                    if expandedPath == "":
                        dirLike = True

                # normpath strips trailing "/" which makes it hard to keep
                # track of directory vs file when calling updateFile
                # find the appropriate separator
                sep = posixpath.sep if "scheme" in replacements else os.sep

                # Record that the path is directory-like if explicitly
                # required or if the given path ended on a separator, and
                # restore the separator if normpath stripped it.
                if forceDirectory or dirLike or expandedPath.endswith(os.sep):
                    dirLike = True
                    if not replacements["path"].endswith(sep):
                        replacements["path"] += sep

            elif not posixpath.isabs(parsed.path):
                # file URI implies POSIX path separators so join and
                # normalize as POSIX. Do not handle home directories since
                # "file" scheme is explicitly documented to not do tilde
                # expansion.
                sep = posixpath.sep
                replacements["path"] = posixpath.normpath(posixpath.join(os2posix(root), parsed.path))

                # normpath strips trailing "/" so put it back if necessary
                if forceDirectory or parsed.path.endswith(sep):
                    dirLike = True
                    if not replacements["path"].endswith(sep):
                        replacements["path"] += sep

            # An absolute file URI needs no path changes here; the trailing
            # separator handling below covers it.

            # ParseResult is a NamedTuple so _replace is standard API
            parsed = parsed._replace(**replacements)

        # URI is dir-like if explicitly stated or if it ends on a separator
        endsOnSep = parsed.path.endswith(posixpath.sep)
        if forceDirectory or endsOnSep:
            dirLike = True
            # only add the separator if it's not already there
            if not endsOnSep:
                parsed = parsed._replace(path=parsed.path + posixpath.sep)

        return parsed, dirLike
class Location:
    """Identifies a location within the `Datastore`.

    Parameters
    ----------
    datastoreRootUri : `ButlerURI` or `str`
        Base URI for this datastore, must include an absolute path.
    path : `str`
        Relative path within datastore. Assumed to be using the local
        path separator if a ``file`` scheme is being used for the URI,
        else a POSIX separator.

    Raises
    ------
    ValueError
        Raised if the root is neither a `str` nor a `ButlerURI`, if the
        root URI does not have an absolute path, or if ``path`` is absolute.
    """

    __slots__ = ("_datastoreRootUri", "_path")

    def __init__(self, datastoreRootUri, path):
        if isinstance(datastoreRootUri, str):
            datastoreRootUri = ButlerURI(datastoreRootUri, forceDirectory=True)
        elif not isinstance(datastoreRootUri, ButlerURI):
            raise ValueError("Datastore root must be a ButlerURI instance")

        if not posixpath.isabs(datastoreRootUri.path):
            raise ValueError(f"Supplied URI must be an absolute path (given {datastoreRootUri}).")

        self._datastoreRootUri = datastoreRootUri

        # Only ``file`` URIs carry paths in the local OS convention.
        if self._datastoreRootUri.scheme == "file":
            pathModule = os.path
        else:
            pathModule = posixpath

        if pathModule.isabs(path):
            raise ValueError("Path within datastore must be relative not absolute")

        self._path = path

    def __str__(self):
        return self.uri

    def __repr__(self):
        uri = self._datastoreRootUri.geturl()
        path = self._path
        return f"{self.__class__.__name__}({uri!r}, {path!r})"

    @property
    def uri(self):
        """URI string corresponding to fully-specified location in datastore.
        """
        uriPath = os2posix(self.path)
        return self._datastoreRootUri.replace(path=uriPath).geturl()

    @property
    def path(self):
        """Path corresponding to location.

        This path includes the root of the `Datastore`, but does not include
        non-path components of the root URI. If a file URI scheme is being
        used the path will be returned with the local OS path separator.
        """
        if not self._datastoreRootUri.scheme:
            # Entirely local file system
            return os.path.normpath(os.path.join(self._datastoreRootUri.path, self.pathInStore))
        elif self._datastoreRootUri.scheme == "file":
            return os.path.normpath(os.path.join(posix2os(self._datastoreRootUri.path), self.pathInStore))
        else:
            return posixpath.join(self._datastoreRootUri.path, self.pathInStore)

    @property
    def pathInStore(self):
        """Path corresponding to location relative to `Datastore` root.

        Uses the same path separator as supplied to the object constructor.
        """
        return self._path

    @property
    def netloc(self):
        """The URI network location."""
        return self._datastoreRootUri.netloc

    @property
    def relativeToPathRoot(self):
        """Returns the path component of the URI relative to the network
        location.

        Effectively, this is the path property with POSIX separator stripped
        from the left hand side of the path. Local (``file`` or scheme-less)
        locations are returned using the OS path separator; all other
        schemes keep the POSIX separator.
        """
        scheme = self._datastoreRootUri.scheme
        if scheme == 'file' or not scheme:
            p = PurePath(os2posix(self.path))
            # posix2os works on strings, so hand it the POSIX text form
            # rather than the path object itself.
            return posix2os(p.relative_to(p.root).as_posix())

        # Remote locations are always POSIX-like and must not be converted
        # to the local OS convention.
        p = PurePosixPath(self.path)
        return str(p.relative_to(p.root))

    def updateExtension(self, ext):
        """Update the file extension associated with this `Location`.

        Parameters
        ----------
        ext : `str`
            New extension. If an empty string is given any extension will
            be removed. If `None` is given there will be no change.
        """
        if ext is None:
            return

        path, _ = os.path.splitext(self.pathInStore)

        # Ensure that we have a leading "." on file extension (and we do not
        # try to modify the empty string)
        if ext and not ext.startswith("."):
            ext = "." + ext

        self._path = path + ext
class LocationFactory:
    """Factory for `Location` instances.

    A factory is bound to the root location of a datastore, which may be
    given as a file system path (absolute or relative) or as a URI.

    Parameters
    ----------
    datastoreRoot : `str`
        Root location of the `Datastore` either as a path in the local
        filesystem or as a URI. File scheme URIs can be used. If a local
        filesystem path is used without URI scheme, it will be converted
        to an absolute path and any home directory indicators expanded.
        If a file scheme is used with a relative path, the path will
        be treated as a posixpath but then converted to an absolute path.
    """

    def __init__(self, datastoreRoot):
        # The root is always treated as an absolute, directory-like URI.
        rootUri = ButlerURI(datastoreRoot, forceAbsolute=True, forceDirectory=True)
        self._datastoreRootUri = rootUri

    def __str__(self):
        return "{}@{}".format(self.__class__.__name__, self._datastoreRootUri)

    @property
    def netloc(self):
        """Returns the network location of root location of the `Datastore`."""
        return self._datastoreRootUri.netloc

    def fromPath(self, path):
        """Factory function to create a `Location` from a POSIX path.

        Parameters
        ----------
        path : `str`
            A standard POSIX path, relative to the `Datastore` root.

        Returns
        -------
        location : `Location`
            The equivalent `Location`.

        Raises
        ------
        ValueError
            Raised if ``path`` is absolute.
        """
        if not os.path.isabs(path):
            return Location(self._datastoreRootUri, path)
        raise ValueError("LocationFactory path must be relative to datastore, not absolute.")