Coverage for python/lsst/daf/butler/core/location.py : 20%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22__all__ = ("Location", "LocationFactory", "ButlerURI")
import copy
import os
import os.path
import posixpath
import urllib
import urllib.parse
from pathlib import Path, PurePath, PurePosixPath
# True when the native path separator of this OS is the POSIX one ("/").
IS_POSIX = posixpath.sep == os.sep

# Root component of the resolved current working directory on this OS.
OS_ROOT_PATH = Path.cwd().resolve().root
def os2posix(ospath):
    """Translate a path written with the local separator into POSIX form.

    Parameters
    ----------
    ospath : `str`
        Path using the local path separator.

    Returns
    -------
    posix : `str`
        Path using the POSIX path separator. On an OS whose separator is
        already the POSIX one the input is returned unchanged.
    """
    if not IS_POSIX:
        converted = PurePath(ospath).as_posix()

        # PurePath drops a trailing separator, which would lose the hint
        # that the path refers to a directory. Restore it when the input
        # had one and the converted form does not.
        lostTrailingSep = ospath.endswith(os.sep) and not converted.endswith(posixpath.sep)
        return converted + posixpath.sep if lostTrailingSep else converted

    return ospath
def posix2os(posix):
    """Convert a POSIX path description to a local path description.

    Parameters
    ----------
    posix : `str`
        Path using the POSIX path separator.

    Returns
    -------
    ospath : `str`
        Path using OS path separator. On an OS whose separator is already
        the POSIX one the input is returned unchanged.
    """
    if IS_POSIX:
        return posix

    posixPath = PurePosixPath(posix)
    paths = list(posixPath.parts)

    # An empty path or a "current directory" path ("." or "./") has no
    # parts at all; indexing paths[0] or joining nothing would raise.
    if not paths:
        if not posix:
            return posix
        paths = [os.curdir]

    # Have to convert the root directory after splitting
    if paths[0] == posixPath.root:
        paths[0] = OS_ROOT_PATH

    # Trailing "/" is stripped so we need to add back an empty path
    # for consistency
    if posix.endswith(posixpath.sep):
        paths.append("")

    return os.path.join(*paths)
class ButlerURI:
    """Convenience wrapper around URI parsers.

    Provides access to URI components and can convert file
    paths into absolute paths. Scheme-less URIs are treated as if
    they are local file system paths.

    Parameters
    ----------
    uri : `str` or `urllib.parse.ParseResult`
        URI in string form. Can be scheme-less if referring to a local
        filesystem path.
    root : `str`, optional
        When fixing up a relative path in a ``file`` scheme or if scheme-less,
        use this as the root. Must be absolute. If `None` the current
        working directory will be used.
    forceAbsolute : `bool`, optional
        If `True`, a scheme-less relative path is joined to ``root`` and
        normalized so that it becomes an absolute local path; it remains
        scheme-less and in OS path form. If `False` a scheme-less relative
        path is only normalized and stays relative.

    Raises
    ------
    ValueError
        Raised if ``uri`` is neither a `str` nor a
        `urllib.parse.ParseResult`.

    Notes
    -----
    A scheme-less path that is already absolute (after tilde expansion) is
    always converted to a ``file`` URI with a POSIX path, regardless of
    ``forceAbsolute``. A relative path given with an explicit ``file``
    scheme is always made absolute relative to ``root``.
    """

    def __init__(self, uri, root=None, forceAbsolute=True):
        if isinstance(uri, str):
            parsed = urllib.parse.urlparse(uri)
        elif isinstance(uri, urllib.parse.ParseResult):
            parsed = copy.copy(uri)
        else:
            raise ValueError("Supplied URI must be either string or ParseResult")

        parsed = self._fixupFileUri(parsed, root=root, forceAbsolute=forceAbsolute)
        self._uri = parsed

    @property
    def scheme(self):
        """The URI scheme (``://`` is not part of the scheme)."""
        return self._uri.scheme

    @property
    def netloc(self):
        """The URI network location."""
        return self._uri.netloc

    @property
    def path(self):
        """The path component of the URI."""
        return self._uri.path

    @property
    def ospath(self):
        """Path component of the URI localized to current OS.

        Raises
        ------
        AttributeError
            Raised if the URI uses the ``s3`` scheme.
        """
        if self.scheme == 's3':
            raise AttributeError('S3 URIs have no OS path.')
        return posix2os(self._uri.path)

    @property
    def relativeToPathRoot(self):
        """Returns path relative to network location.

        Effectively, this is the path property with posix separator stripped
        from the left hand side of the path.
        """
        # Scheme-less paths are in OS form; anything with a scheme is POSIX.
        if not self.scheme:
            p = PurePath(self.path)
        else:
            p = PurePosixPath(self.path)
        return str(p.relative_to(p.root))

    @property
    def fragment(self):
        """The fragment component of the URI."""
        return self._uri.fragment

    @property
    def params(self):
        """Any parameters included in the URI."""
        return self._uri.params

    @property
    def query(self):
        """Any query strings included in the URI."""
        return self._uri.query

    def geturl(self):
        """Return the URI in string form.

        Returns
        -------
        url : `str`
            String form of URI.
        """
        return self._uri.geturl()

    def replace(self, **kwargs):
        """Replace components in a URI with new values and return a new
        instance.

        Parameters
        ----------
        **kwargs
            Components to replace, passed through to the ``_replace``
            method of the underlying `urllib.parse.ParseResult`.

        Returns
        -------
        new : `ButlerURI`
            New `ButlerURI` object with updated values.

        Notes
        -----
        The new instance is built with the default constructor arguments
        (``root=None``, ``forceAbsolute=True``).
        """
        return self.__class__(self._uri._replace(**kwargs))

    def updateFile(self, newfile):
        """Update in place the final component of the path with the supplied
        file name.

        Parameters
        ----------
        newfile : `str`
            File name with no path component.

        Notes
        -----
        Updates the URI in place.
        """
        if self.scheme:
            # URIs with a scheme always hold POSIX paths
            pathclass = posixpath
        else:
            # Scheme-less URIs hold paths in local OS form
            pathclass = os.path

        dirname, _ = pathclass.split(self.path)
        newpath = pathclass.join(dirname, newfile)

        self._uri = self._uri._replace(path=newpath)

    def __str__(self):
        return self.geturl()

    def __repr__(self):
        return f"{self.__class__.__name__}({self.geturl()!r})"

    @staticmethod
    def _fixupFileUri(parsed, root=None, forceAbsolute=False):
        """Fix up relative paths in file URI instances.

        Parameters
        ----------
        parsed : `~urllib.parse.ParseResult`
            The result from parsing a URI using `urllib.parse`.
        root : `str`, optional
            Path to use as root when converting relative to absolute.
            If `None`, it will be the current working directory. This
            is a local file system path, not a URI.
        forceAbsolute : `bool`
            If `True`, a scheme-less relative path is joined to ``root``
            and normalized; it remains scheme-less. If `False` a
            scheme-less relative path is only normalized. URIs with a
            defined scheme will not be affected by this parameter.

        Returns
        -------
        modified : `~urllib.parse.ParseResult`
            Update result if a file URI is being handled. URIs with a
            scheme other than ``file`` are returned unchanged.

        Notes
        -----
        Relative paths are explicitly not supported by RFC8089 but `urllib`
        does accept URIs of the form ``file:relative/path.ext``. They need
        to be turned into absolute paths before they can be used. This is
        always done regardless of the ``forceAbsolute`` parameter.

        Scheme-less paths are normalized. A scheme-less path that is
        absolute after tilde expansion is converted to ``file`` scheme.
        """
        if not parsed.scheme or parsed.scheme == "file":

            # Replacement values for the URI
            replacements = {}

            if root is None:
                root = os.path.abspath(os.path.curdir)

            if not parsed.scheme:
                # if there was no scheme this is a local OS file path
                # which can support tilde expansion.
                expandedPath = os.path.expanduser(parsed.path)

                # Ensure that this is a file URI if it is already absolute
                if os.path.isabs(expandedPath):
                    replacements["scheme"] = "file"
                    replacements["path"] = os2posix(os.path.normpath(expandedPath))
                elif forceAbsolute:
                    # This can stay in OS path form, do not change to file
                    # scheme.
                    replacements["path"] = os.path.normpath(os.path.join(root, expandedPath))
                else:
                    # No change needed for relative local path staying relative
                    # except normalization
                    replacements["path"] = os.path.normpath(expandedPath)

                # normpath strips trailing "/" which makes it hard to keep
                # track of directory vs file when calling updateFile
                # put it back. The separator depends on whether the path
                # was converted to POSIX form for a file URI.
                if "scheme" in replacements:
                    sep = posixpath.sep
                else:
                    sep = os.sep

                if expandedPath.endswith(os.sep) and not replacements["path"].endswith(sep):
                    replacements["path"] += sep

            elif parsed.scheme == "file":
                # file URI implies POSIX path separators so split as POSIX,
                # then join as os, and convert to abspath. Do not handle
                # home directories since "file" scheme is explicitly documented
                # to not do tilde expansion.
                if posixpath.isabs(parsed.path):
                    # No change needed
                    return copy.copy(parsed)

                replacements["path"] = posixpath.normpath(posixpath.join(os2posix(root), parsed.path))

                # normpath strips trailing "/" so put it back if necessary
                if parsed.path.endswith(posixpath.sep) and not replacements["path"].endswith(posixpath.sep):
                    replacements["path"] += posixpath.sep

            else:
                raise RuntimeError("Unexpectedly got confused by URI scheme")

            # ParseResult is a NamedTuple so _replace is standard API
            parsed = parsed._replace(**replacements)

        return parsed
class Location:
    """Identifies a location within the `Datastore`.

    Parameters
    ----------
    datastoreRootUri : `ButlerURI` or `str`
        Base URI for this datastore, must include an absolute path.
    path : `str`
        Relative path within datastore. Assumed to be using the local
        path separator if a ``file`` scheme is being used for the URI,
        else a POSIX separator.

    Raises
    ------
    ValueError
        Raised if the root is neither a `str` nor a `ButlerURI`, if the
        root URI does not have an absolute path, or if ``path`` is
        absolute.
    """

    __slots__ = ("_datastoreRootUri", "_path")

    def __init__(self, datastoreRootUri, path):
        if isinstance(datastoreRootUri, str):
            datastoreRootUri = ButlerURI(datastoreRootUri)
        elif not isinstance(datastoreRootUri, ButlerURI):
            raise ValueError("Datastore root must be a ButlerURI instance")

        if not posixpath.isabs(datastoreRootUri.path):
            raise ValueError(f"Supplied URI must be an absolute path (given {datastoreRootUri}).")

        self._datastoreRootUri = datastoreRootUri

        # The in-store path uses OS separators for file URIs and POSIX
        # separators otherwise, so pick the matching path module.
        if self._datastoreRootUri.scheme == "file":
            pathModule = os.path
        else:
            pathModule = posixpath

        if pathModule.isabs(path):
            raise ValueError("Path within datastore must be relative not absolute")

        self._path = path

    def __str__(self):
        return self.uri

    def __repr__(self):
        uri = self._datastoreRootUri.geturl()
        path = self._path
        return f"{self.__class__.__name__}({uri!r}, {path!r})"

    @property
    def uri(self):
        """URI string corresponding to fully-specified location in datastore.
        """
        uriPath = os2posix(self.path)
        return self._datastoreRootUri.replace(path=uriPath).geturl()

    @property
    def path(self):
        """Path corresponding to location.

        This path includes the root of the `Datastore`, but does not include
        non-path components of the root URI. If a file URI scheme is being
        used the path will be returned with the local OS path separator.
        """
        if not self._datastoreRootUri.scheme:
            # Entirely local file system
            return os.path.normpath(os.path.join(self._datastoreRootUri.path, self.pathInStore))
        elif self._datastoreRootUri.scheme == "file":
            return os.path.normpath(os.path.join(posix2os(self._datastoreRootUri.path), self.pathInStore))
        else:
            return posixpath.join(self._datastoreRootUri.path, self.pathInStore)

    @property
    def pathInStore(self):
        """Path corresponding to location relative to `Datastore` root.

        Uses the same path separator as supplied to the object constructor.
        """
        return self._path

    @property
    def netloc(self):
        """The URI network location."""
        return self._datastoreRootUri.netloc

    @property
    def relativeToPathRoot(self):
        """Returns the path component of the URI relative to the network
        location.

        Effectively, this is the path property with the root separator
        stripped from the left hand side of the path. For local
        (``file`` or scheme-less) roots the result uses the OS path
        separator; for all other schemes it stays in POSIX form.
        """
        scheme = self._datastoreRootUri.scheme
        if scheme == 'file' or not scheme:
            p = PurePath(os2posix(self.path))
            stripped = p.relative_to(p.root)
            # posix2os works on strings, so hand it the POSIX string form
            # rather than the path object itself.
            return posix2os(stripped.as_posix())

        p = PurePosixPath(self.path)
        return str(p.relative_to(p.root))

    def updateExtension(self, ext):
        """Update the file extension associated with this `Location`.

        Parameters
        ----------
        ext : `str`
            New extension. If an empty string is given any extension will
            be removed. If `None` is given there will be no change.
        """
        if ext is None:
            return

        path, _ = os.path.splitext(self.pathInStore)

        # Ensure that we have a leading "." on file extension (and we do not
        # try to modify the empty string)
        if ext and not ext.startswith("."):
            ext = "." + ext

        self._path = path + ext
class LocationFactory:
    """Build `Location` objects that share a single datastore root.

    The root handed to the constructor may be a path on the local file
    system (absolute or relative) or a URI.

    Parameters
    ----------
    datastoreRoot : `str`
        Root location of the `Datastore` either as a path in the local
        filesystem or as a URI. File scheme URIs can be used. If a local
        filesystem path is used without URI scheme, it will be converted
        to an absolute path and any home directory indicators expanded.
        If a file scheme is used with a relative path, the path will
        be treated as a posixpath but then converted to an absolute path.
    """

    def __init__(self, datastoreRoot):
        rootUri = ButlerURI(datastoreRoot, forceAbsolute=True)
        self._datastoreRootUri = rootUri

    def __str__(self):
        return "{}@{}".format(type(self).__name__, self._datastoreRootUri)

    @property
    def netloc(self):
        """Network location of the root location of the `Datastore`."""
        rootUri = self._datastoreRootUri
        return rootUri.netloc

    def fromPath(self, path):
        """Create a `Location` for a path inside the datastore.

        Parameters
        ----------
        path : `str`
            A standard POSIX path, relative to the `Datastore` root.

        Returns
        -------
        location : `Location`
            The equivalent `Location`.

        Raises
        ------
        ValueError
            Raised if ``path`` is absolute.
        """
        if not os.path.isabs(path):
            return Location(self._datastoreRootUri, path)
        raise ValueError("LocationFactory path must be relative to datastore, not absolute.")