Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

# Names exported by ``from <module> import *``.
__all__ = ("Location", "LocationFactory", "ButlerURI")

23 

import copy
import os
import os.path
import posixpath
import urllib
import urllib.parse
from pathlib import Path, PurePath, PurePosixPath

30 

# True when the native path separator of this OS is the POSIX one.
IS_POSIX = posixpath.sep == os.sep

# Root component of the resolved current working directory; used as the
# stand-in for the POSIX root when converting POSIX paths to OS paths.
OS_ROOT_PATH = Path(os.curdir).resolve().root

36 

37 

def os2posix(ospath):
    """Translate a path written with the local separator into POSIX form.

    Parameters
    ----------
    ospath : `str`
        Path using the local path separator.

    Returns
    -------
    posix : `str`
        Path using the POSIX path separator. On a POSIX system the input
        is returned unchanged.
    """
    if not IS_POSIX:
        converted = PurePath(ospath).as_posix()

        # PurePath drops a trailing separator, which would lose the hint
        # that the path refers to a directory, so restore it when the
        # input had one.
        lostTrailingSep = ospath.endswith(os.sep) and not converted.endswith(posixpath.sep)
        return converted + posixpath.sep if lostTrailingSep else converted

    return ospath

63 

64 

def posix2os(posix):
    """Convert a POSIX path description to a local path description.

    Parameters
    ----------
    posix : `str`
        Path using the POSIX path separator.

    Returns
    -------
    ospath : `str`
        Path using OS path separator. On a POSIX system the input is
        returned unchanged.
    """
    if IS_POSIX:
        return posix

    posixPath = PurePosixPath(posix)
    paths = list(posixPath.parts)

    if not paths:
        # "" and "." (with or without a trailing "/") have no components.
        # Indexing ``paths[0]`` or calling ``os.path.join()`` with no
        # arguments would raise, so supply a single component: the empty
        # string stays empty and anything else means the current directory.
        paths = [os.curdir] if posix else [""]
    elif paths[0] == posixPath.root:
        # Have to convert the root directory after splitting
        paths[0] = OS_ROOT_PATH

    # Trailing "/" is stripped so we need to add back an empty path
    # for consistency
    if posix.endswith(posixpath.sep):
        paths.append("")

    return os.path.join(*paths)

94 

95 

class ButlerURI:
    """Convenience wrapper around URI parsers.

    Provides access to URI components and can convert file
    paths into absolute paths. Scheme-less URIs are treated as if
    they are local file system paths.

    Parameters
    ----------
    uri : `str` or `urllib.parse.ParseResult`
        URI in string form. Can be scheme-less if referring to a local
        filesystem path.
    root : `str`, optional
        When fixing up a relative path in a ``file`` scheme or if scheme-less,
        use this as the root. Must be absolute. If `None` the current
        working directory will be used.
    forceAbsolute : `bool`, optional
        If `True`, a scheme-less relative path is made absolute by joining
        it to ``root``; it stays scheme-less and in OS path form. If `False`
        a scheme-less relative path stays relative and is only normalized.
        A scheme-less path that is already absolute is always converted to
        a ``file`` URI regardless of this flag.

    Raises
    ------
    ValueError
        Raised if ``uri`` is neither a string nor a
        `urllib.parse.ParseResult`.
    """

    def __init__(self, uri, root=None, forceAbsolute=True):
        if isinstance(uri, str):
            parsed = urllib.parse.urlparse(uri)
        elif isinstance(uri, urllib.parse.ParseResult):
            parsed = copy.copy(uri)
        else:
            raise ValueError("Supplied URI must be either string or ParseResult")

        parsed = self._fixupFileUri(parsed, root=root, forceAbsolute=forceAbsolute)
        self._uri = parsed

    @property
    def scheme(self):
        """The URI scheme (``://`` is not part of the scheme)."""
        return self._uri.scheme

    @property
    def netloc(self):
        """The URI network location."""
        return self._uri.netloc

    @property
    def path(self):
        """The path component of the URI."""
        return self._uri.path

    @property
    def ospath(self):
        """Path component of the URI localized to current OS.

        Raises `AttributeError` for ``s3`` URIs.
        """
        if self.scheme == 's3':
            raise AttributeError('S3 URIs have no OS path.')
        return posix2os(self._uri.path)

    @property
    def relativeToPathRoot(self):
        """Returns path relative to network location.

        Effectively, this is the path property with posix separator stripped
        from the left hand side of the path.
        """
        # Scheme-less URIs hold an OS path; anything with a scheme is POSIX.
        if not self.scheme:
            p = PurePath(self.path)
        else:
            p = PurePosixPath(self.path)
        return str(p.relative_to(p.root))

    @property
    def fragment(self):
        """The fragment component of the URI."""
        return self._uri.fragment

    @property
    def params(self):
        """Any parameters included in the URI."""
        return self._uri.params

    @property
    def query(self):
        """Any query strings included in the URI."""
        return self._uri.query

    def geturl(self):
        """Return the URI in string form.

        Returns
        -------
        url : `str`
            String form of URI.
        """
        return self._uri.geturl()

    def replace(self, **kwargs):
        """Replace components in a URI with new values and return a new
        instance.

        Parameters
        ----------
        **kwargs
            Components to replace, forwarded to the ``_replace`` method of
            the underlying `urllib.parse.ParseResult`.

        Returns
        -------
        new : `ButlerURI`
            New `ButlerURI` object with updated values. It is built with the
            constructor defaults (``root=None``, ``forceAbsolute=True``).
        """
        return self.__class__(self._uri._replace(**kwargs))

    def updateFile(self, newfile):
        """Update in place the final component of the path with the supplied
        file name.

        Parameters
        ----------
        newfile : `str`
            File name with no path component.

        Notes
        -----
        Updates the URI in place.
        """
        # A URI with a scheme always uses POSIX separators; a scheme-less
        # one holds a local OS path.
        pathclass = posixpath if self.scheme else os.path

        dirname, _ = pathclass.split(self.path)
        self._uri = self._uri._replace(path=pathclass.join(dirname, newfile))

    def __str__(self):
        return self.geturl()

    def __repr__(self):
        return f"{self.__class__.__name__}({self.geturl()!r})"

    @staticmethod
    def _fixupFileUri(parsed, root=None, forceAbsolute=False):
        """Fix up relative paths in file URI instances.

        Parameters
        ----------
        parsed : `~urllib.parse.ParseResult`
            The result from parsing a URI using `urllib.parse`.
        root : `str`, optional
            Path to use as root when converting relative to absolute.
            If `None`, it will be the current working directory. This
            is a local file system path, not a URI.
        forceAbsolute : `bool`
            If `True`, a scheme-less relative path is joined to ``root`` to
            make it absolute (it stays scheme-less, in OS path form). If
            `False` a scheme-less relative path stays relative. URIs with
            a defined scheme will not be affected by this parameter.

        Returns
        -------
        modified : `~urllib.parse.ParseResult`
            Update result if a file URI is being handled.

        Notes
        -----
        Relative paths are explicitly not supported by RFC8089 but `urllib`
        does accept URIs of the form ``file:relative/path.ext``. They need
        to be turned into absolute paths before they can be used. This is
        always done regardless of the ``forceAbsolute`` parameter.

        Scheme-less paths are normalized, and tilde expansion is applied to
        them. A scheme-less absolute path is converted to a ``file`` URI.
        """
        # Only scheme-less and ``file`` URIs refer to the local file system.
        if parsed.scheme and parsed.scheme != "file":
            return parsed

        # Replacement values for the URI
        replacements = {}

        if root is None:
            root = os.path.abspath(os.path.curdir)

        if not parsed.scheme:
            # if there was no scheme this is a local OS file path
            # which can support tilde expansion.
            expandedPath = os.path.expanduser(parsed.path)

            # Ensure that this is a file URI if it is already absolute
            if os.path.isabs(expandedPath):
                replacements["scheme"] = "file"
                replacements["path"] = os2posix(os.path.normpath(expandedPath))
            elif forceAbsolute:
                # This can stay in OS path form, do not change to file
                # scheme.
                replacements["path"] = os.path.normpath(os.path.join(root, expandedPath))
            else:
                # No change needed for relative local path staying relative
                # except normalization
                replacements["path"] = os.path.normpath(expandedPath)

            # normpath strips trailing "/" which makes it hard to keep
            # track of directory vs file when calling updateFile, so
            # put it back. The separator depends on whether the path was
            # converted to POSIX form for a file URI.
            sep = posixpath.sep if "scheme" in replacements else os.sep

            if expandedPath.endswith(os.sep) and not replacements["path"].endswith(sep):
                replacements["path"] += sep

        else:
            # file URI implies POSIX path separators so split as POSIX,
            # then join as os, and convert to abspath. Do not handle
            # home directories since "file" scheme is explicitly documented
            # to not do tilde expansion.
            if posixpath.isabs(parsed.path):
                # No change needed
                return copy.copy(parsed)

            replacements["path"] = posixpath.normpath(posixpath.join(os2posix(root), parsed.path))

            # normpath strips trailing "/" so put it back if necessary
            if parsed.path.endswith(posixpath.sep) and not replacements["path"].endswith(posixpath.sep):
                replacements["path"] += posixpath.sep

        # ParseResult is a NamedTuple so _replace is standard API
        return parsed._replace(**replacements)

319 

320 

class Location:
    """Identifies a location within the `Datastore`.

    Parameters
    ----------
    datastoreRootUri : `ButlerURI` or `str`
        Base URI for this datastore, must include an absolute path.
    path : `str`
        Relative path within datastore. Assumed to be using the local
        path separator if a ``file`` scheme is being used for the URI,
        else a POSIX separator.

    Raises
    ------
    ValueError
        Raised if the root is neither a `str` nor a `ButlerURI`, if the
        root URI path is not absolute, or if ``path`` is absolute.
    """

    __slots__ = ("_datastoreRootUri", "_path")

    def __init__(self, datastoreRootUri, path):
        if isinstance(datastoreRootUri, str):
            datastoreRootUri = ButlerURI(datastoreRootUri)
        elif not isinstance(datastoreRootUri, ButlerURI):
            raise ValueError("Datastore root must be a ButlerURI instance")

        if not posixpath.isabs(datastoreRootUri.path):
            raise ValueError(f"Supplied URI must be an absolute path (given {datastoreRootUri}).")

        self._datastoreRootUri = datastoreRootUri

        # The in-store path uses OS separators only for the file scheme.
        pathModule = os.path if self._datastoreRootUri.scheme == "file" else posixpath

        if pathModule.isabs(path):
            raise ValueError("Path within datastore must be relative not absolute")

        self._path = path

    def __str__(self):
        return self.uri

    def __repr__(self):
        uri = self._datastoreRootUri.geturl()
        path = self._path
        return f"{self.__class__.__name__}({uri!r}, {path!r})"

    @property
    def uri(self):
        """URI string corresponding to fully-specified location in datastore.
        """
        uriPath = os2posix(self.path)
        return self._datastoreRootUri.replace(path=uriPath).geturl()

    @property
    def path(self):
        """Path corresponding to location.

        This path includes the root of the `Datastore`, but does not include
        non-path components of the root URI. If a file URI scheme is being
        used the path will be returned with the local OS path separator.
        """
        if not self._datastoreRootUri.scheme:
            # Entirely local file system
            return os.path.normpath(os.path.join(self._datastoreRootUri.path, self.pathInStore))
        elif self._datastoreRootUri.scheme == "file":
            return os.path.normpath(os.path.join(posix2os(self._datastoreRootUri.path), self.pathInStore))
        else:
            return posixpath.join(self._datastoreRootUri.path, self.pathInStore)

    @property
    def pathInStore(self):
        """Path corresponding to location relative to `Datastore` root.

        Uses the same path separator as supplied to the object constructor.
        """
        return self._path

    @property
    def netloc(self):
        """The URI network location."""
        return self._datastoreRootUri.netloc

    @property
    def relativeToPathRoot(self):
        """Returns the path component of the URI relative to the network
        location.

        Effectively, this is the path property with POSIX separator stripped
        from the left hand side of the path.
        """
        if self._datastoreRootUri.scheme == 'file' or not self._datastoreRootUri.scheme:
            p = PurePath(os2posix(self.path))
        else:
            p = PurePosixPath(self.path)
        stripped = p.relative_to(p.root)
        # posix2os works on strings (it calls ``str.endswith``), so hand it
        # the POSIX string form rather than the PurePath object itself.
        return str(posix2os(stripped.as_posix()))

    def updateExtension(self, ext):
        """Update the file extension associated with this `Location`.

        Parameters
        ----------
        ext : `str`
            New extension. If an empty string is given any extension will
            be removed. If `None` is given there will be no change.
        """
        if ext is None:
            return

        path, _ = os.path.splitext(self.pathInStore)

        # Ensure that we have a leading "." on file extension (and we do not
        # try to modify the empty string)
        if ext and not ext.startswith("."):
            ext = "." + ext

        self._path = path + ext

436 

437 

class LocationFactory:
    """Builds `Location` objects that share one datastore root.

    The root may be given as a file system path (absolute or relative)
    or as a URI.

    Parameters
    ----------
    datastoreRoot : `str`
        Root location of the `Datastore` either as a path in the local
        filesystem or as a URI. File scheme URIs can be used. If a local
        filesystem path is used without URI scheme, it will be converted
        to an absolute path and any home directory indicators expanded.
        If a file scheme is used with a relative path, the path will
        be treated as a posixpath but then converted to an absolute path.
    """

    def __init__(self, datastoreRoot):
        rootUri = ButlerURI(datastoreRoot, forceAbsolute=True)
        self._datastoreRootUri = rootUri

    def __str__(self):
        className = self.__class__.__name__
        return f"{className}@{self._datastoreRootUri}"

    @property
    def netloc(self):
        """Network location of the root of the `Datastore`."""
        rootUri = self._datastoreRootUri
        return rootUri.netloc

    def fromPath(self, path):
        """Create a `Location` for a path inside the datastore.

        Parameters
        ----------
        path : `str`
            A standard POSIX path, relative to the `Datastore` root.

        Returns
        -------
        location : `Location`
            The equivalent `Location`.

        Raises
        ------
        ValueError
            Raised if ``path`` is absolute.
        """
        if not os.path.isabs(path):
            return Location(self._datastoreRootUri, path)
        raise ValueError("LocationFactory path must be relative to datastore, not absolute.")