Coverage for python/lsst/resources/http.py: 14%

381 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-02-22 03:00 -0800

1# This file is part of lsst-resources. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# Use of this source code is governed by a 3-clause BSD-style 

10# license that can be found in the LICENSE file. 

11 

12from __future__ import annotations 

13 

14__all__ = ("HttpResourcePath",) 

15 

16import contextlib 

17import functools 

18import io 

19import logging 

20import os 

21import os.path 

22import random 

23import re 

24import stat 

25import tempfile 

26import xml.etree.ElementTree as eTree 

27from typing import TYPE_CHECKING, BinaryIO, Iterator, List, Optional, Tuple, Union, cast 

28 

29import requests 

30from lsst.utils.timer import time_this 

31from requests.adapters import HTTPAdapter 

32from requests.auth import AuthBase 

33from urllib3.util.retry import Retry 

34 

35from ._resourceHandles import ResourceHandleProtocol 

36from ._resourceHandles._httpResourceHandle import HttpReadResourceHandle 

37from ._resourcePath import ResourcePath 

38 

39if TYPE_CHECKING: 39 ↛ 40line 39 didn't jump to line 40, because the condition on line 39 was never true

40 from .utils import TransactionProtocol 

41 

42log = logging.getLogger(__name__) 

43 

44 

45# Default timeouts for all HTTP requests, in seconds. 

46DEFAULT_TIMEOUT_CONNECT = 60 

47DEFAULT_TIMEOUT_READ = 300 

48 

49# Allow for network timeouts to be set in the environment. 

50TIMEOUT = ( 

51 int(os.environ.get("LSST_HTTP_TIMEOUT_CONNECT", DEFAULT_TIMEOUT_CONNECT)), 

52 int(os.environ.get("LSST_HTTP_TIMEOUT_READ", DEFAULT_TIMEOUT_READ)), 

53) 

54 

55# Should we send a "Expect: 100-continue" header on PUT requests? 

56# The "Expect: 100-continue" header is used by some servers (e.g. dCache) 

57# as an indication that the client knows how to handle redirects to 

58# the specific server that will actually receive the data for PUT 

59# requests. 

60_SEND_EXPECT_HEADER_ON_PUT = "LSST_HTTP_PUT_SEND_EXPECT_HEADER" in os.environ 

61 

62 

@functools.lru_cache
def _is_webdav_endpoint(path: Union[ResourcePath, str]) -> bool:
    """Check whether the remote HTTP endpoint implements WebDAV features.

    Parameters
    ----------
    path : `ResourcePath` or `str`
        URL to the resource to be checked.
        Should preferably refer to the root since the status is shared
        by all paths in that server.

    Returns
    -------
    _is_webdav_endpoint : `bool`
        True if the endpoint implements WebDAV, False if it doesn't.

    Raises
    ------
    requests.exceptions.SSLError
        Re-raised, after logging a hint about LSST_HTTP_CACERT_BUNDLE, when
        the OPTIONS request fails with an SSL error.

    Notes
    -----
    Results are memoized per ``path`` value by `functools.lru_cache`.
    """
    log.debug("Detecting HTTP endpoint type for '%s'...", path)
    ca_cert_bundle = os.getenv("LSST_HTTP_CACERT_BUNDLE")
    verify: Union[bool, str] = ca_cert_bundle if ca_cert_bundle else True
    try:
        # Always bound the request with the module-wide timeouts: without
        # a timeout an unresponsive server would block the caller forever.
        resp = requests.options(str(path), verify=verify, stream=True, timeout=TIMEOUT)
    except requests.exceptions.SSLError:
        log.warning(
            "Environment variable LSST_HTTP_CACERT_BUNDLE can be used to "
            "specify a bundle of certificate authorities you trust which are "
            "not included in the default set of trusted authorities of your "
            "system."
        )
        raise

    # Only the headers are needed. Since the request was sent with
    # stream=True the body is never consumed, so close the response
    # explicitly to release the underlying connection.
    try:
        dav_header = resp.headers.get("DAV")
    finally:
        resp.close()

    # Check that "1" is part of the value of the "DAV" header. We don't
    # use locks, so a server complying to class 1 is enough for our
    # purposes. All webDAV servers must advertise at least compliance
    # class "1".
    #
    # Compliance classes are documented in
    #    http://www.webdav.org/specs/rfc4918.html#dav.compliance.classes
    #
    # Examples of values for header DAV are:
    #   DAV: 1, 2
    #   DAV: 1, <http://apache.org/dav/propset/fs/1>
    if dav_header is None:
        return False

    # Convert to str to keep mypy happy
    compliance_classes = str(dav_header).replace(" ", "").split(",")
    return "1" in compliance_classes

110 

111 

112# Tuple (path, block_size) pointing to the location of a local directory 

113# to save temporary files and the block size of the underlying file system. 

114_TMPDIR: Optional[Tuple[str, int]] = None 

115 

116 

def _get_temp_dir() -> Tuple[str, int]:
    """Return the temporary directory path and a buffer size for it.

    The first call computes the pair and stores it in the module-level
    variable ``_TMPDIR``; later calls return the stored pair.

    Returns
    -------
    tmpdir : `str`
        Value of the first of the environment variables
        'LSST_RESOURCES_TMPDIR' and 'TMPDIR' which is set and refers to an
        existing directory, or the current working directory otherwise.
    block_size : `int`
        The larger of 10 times the block size reported by `os.statvfs` for
        ``tmpdir`` and 256 * 4096 bytes.
    """
    global _TMPDIR
    if _TMPDIR:
        return _TMPDIR

    # Candidate locations, in order of precedence. The current working
    # directory is the fallback when no candidate is usable.
    candidates = (os.getenv(name) for name in ("LSST_RESOURCES_TMPDIR", "TMPDIR"))
    tmpdir = next(
        (candidate for candidate in candidates if candidate and os.path.isdir(candidate)),
        os.getcwd(),
    )

    # Use 256 blocks of typical size (i.e. 4096 bytes) or 10 times the
    # file system block size, whichever is higher. This is a reasonable
    # compromise between using memory for buffering and the number of
    # system calls issued to read from or write to temporary files.
    fs_block_size = os.statvfs(tmpdir).f_bsize
    _TMPDIR = (tmpdir, max(10 * fs_block_size, 256 * 4096))
    return _TMPDIR

141 

142 

class BearerTokenAuth(AuthBase):
    """Attach a bearer token 'Authorization' header to each request.

    Parameters
    ----------
    token : `str`
        Can be either the path to a local protected file which contains the
        value of the token or the token itself.

    Raises
    ------
    PermissionError
        Raised if ``token`` is the path of an existing file for which
        ``_is_protected`` returns a false value.
    """

    def __init__(self, token: str):
        self._token = self._path = None
        # Modification time of the token file when it was last read.
        self._mtime: float = -1.0

        if token:
            self._token = token
            if os.path.isfile(token):
                # The value designates a file: remember where it is and
                # load the actual token from it.
                self._path = os.path.abspath(token)
                if not _is_protected(self._path):
                    raise PermissionError(
                        f"Bearer token file at {self._path} must be protected for access only by its owner"
                    )
                self._refresh()

    def _refresh(self) -> None:
        """Reload the token from its file, if there is one and it was
        modified after the last time it was read.
        """
        if not self._path:
            return

        current_mtime = os.stat(self._path).st_mtime
        if current_mtime <= self._mtime:
            # The file has not changed since we last read it.
            return

        log.debug("Reading bearer token file at %s", self._path)
        self._mtime = current_mtime
        with open(self._path) as token_file:
            self._token = token_file.read().rstrip("\n")

    def __call__(self, req: requests.PreparedRequest) -> requests.PreparedRequest:
        """Set the 'Authorization' header of ``req`` when a token is known
        and return the request.
        """
        if not self._token:
            return req

        self._refresh()
        req.headers["Authorization"] = f"Bearer {self._token}"
        return req

186 

187 

class SessionStore:
    """Cache a single reusable HTTP client session per endpoint."""

    def __init__(self) -> None:
        # Sessions created so far, keyed by the root URI of their endpoint.
        self._sessions: dict[str, requests.Session] = {}

    def get(self, rpath: ResourcePath, persist: bool = True) -> requests.Session:
        """Retrieve a session for accessing the remote resource at rpath.

        Parameters
        ----------
        rpath : `ResourcePath`
            URL to a resource at the remote server for which a session is to
            be retrieved.

        persist : `bool`
            Only used when a new session has to be created. If `True`, the
            adapter mounted for the root URI of the endpoint (the front end
            server) is created with ``pool_maxsize=1``, otherwise with
            ``pool_maxsize=0``. The adapter mounted for every other URL of
            the same scheme (e.g. back-end servers) is always created with
            ``pool_maxsize=0``.

        Returns
        -------
        session : `requests.Session`
            The session cached for the root URI of ``rpath``.

        Notes
        -----
        Once a session is created for a given endpoint it is cached and
        returned every time a session is requested for any path under that same
        endpoint. For instance, a single session will be cached and shared
        for paths "https://www.example.org/path/to/file" and
        "https://www.example.org/any/other/path".

        Note that "https://www.example.org" and "https://www.example.org:12345"
        will have different sessions since the port number is not identical.

        In order to configure the session, some environment variables are
        inspected:

        - LSST_HTTP_CACERT_BUNDLE: path to a .pem file containing the CA
            certificates to trust when verifying the server's certificate.

        - LSST_HTTP_AUTH_BEARER_TOKEN: value of a bearer token or path to a
            local file containing a bearer token to be used as the client
            authentication mechanism with all requests.
            The permissions of the token file must be set so that only its
            owner can access it.
            If initialized, takes precedence over LSST_HTTP_AUTH_CLIENT_CERT
            and LSST_HTTP_AUTH_CLIENT_KEY.

        - LSST_HTTP_AUTH_CLIENT_CERT: path to a .pem file which contains the
            client certificate for authenticating to the server.
            If initialized, the variable LSST_HTTP_AUTH_CLIENT_KEY must also be
            initialized with the path of the client private key file.
            The permissions of the client private key must be set so that only
            its owner can access it, at least for reading.
        """
        endpoint = str(rpath.root_uri())
        session = self._sessions.get(endpoint)
        if session is None:
            # No session yet for this endpoint: create one and remember it.
            session = self._make_session(rpath, persist)
            self._sessions[endpoint] = session
        return session

    def _make_session(self, rpath: ResourcePath, persist: bool) -> requests.Session:
        """Make a new session configured from values from the environment.

        Parameters
        ----------
        rpath : `ResourcePath`
            Resource whose root URI and scheme determine where the adapters
            of the new session are mounted.
        persist : `bool`
            Selects the ``pool_maxsize`` (1 or 0) of the adapter mounted for
            the root URI.

        Returns
        -------
        session : `requests.Session`
            The new session.

        Raises
        ------
        PermissionError
            Raised if ``_is_protected`` rejects the client private key file.
        ValueError
            Raised if only one of LSST_HTTP_AUTH_CLIENT_CERT and
            LSST_HTTP_AUTH_CLIENT_KEY is set.
        """
        session = requests.Session()
        root_uri = str(rpath.root_uri())
        log.debug("Creating new HTTP session for endpoint %s (persist connection=%s)...", root_uri, persist)

        # Retry policy shared by both adapters mounted below.
        forced_retry_statuses = [
            requests.codes.too_many_requests,  # 429
            requests.codes.internal_server_error,  # 500
            requests.codes.bad_gateway,  # 502
            requests.codes.service_unavailable,  # 503
            requests.codes.gateway_timeout,  # 504
        ]
        retries = Retry(
            # Total number of retries to allow. Takes precedence over other
            # counts.
            total=3,
            # How many connection-related errors to retry on.
            connect=3,
            # How many times to retry on read errors.
            read=3,
            # Backoff factor to apply between attempts after the second try
            # (seconds)
            backoff_factor=5.0 + random.random(),
            # How many times to retry on bad status codes
            status=3,
            # HTTP status codes that we should force a retry on
            status_forcelist=forced_retry_statuses,
        )

        # Persist a single connection to the front end server, if required
        frontend_adapter = HTTPAdapter(
            pool_connections=1,
            pool_maxsize=1 if persist else 0,
            pool_block=False,
            max_retries=retries,
        )
        session.mount(root_uri, frontend_adapter)

        # Prevent persisting connections to back-end servers which may vary
        # from request to request. Systematically persisting connections to
        # those servers may exhaust their capabilities when there are thousands
        # of simultaneous clients
        backend_adapter = HTTPAdapter(pool_connections=1, pool_maxsize=0, pool_block=False, max_retries=retries)
        session.mount(f"{rpath.scheme}://", backend_adapter)

        # If the remote endpoint does not use secure HTTP we don't include
        # bearer tokens in the requests nor need to authenticate the remote
        # server.
        if rpath.scheme != "https":
            return session

        # Should we use a specific CA cert bundle for authenticating the
        # server?
        ca_bundle = os.getenv("LSST_HTTP_CACERT_BUNDLE")
        session.verify = ca_bundle if ca_bundle else True

        # Should we use bearer tokens for client authentication?
        token = os.getenv("LSST_HTTP_AUTH_BEARER_TOKEN")
        if token:
            log.debug("... using bearer token authentication")
            session.auth = BearerTokenAuth(token)
            return session

        # Should we instead use client certificate and private key? If so, both
        # LSST_HTTP_AUTH_CLIENT_CERT and LSST_HTTP_AUTH_CLIENT_KEY must be
        # initialized.
        client_cert = os.getenv("LSST_HTTP_AUTH_CLIENT_CERT")
        client_key = os.getenv("LSST_HTTP_AUTH_CLIENT_KEY")
        if client_cert and client_key:
            if not _is_protected(client_key):
                raise PermissionError(
                    f"Private key file at {client_key} must be protected for access only by its owner"
                )
            log.debug("... using client certificate authentication.")
            session.cert = (client_cert, client_key)
        elif client_cert:
            # Only the client certificate was provided.
            raise ValueError(
                "Environment variable LSST_HTTP_AUTH_CLIENT_KEY must be set to client private key file path"
            )
        elif client_key:
            # Only the client private key was provided.
            raise ValueError(
                "Environment variable LSST_HTTP_AUTH_CLIENT_CERT must be set to client certificate file path"
            )
        else:
            log.debug(
                "Neither LSST_HTTP_AUTH_BEARER_TOKEN nor (LSST_HTTP_AUTH_CLIENT_CERT and "
                "LSST_HTTP_AUTH_CLIENT_KEY) are initialized. Client authentication is disabled."
            )
        return session

343 

344 

345class HttpResourcePath(ResourcePath): 

346 """General HTTP(S) resource. 

347 

348 Notes 

349 ----- 

350 In order to configure the behavior of the object, one environment variable 

351 is inspected: 

352 

353 - LSST_HTTP_PUT_SEND_EXPECT_HEADER: if set (with any value), a 

354 "Expect: 100-Continue" header will be added to all HTTP PUT requests. 

355 This header is required by some servers to detect if the client 

356 knows how to handle redirections. In case of redirection, the body 

357 of the PUT request is sent to the redirected location and not to 

358 the front end server. 

359 """ 

360 

361 _is_webdav: Optional[bool] = None 

362 _sessions_store = SessionStore() 

363 _put_sessions_store = SessionStore() 

364 

365 # Use a session exclusively for PUT requests and another session for 

366 # all other requests. PUT requests may be redirected and in that case 

367 # the server may close the persisted connection. If that is the case 

368 # only the connection persisted for PUT requests will be closed and 

369 # the other persisted connection will be kept alive and reused for 

370 # other requests. 

371 

@property
def session(self) -> requests.Session:
    """Client session to address remote resource for all HTTP methods but
    PUT.

    The session is obtained from the class-level ``_sessions_store`` on
    first access and then kept on the instance.
    """
    if not hasattr(self, "_session"):
        self._session: requests.Session = self._sessions_store.get(self)
    return self._session

382 

@property
def put_session(self) -> requests.Session:
    """Client session for uploading data to the remote resource.

    The session is obtained from the class-level ``_put_sessions_store``
    on first access and then kept on the instance.
    """
    if not hasattr(self, "_put_session"):
        self._put_session: requests.Session = self._put_sessions_store.get(self)
    return self._put_session

391 

@property
def is_webdav_endpoint(self) -> bool:
    """Check if the current endpoint implements WebDAV features.

    The answer is remembered on this object once known. The underlying
    check is performed against the root URI, so it is shared by all the
    resources of the same endpoint.
    """
    if self._is_webdav is None:
        self._is_webdav = _is_webdav_endpoint(self.root_uri())
    return self._is_webdav

404 

def exists(self) -> bool:
    """Check that a remote HTTP resource exists.

    Returns
    -------
    exists : `bool`
        For a plain HTTP server, `True` if a HEAD request is answered with
        status 200. For a webDAV server, `True` if the PROPFIND response
        reports status 200 for the resource and `False` if it does not or
        if the server answers 404.

    Raises
    ------
    ValueError
        Raised if a webDAV server answers the PROPFIND request with a
        status other than 207 or 404.
    """
    log.debug("Checking if resource exists: %s", self.geturl())

    if not self.is_webdav_endpoint:
        # The remote is a plain HTTP server. Let's attempt a HEAD
        # request, even if the behavior for such a request against a
        # directory is not specified, so it depends on the server
        # implementation.
        head_resp = self.session.head(self.geturl(), timeout=TIMEOUT, allow_redirects=True, stream=True)
        return head_resp.status_code == requests.codes.ok  # 200

    # The remote endpoint is a webDAV server: send a PROPFIND request
    # requesting only the 'getlastmodified' property.
    request_body = (
        """<?xml version="1.0" encoding="utf-8" ?>"""
        """<D:propfind xmlns:D="DAV:"><D:prop><D:getlastmodified/></D:prop></D:propfind>"""
    )
    resp = self._propfind(request_body)

    if resp.status_code == requests.codes.not_found:  # 404
        return False

    if resp.status_code != requests.codes.multi_status:  # anything but 207
        raise ValueError(
            f"Unexpected status received for PROPFIND request for {self}: {resp.status_code}"
        )

    # Retrieve the status of the first and only element in the response
    first_element = _parse_propfind_response_body(resp.text)[0]
    return first_element.status_code == requests.codes.ok

433 

def size(self) -> int:
    """Return the size of the remote resource in bytes.

    Returns
    -------
    size : `int`
        0 for a directory-like resource. Otherwise the value of the
        'Content-Length' header of a HEAD response (plain HTTP server) or
        the 'getcontentlength' property of a PROPFIND response (webDAV
        server).

    Raises
    ------
    FileNotFoundError
        Raised if the server reports that the resource does not exist.
    ValueError
        Raised if the server answers with an unexpected status or if the
        HEAD response has no 'Content-Length' header.
    """
    if self.dirLike:
        return 0

    if self.is_webdav_endpoint:
        # The remote is a webDAV server: send a PROPFIND request to
        # retrieve the 'getcontentlength' property of the resource.
        request_body = (
            """<?xml version="1.0" encoding="utf-8" ?>"""
            """<D:propfind xmlns:D="DAV:"><D:prop><D:getcontentlength/></D:prop></D:propfind>"""
        )
        resp = self._propfind(body=request_body)
        if resp.status_code == requests.codes.not_found:
            raise FileNotFoundError(f"Resource {self} does not exist, status code: {resp.status_code}")
        if resp.status_code != requests.codes.multi_status:  # anything but 207
            raise ValueError(
                f"Unexpected response for PROPFIND request for {self}, status code: {resp.status_code}"
            )

        # Parse the response body and retrieve the 'getcontentlength'
        # property
        first_element = _parse_propfind_response_body(resp.text)[0]
        if first_element.status_code != requests.codes.ok:  # anything but 200
            raise FileNotFoundError(f"Resource {self} does not exist")
        return first_element.getcontentlength

    # The remote is a plain HTTP server. Send a HEAD request to
    # retrieve the size of the resource.
    resp = self.session.head(self.geturl(), timeout=TIMEOUT, allow_redirects=True, stream=True)
    if resp.status_code == requests.codes.not_found:
        raise FileNotFoundError(f"Resource {self} does not exist, status code: {resp.status_code}")
    if resp.status_code != requests.codes.ok:  # anything but 200
        raise ValueError(
            f"Unexpected response for HEAD request for {self}, status code: {resp.status_code}"
        )
    if "Content-Length" not in resp.headers:
        raise ValueError(
            f"Response to HEAD request to {self} does not contain 'Content-Length' header"
        )
    return int(resp.headers["Content-Length"])

478 

def mkdir(self) -> None:
    """Create the directory resource if it does not already exist.

    Missing parent directories are created first.

    Raises
    ------
    NotImplementedError
        Raised if the remote endpoint is not a webDAV server.
    NotADirectoryError
        Raised if this URI is not directory-like.
    """
    # Creating directories is only available on WebDAV backends.
    if not self.is_webdav_endpoint:
        raise NotImplementedError(
            f"Creation of directory {self} is not implemented by plain HTTP servers"
        )

    if not self.dirLike:
        raise NotADirectoryError(f"Can not create a 'directory' for file-like URI {self}")

    if self.exists():
        # Nothing to do.
        return

    # Create the parent first when it is missing. The URL comparison
    # guards against endless recursion when the parent of this resource
    # is the resource itself.
    parent = self.parent()
    if not parent.exists() and parent.geturl() != self.geturl():
        parent.mkdir()

    log.debug("Creating new directory: %s", self.geturl())
    self._mkcol()

500 

def remove(self) -> None:
    """Remove the resource.

    This simply delegates to ``_delete``, so any exception raised there
    propagates to the caller.
    """
    return self._delete()

504 

def read(self, size: int = -1) -> bytes:
    """Open the resource and return the contents in bytes.

    Parameters
    ----------
    size : `int`, optional
        The number of bytes to read. Negative or omitted indicates
        that all data should be read. A value of zero is handled like a
        negative one.

    Returns
    -------
    data : `bytes`
        The whole contents of the resource, or its first chunk of at most
        ``size`` bytes when ``size`` is positive. Empty if the resource
        has no contents.

    Raises
    ------
    FileNotFoundError
        Raised if the server answers with a status other than 200.
    """
    log.debug("Reading from remote resource: %s", self.geturl())

    # Only stream the body when a partial read is requested.
    stream = size > 0
    with time_this(log, msg="Read from remote resource %s", args=(self,)):
        resp = self.session.get(self.geturl(), stream=stream, timeout=TIMEOUT)

    try:
        if resp.status_code != requests.codes.ok:  # 200
            raise FileNotFoundError(f"Unable to read resource {self}; status code: {resp.status_code}")
        if not stream:
            return resp.content

        # An empty body yields no chunk at all: return empty bytes in that
        # case instead of letting StopIteration escape to the caller.
        return next(resp.iter_content(chunk_size=size), b"")
    finally:
        # A streamed response which is not fully consumed keeps hold of
        # its connection until it is explicitly closed.
        resp.close()

525 

def write(self, data: bytes, overwrite: bool = True) -> None:
    """Write the supplied bytes to the new resource.

    Parameters
    ----------
    data : `bytes`
        The bytes to write to the resource. The entire contents of the
        resource will be replaced.
    overwrite : `bool`, optional
        If `True` the resource will be overwritten if it exists. Otherwise
        the write will fail.

    Raises
    ------
    FileExistsError
        Raised if the resource exists and ``overwrite`` is `False`.
    """
    log.debug("Writing to remote resource: %s", self.geturl())
    if not overwrite and self.exists():
        raise FileExistsError(f"Remote resource {self} exists and overwrite has been disabled")

    # Ensure the parent directory exists
    parent_dir = self.parent()
    parent_dir.mkdir()

    # Upload the data
    num_bytes = len(data)
    with time_this(log, msg="Write to remote %s (%d bytes)", args=(self, num_bytes)):
        self._put(data=data)

549 

def transfer_from(
    self,
    src: ResourcePath,
    transfer: str = "copy",
    overwrite: bool = False,
    transaction: Optional[TransactionProtocol] = None,
) -> None:
    """Transfer the contents of another resource to this resource.

    Parameters
    ----------
    src : `ResourcePath`
        Source URI.
    transfer : `str`
        Mode to use for transferring the resource. It must be one of the
        values in ``self.transferModes``; "auto" is replaced by
        ``self.transferDefault``.
    overwrite : `bool`, optional
        If `False`, the transfer fails when this resource already exists.
    transaction : `~lsst.resources.utils.TransactionProtocol`, optional
        Currently unused.

    Raises
    ------
    ValueError
        Raised if the transfer mode is not supported.
    FileExistsError
        Raised if this resource exists and ``overwrite`` is `False`.
    """
    # Fail early to prevent delays if remote resources are requested
    if transfer not in self.transferModes:
        raise ValueError(f"Transfer mode {transfer} not supported by URI scheme {self.scheme}")

    # Existence checks cost time so do not call this unless we know
    # that debugging is enabled.
    if log.isEnabledFor(logging.DEBUG):
        log.debug(
            "Transferring %s [exists: %s] -> %s [exists: %s] (transfer=%s)",
            src,
            src.exists(),
            self,
            self.exists(),
            transfer,
        )

    # Short circuit immediately if the URIs are identical.
    if self == src:
        log.debug(
            "Target and destination URIs are identical: %s, returning immediately."
            " No further action required.",
            self,
        )
        return

    if self.exists() and not overwrite:
        raise FileExistsError(f"Destination path {self} already exists.")

    if transfer == "auto":
        transfer = self.transferDefault

    # We can use webDAV 'COPY' or 'MOVE' if both the current and source
    # resources are located in the same server.
    same_webdav_server = (
        isinstance(src, type(self)) and self.root_uri() == src.root_uri() and self.is_webdav_endpoint
    )
    if same_webdav_server:
        with time_this(log, msg="Transfer from %s to %s directly", args=(src, self)):
            if transfer == "move":
                self._move(src)
            else:
                self._copy(src)
        return

    # For resources of different classes or for plain HTTP resources we can
    # perform the copy or move operation by downloading to a local file
    # and uploading to the destination.
    with time_this(log, msg="Transfer from %s to %s via local copy", args=(src, self)):
        self._copy_via_local(src)

    # This was an explicit move, try to remove the source.
    if transfer == "move":
        src.remove()

615 

def _as_local(self) -> Tuple[str, bool]:
    """Download object over HTTP and place in temporary directory.

    Returns
    -------
    path : `str`
        Path to local temporary file.
    temporary : `bool`
        Always returns `True`. This is always a temporary file.

    Raises
    ------
    FileNotFoundError
        Raised if the server answers with a status other than 200.

    Notes
    -----
    The temporary file is created with ``delete=False``, so it outlives
    this call. If the download fails, the partially written file is
    removed before the error is propagated.
    """
    resp = self.session.get(self.geturl(), stream=True, timeout=TIMEOUT)
    tmp_name: Optional[str] = None
    try:
        if resp.status_code != requests.codes.ok:
            raise FileNotFoundError(f"Unable to download resource {self}; status code: {resp.status_code}")

        tmpdir, buffering = _get_temp_dir()
        with tempfile.NamedTemporaryFile(
            suffix=self.getExtension(), buffering=buffering, dir=tmpdir, delete=False
        ) as tmpFile:
            tmp_name = tmpFile.name
            with time_this(
                log,
                msg="Downloading %s [length=%s] to local file %s [chunk_size=%d]",
                args=(self, resp.headers.get("Content-Length"), tmpFile.name, buffering),
            ):
                for chunk in resp.iter_content(chunk_size=buffering):
                    tmpFile.write(chunk)
    except BaseException:
        # Do not leave a truncated file behind: nobody would know about
        # it and therefore nobody would remove it.
        if tmp_name is not None:
            with contextlib.suppress(OSError):
                os.remove(tmp_name)
        raise
    finally:
        # The response was streamed: release its connection explicitly,
        # whether the download succeeded or not.
        resp.close()

    return tmpFile.name, True

643 

def _send_webdav_request(
    self,
    method: str,
    url: Optional[str] = None,
    headers: Optional[dict[str, str]] = None,
    body: Optional[str] = None,
) -> requests.Response:
    """Send a webDAV request and correctly handle redirects.

    Parameters
    ----------
    method : `str`
        The method of the HTTP request to be sent, e.g. PROPFIND, MKCOL.
    url : `str`, optional
        The URL to send the request to. Defaults to the URL of this
        resource.
    headers : `dict`, optional
        A dictionary of key-value pairs (both strings) to include as
        headers in the request. Defaults to no extra headers.
    body: `str`, optional
        The body of the request.

    Returns
    -------
    response : `requests.Response`
        The first response which is not a redirection.

    Raises
    ------
    ValueError
        Raised if no final response is obtained after following the
        maximum number of redirections.

    Notes
    -----
    This way of sending webDAV requests is necessary for handling
    redirection ourselves, since the 'requests' package changes the method
    of the redirected request when the server responds with status 302 and
    the method of the original request is not HEAD (which is the case for
    webDAV requests).

    That means that when the webDAV server we interact with responds with
    a redirection to a PROPFIND or MKCOL request, the request gets
    converted to a GET request when sent to the redirected location.

    See `requests.sessions.SessionRedirectMixin.rebuild_method()` in
        https://github.com/psf/requests/blob/main/requests/sessions.py

    This behavior of the 'requests' package is meant to be compatible with
    what is specified in RFC 9110:

        https://www.rfc-editor.org/rfc/rfc9110#name-302-found

    For our purposes, we do need to follow the redirection and send a new
    request using the same HTTP verb.
    """
    if url is None:
        url = self.geturl()

    # Use a fresh dictionary for every call rather than a mutable default
    # value, which would be shared by all the calls.
    if headers is None:
        headers = {}

    max_redirects = 5
    for _ in range(max_redirects):
        resp = self.session.request(
            method, url, data=body, headers=headers, stream=True, timeout=TIMEOUT, allow_redirects=False
        )
        if not resp.is_redirect:
            return resp

        # Follow the redirection ourselves, with the same method. The
        # intermediate response was streamed and is of no further use:
        # close it to release its connection.
        url = resp.headers["Location"]
        resp.close()

    # We reached the maximum allowed number of redirects. Stop trying.
    raise ValueError(
        f"Could not get a response to {method} request for {self} after {max_redirects} redirections"
    )

698 

def _propfind(self, body: Optional[str] = None, depth: str = "0") -> requests.Response:
    """Send a PROPFIND webDAV request and return the response.

    Parameters
    ----------
    body : `str`, optional
        The body of the PROPFIND request to send to the server. If
        provided, it is expected to be a XML document.
    depth : `str`, optional
        The value of the 'Depth' header to include in the request.

    Returns
    -------
    response : `requests.Response`
        Response to the PROPFIND request.
    """
    headers = {"Depth": depth}
    if body is not None:
        # Describe the XML payload which goes with the request.
        headers["Content-Type"] = 'application/xml; charset="utf-8"'
        headers["Content-Length"] = str(len(body))
    return self._send_webdav_request("PROPFIND", headers=headers, body=body)

723 

def _options(self) -> requests.Response:
    """Send a OPTIONS webDAV request for this resource.

    Returns
    -------
    response : `requests.Response`
        Response to the OPTIONS request.
    """
    response = self._send_webdav_request("OPTIONS")
    return response

728 

def _head(self) -> requests.Response:
    """Send a HEAD webDAV request for this resource.

    Returns
    -------
    response : `requests.Response`
        Response to the HEAD request.
    """
    response = self._send_webdav_request("HEAD")
    return response

733 

def _mkcol(self) -> None:
    """Send a MKCOL webDAV request to create a collection. The collection
    may already exist.

    Raises
    ------
    ValueError
        Raised if the server answers with a status other than 201 or 405.
    """
    status = self._send_webdav_request("MKCOL").status_code
    if status == requests.codes.created:  # 201
        return

    if status != requests.codes.method_not_allowed:  # anything but 405
        raise ValueError(f"Can not create directory {self}, status code: {status}")

    # The remote directory already exists
    log.debug("Can not create directory: %s may already exist: skipping.", self.geturl())

747 

def _delete(self) -> None:
    """Send a DELETE webDAV request for this resource.

    Raises
    ------
    NotImplementedError
        Raised if this resource is directory-like and the remote endpoint
        is not a webDAV server.
    FileNotFoundError
        Raised if the server answers with status 404.
    ValueError
        Raised if the server answers with any status other than 200, 202,
        204 or 404.
    """
    log.debug("Deleting %s ...", self.geturl())

    # If this is a directory, ensure the remote is a webDAV server because
    # plain HTTP servers don't support DELETE requests on non-file
    # paths.
    if self.dirLike and not self.is_webdav_endpoint:
        raise NotImplementedError(
            f"Deletion of directory {self} is not implemented by plain HTTP servers"
        )

    success_statuses = (requests.codes.ok, requests.codes.accepted, requests.codes.no_content)
    resp = self._send_webdav_request("DELETE")
    if resp.status_code in success_statuses:
        return

    if resp.status_code == requests.codes.not_found:
        raise FileNotFoundError(f"Resource {self} does not exist, status code: {resp.status_code}")

    # TODO: the response to a DELETE request against a webDAV server
    # may be multistatus. If so, we need to parse the response body to
    # determine more precisely the reason of the failure (e.g. a lock)
    # and provide a more helpful error message.
    raise ValueError(f"Unable to delete resource {self}; status code: {resp.status_code}")

772 

def _copy_via_local(self, src: ResourcePath) -> None:
    """Replace the contents of this resource with the contents of another
    resource by going through a local file.

    Parameters
    ----------
    src : `ResourcePath`
        The source of the contents to copy to `self`.
    """
    # Obtain a local copy of the source, open it for reading in binary
    # mode and upload its contents with a PUT request.
    with src.as_local() as local_uri, open(local_uri.ospath, "rb") as local_file, time_this(
        log, msg="Transfer from %s to %s via local file", args=(src, self)
    ):
        self._put(data=local_file)

786 

def _copy_or_move(self, method: str, src: HttpResourcePath) -> None:
    """Send a COPY or MOVE webDAV request to copy or replace the contents
    of this resource with the contents of another resource located in the
    same server.

    Parameters
    ----------
    method : `str`
        The method to perform. Valid values are "COPY" or "MOVE" (in
        uppercase).

    src : `HttpResourcePath`
        The source of the contents to move to `self`.

    Raises
    ------
    ValueError
        Raised if the server answers with a status other than 201 or 204.
    """
    # The request is sent to the URL of the source; this resource is
    # designated as the target by the 'Destination' header.
    destination = {"Destination": self.geturl()}
    resp = self._send_webdav_request(method, url=src.geturl(), headers=destination)
    if resp.status_code in (requests.codes.created, requests.codes.no_content):
        return

    if resp.status_code != requests.codes.multi_status:
        raise ValueError(
            f"{method} operation from {src} to {self} failed, status code: {resp.status_code}"
        )

    # Multistatus response: report the status and error elements found in
    # the body of the response.
    tree = eTree.fromstring(resp.content)
    status_element = tree.find("./{DAV:}response/{DAV:}status")
    status = "unknown" if status_element is None else status_element.text
    error = tree.find("./{DAV:}response/{DAV:}error")
    raise ValueError(f"{method} returned multistatus reponse with status {status} and error {error}")

816 

def _copy(self, src: HttpResourcePath) -> None:
    """Replace the contents of this resource (if any) with the contents of
    another resource located in the same server.

    Parameters
    ----------
    src : `HttpResourcePath`
        The source of the contents to copy to `self`.

    Notes
    -----
    The copy currently always goes through a local temporary file; the
    webDAV COPY request is not used (see the comment in the body).
    """
    # Neither dCache nor XrootD currently implement the COPY
    # webDAV method as documented in
    #    http://www.webdav.org/specs/rfc4918.html#METHOD_COPY
    # (See issues DM-37603 and DM-37651 for details)
    #
    # For the time being, we use a temporary local file to
    # perform the copy client side.
    # TODO: when those 2 issues above are solved, always use the webDAV
    # COPY request and drop the local copy below.
    use_webdav_copy = False
    if use_webdav_copy:
        return self._copy_or_move("COPY", src)

    return self._copy_via_local(src)

840 

841 def _move(self, src: HttpResourcePath) -> None: 

842 """Send a MOVE webDAV request to replace the contents of this resource 

843 with the contents of another resource located in the same server. 

844 

845 Parameters 

846 ---------- 

847 src : `HttpResourcePath` 

848 The source of the contents to move to `self`. 

849 """ 

850 return self._copy_or_move("MOVE", src) 

851 

def _put(self, data: Union[BinaryIO, bytes]) -> None:
    """Perform an HTTP PUT request and handle redirection.

    Parameters
    ----------
    data : `Union[BinaryIO, bytes]`
        The data to be included in the body of the PUT request.

    Raises
    ------
    ValueError
        Raised if the status of the upload response is not one of "ok",
        "created" or "no content".
    """
    # Retrieve the final URL for this upload by sending a PUT request with
    # no content. Follow a single server redirection to retrieve the
    # final URL.
    headers = {"Content-Length": "0"}
    if _SEND_EXPECT_HEADER_ON_PUT:
        headers["Expect"] = "100-continue"

    url = self.geturl()
    log.debug("Sending empty PUT request to %s", url)
    # Both requests are streamed and their bodies are never read, so the
    # responses are used as context managers to make sure the underlying
    # connections are released.
    with self.session.request(
        "PUT", url, data=None, headers=headers, stream=True, timeout=TIMEOUT, allow_redirects=False
    ) as resp:
        if resp.is_redirect:
            url = resp.headers["Location"]

    # Send data to its final destination using the PUT session
    log.debug("Uploading data to %s", url)
    with self.put_session.put(
        url, data=data, timeout=TIMEOUT, allow_redirects=False, stream=True
    ) as resp:
        status_code = resp.status_code

    if status_code not in (requests.codes.ok, requests.codes.created, requests.codes.no_content):
        raise ValueError(f"Can not write file {self}, status code: {status_code}")

880 

@contextlib.contextmanager
def _openImpl(
    self,
    mode: str = "r",
    *,
    encoding: Optional[str] = None,
) -> Iterator[ResourceHandleProtocol]:
    """Yield a handle giving access to the contents of this resource.

    Parameters
    ----------
    mode : `str`, optional
        Mode in which the resource is opened.
    encoding : `str`, optional
        Encoding used when a text handle is yielded.

    Yields
    ------
    handle : `ResourceHandleProtocol`
        A handle reading directly from the server when the mode is "r" or
        "rb" and the server advertises support for byte ranges, otherwise
        the handle provided by the parent class implementation.
    """
    # Ask the server whether it accepts byte-range requests for this URL.
    head = self._head()
    supports_ranges = (
        head.status_code == requests.codes.ok and head.headers.get("Accept-Ranges") == "bytes"
    )

    if not (mode in ("rb", "r") and supports_ranges):
        # Any other mode, or a server without range support: defer to the
        # parent class.
        with super()._openImpl(mode, encoding=encoding) as http_handle:
            yield http_handle
        return

    handle: ResourceHandleProtocol = HttpReadResourceHandle(
        mode, log, url=self.geturl(), session=self.session, timeout=TIMEOUT
    )
    if mode != "r":
        yield handle
    else:
        # cast because the protocol is compatible, but does not have
        # BytesIO in the inheritance tree
        yield io.TextIOWrapper(cast(io.BytesIO, handle), encoding=encoding)

904 

905 

def _dump_response(resp: requests.Response) -> None:
    """Write a summary of a HTTP or webDAV request and of its response to
    the debug log.

    Parameters
    ----------
    resp : `requests.Response`
        The response to log. The request which produced it is retrieved
        from its ``request`` attribute.

    Notes
    -----
    Intended for development purposes only.
    """
    sent = resp.request
    log.debug("-----------------------------------------------")
    log.debug("Request")
    for fmt, value in (
        (" method=%s", sent.method),
        (" URL=%s", sent.url),
        (" headers=%s", sent.headers),
    ):
        log.debug(fmt, value)

    # The body of a PUT request is never logged; for other methods only
    # its first 120 items are.
    if sent.method == "PUT":
        log.debug(" body=<data>")
    elif sent.body is None:
        log.debug(" body=<empty>")
    else:
        log.debug(" body=%r", sent.body[:120])

    log.debug("Response:")
    log.debug(" status_code=%d", resp.status_code)
    log.debug(" headers=%s", resp.headers)

    # Plain text bodies are logged in full, anything else is truncated to
    # its first 80 bytes.
    payload = resp.content
    if not payload:
        log.debug(" body=<empty>")
    else:
        is_plain_text = resp.headers.get("Content-Type") == "text/plain"
        log.debug(" body=%r", payload if is_plain_text else payload[:80])

939 

940 

941def _is_protected(filepath: str) -> bool: 

942 """Return true if the permissions of file at filepath only allow for access 

943 by its owner. 

944 

945 Parameters 

946 ---------- 

947 filepath : `str` 

948 Path of a local file. 

949 """ 

950 if not os.path.isfile(filepath): 

951 return False 

952 mode = stat.S_IMODE(os.stat(filepath).st_mode) 

953 owner_accessible = bool(mode & stat.S_IRWXU) 

954 group_accessible = bool(mode & stat.S_IRWXG) 

955 other_accessible = bool(mode & stat.S_IRWXO) 

956 return owner_accessible and not group_accessible and not other_accessible 

957 

958 

959def _parse_propfind_response_body(body: str) -> List[PropfindResponse]: 

960 """Parse the XML-encoded contents of the response body to a webDAV PROPFIND 

961 request. 

962 

963 Parameters 

964 ---------- 

965 body : `str` 

966 XML-encoded response body to a PROPFIND request 

967 

968 Returns 

969 ------- 

970 responses : `List[PropfindResponse]` 

971 

972 Notes 

973 ----- 

974 Is is expected that there is at least one reponse in `body`, otherwise 

975 this function raises. 

976 """ 

977 # A response body to a PROPFIND request is of the form (indented for 

978 # readability): 

979 # 

980 # <?xml version="1.0" encoding="UTF-8"?> 

981 # <D:multistatus xmlns:D="DAV:"> 

982 # <D:response> 

983 # <D:href>path/to/resource</D:href> 

984 # <D:propstat> 

985 # <D:prop> 

986 # <D:resourcetype> 

987 # <D:collection xmlns:D="DAV:"/> 

988 # </D:resourcetype> 

989 # <D:getlastmodified> 

990 # Fri, 27 Jan 2 023 13:59:01 GMT 

991 # </D:getlastmodified> 

992 # <D:getcontentlength> 

993 # 12345 

994 # </D:getcontentlength> 

995 # </D:prop> 

996 # <D:status> 

997 # HTTP/1.1 200 OK 

998 # </D:status> 

999 # </D:propstat> 

1000 # </D:response> 

1001 # <D:response> 

1002 # ... 

1003 # </D:response> 

1004 # <D:response> 

1005 # ... 

1006 # </D:response> 

1007 # </D:multistatus> 

1008 

1009 # Scan all the 'response' elements and extract the relevant properties 

1010 responses = [] 

1011 multistatus = eTree.fromstring(body.strip()) 

1012 for response in multistatus.findall("./{DAV:}response"): 

1013 responses.append(PropfindResponse(response)) 

1014 

1015 if len(responses) == 0: 

1016 # Could not parse the body 

1017 raise ValueError(f"Unable to parse response for PROPFIND request: {response}") 

1018 else: 

1019 return responses 

1020 

1021 

class PropfindResponse:
    """Helper class to contain the parsed response to a PROPFIND request for
    a single resource.

    Parameters
    ----------
    response : `xml.etree.ElementTree.Element` or `None`
        The 'response' element to extract the properties from. If `None`,
        all the attributes keep their default values.
    """

    # Regular expression to extract the status code and reason from
    # the 'status' element of a PROPFIND response.
    _status_rex = re.compile(r"^HTTP/.* +(?P<status_code>\d{3}) +(?P<reason>.*)$", re.IGNORECASE)

    def __init__(self, response: Optional[eTree.Element]):
        # Values used for every property missing from the response.
        self.status_code: int = 0
        self.reason: str = ""
        self.href: str = ""
        self.collection: bool = False
        self.getlastmodified: str = ""
        self.getcontentlength: int = 0

        if response is not None:
            self._parse(response)

    @staticmethod
    def _text(element: Optional[eTree.Element]) -> str:
        """Return the text of ``element`` stripped of surrounding whitespace,
        or an empty string if the element is missing or has no text.
        """
        if element is None or element.text is None:
            return ""
        return element.text.strip()

    def _parse(self, response: eTree.Element) -> None:
        """Populate the attributes of this object from a 'response' element.

        Parameters
        ----------
        response : `xml.etree.ElementTree.Element`
            The 'response' element to extract the properties from.
        """
        # Parse "status". The text is stripped first because the regular
        # expression is anchored and servers may pad or indent the value.
        status = self._text(response.find("./{DAV:}propstat/{DAV:}status"))
        if match := self._status_rex.match(status):
            self.status_code = int(match["status_code"])
            self.reason = match["reason"]

        # Parse "href"
        element = response.find("./{DAV:}href")
        if element is not None:
            self.href = self._text(element)

        # Parse "collection": the mere presence of the element is enough.
        element = response.find("./{DAV:}propstat/{DAV:}prop/{DAV:}resourcetype/{DAV:}collection")
        if element is not None:
            self.collection = True

        # Parse "getlastmodified"
        element = response.find("./{DAV:}propstat/{DAV:}prop/{DAV:}getlastmodified")
        if element is not None:
            self.getlastmodified = self._text(element)

        # Parse "getcontentlength". An empty element keeps the default
        # value instead of failing the integer conversion.
        content_length = self._text(response.find("./{DAV:}propstat/{DAV:}prop/{DAV:}getcontentlength"))
        if content_length:
            self.getcontentlength = int(content_length)