Coverage for python/lsst/resources/http.py: 17%
503 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-03-25 02:02 -0700
1# This file is part of lsst-resources.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# Use of this source code is governed by a 3-clause BSD-style
10# license that can be found in the LICENSE file.
12from __future__ import annotations
14__all__ = ("HttpResourcePath",)
16import contextlib
17import functools
18import io
19import logging
20import os
21import os.path
22import random
23import re
24import stat
25import tempfile
26import xml.etree.ElementTree as eTree
27from typing import TYPE_CHECKING, BinaryIO, Iterator, List, Optional, Tuple, Union, cast
29import requests
30from astropy import units as u
31from lsst.utils.timer import time_this
32from requests.adapters import HTTPAdapter
33from requests.auth import AuthBase
34from urllib3.util.retry import Retry
36from ._resourceHandles import ResourceHandleProtocol
37from ._resourceHandles._httpResourceHandle import HttpReadResourceHandle
38from ._resourcePath import ResourcePath
40if TYPE_CHECKING: 40 ↛ 41line 40 didn't jump to line 41, because the condition on line 40 was never true
41 from .utils import TransactionProtocol
43log = logging.getLogger(__name__)
# Default timeouts for all HTTP requests, in seconds.
DEFAULT_TIMEOUT_CONNECT = 300
DEFAULT_TIMEOUT_READ = 1500

# Default number of connections to persist with both the front end and back end
# servers. Kept as strings because they are used as fallback values for
# os.environ.get() and converted with int() at the point of use.
DEFAULT_FRONTEND_PERSISTENT_CONNECTIONS = "2"
DEFAULT_BACKEND_PERSISTENT_CONNECTIONS = "1"

# Accepted digest algorithms for the LSST_HTTP_DIGEST environment variable;
# any other value is treated as "no digest requested".
ACCEPTED_DIGESTS = ("adler32", "md5", "sha-256", "sha-512")
class HttpResourcePathConfig:
    """Lazy, environment-driven configuration used by class
    HttpResourcePath.

    Each property reads its value from the environment on first access and
    caches the result, so the environment is inspected at most once per item.
    """

    # Cached values; ``None`` means "not yet read from the environment".
    _front_end_connections: Optional[int] = None
    _back_end_connections: Optional[int] = None
    _digest_algorithm: Optional[str] = None
    _send_expect_on_put: Optional[bool] = None
    _timeout: Optional[tuple[int, int]] = None

    @property
    def front_end_connections(self) -> int:
        """Number of persistent connections to the front end server."""
        if self._front_end_connections is None:
            raw = os.environ.get(
                "LSST_HTTP_FRONTEND_PERSISTENT_CONNECTIONS",
                DEFAULT_FRONTEND_PERSISTENT_CONNECTIONS,
            )
            self._front_end_connections = int(raw)
        return self._front_end_connections

    @property
    def back_end_connections(self) -> int:
        """Number of persistent connections to the back end servers."""
        if self._back_end_connections is None:
            raw = os.environ.get(
                "LSST_HTTP_BACKEND_PERSISTENT_CONNECTIONS",
                DEFAULT_BACKEND_PERSISTENT_CONNECTIONS,
            )
            self._back_end_connections = int(raw)
        return self._back_end_connections

    @property
    def digest_algorithm(self) -> str:
        """Algorithm to ask the server to use for computing and recording
        digests of each file contents in PUT requests.

        Returns
        -------
        digest_algorithm: `str`
            The name of a digest algorithm or the empty string if no
            algorithm is configured (or the configured one is not accepted).
        """
        if self._digest_algorithm is None:
            candidate = os.environ.get("LSST_HTTP_DIGEST", "").lower()
            # Silently ignore unknown algorithm names.
            self._digest_algorithm = candidate if candidate in ACCEPTED_DIGESTS else ""
        return self._digest_algorithm

    @property
    def send_expect_on_put(self) -> bool:
        """Return True if a "Expect: 100-continue" header is to be sent to
        the server on each PUT request.

        Some servers (e.g. dCache) uses this information as an indication
        that the client knows how to handle redirects to the specific server
        that will actually receive the data for PUT requests.
        """
        if self._send_expect_on_put is None:
            # Only the presence of the variable matters, not its value.
            self._send_expect_on_put = "LSST_HTTP_PUT_SEND_EXPECT_HEADER" in os.environ
        return self._send_expect_on_put

    @property
    def timeout(self) -> tuple[int, int]:
        """Return a tuple (connect, read) with the timeouts, in seconds,
        for establishing a connection with the server and for reading its
        response.
        """
        if self._timeout is None:
            self._timeout = (
                int(os.environ.get("LSST_HTTP_TIMEOUT_CONNECT", DEFAULT_TIMEOUT_CONNECT)),
                int(os.environ.get("LSST_HTTP_TIMEOUT_READ", DEFAULT_TIMEOUT_READ)),
            )
        return self._timeout
@functools.lru_cache
def _is_webdav_endpoint(path: Union[ResourcePath, str]) -> bool:
    """Return whether the remote HTTP endpoint implements WebDAV features.

    Parameters
    ----------
    path : `ResourcePath` or `str`
        URL of the resource to probe. Should preferably refer to the root,
        since the result is cached and shared by all paths on that server.

    Returns
    -------
    _is_webdav_endpoint : `bool`
        True if the endpoint implements WebDAV, False if it doesn't.
    """
    log.debug("Detecting HTTP endpoint type for '%s'...", path)
    try:
        ca_cert_bundle = os.getenv("LSST_HTTP_CACERT_BUNDLE")
        verify: Union[bool, str] = ca_cert_bundle if ca_cert_bundle else True
        resp = requests.options(str(path), verify=verify, stream=False)

        # A webDAV server must advertise at least compliance class "1" in
        # its "DAV" response header. We don't use locks, so class 1 is
        # enough for our purposes. Compliance classes are documented in
        # http://www.webdav.org/specs/rfc4918.html#dav.compliance.classes
        #
        # Examples of values for header DAV are:
        #   DAV: 1, 2
        #   DAV: 1, <http://apache.org/dav/propset/fs/1>
        dav_header = resp.headers.get("DAV")
        if dav_header is None:
            return False
        # Convert to str to keep mypy happy.
        return "1" in str(dav_header).replace(" ", "").split(",")
    except requests.exceptions.SSLError:
        log.warning(
            "Environment variable LSST_HTTP_CACERT_BUNDLE can be used to "
            "specify a bundle of certificate authorities you trust which are "
            "not included in the default set of trusted authorities of your "
            "system."
        )
        raise
# Cached (path, block_size): the local directory used for temporary files
# together with the preferred I/O block size for its file system.
_TMPDIR: Optional[tuple[str, int]] = None


def _get_temp_dir() -> tuple[str, int]:
    """Return the temporary directory path and block size.

    This function caches its result in the module-level ``_TMPDIR``.
    """
    global _TMPDIR
    if _TMPDIR:
        return _TMPDIR

    # Prefer 'LSST_RESOURCES_TMPDIR' then 'TMPDIR' when either points at an
    # existing directory; otherwise fall back to the current working
    # directory.
    tmpdir = os.getcwd()
    for candidate in (os.getenv(v) for v in ("LSST_RESOURCES_TMPDIR", "TMPDIR")):
        if candidate and os.path.isdir(candidate):
            tmpdir = candidate
            break

    # Use 256 blocks of typical size (4096 bytes) or 10 file system blocks,
    # whichever is larger: a reasonable compromise between buffer memory
    # and the number of system calls issued when reading from or writing to
    # temporary files.
    fsstats = os.statvfs(tmpdir)
    _TMPDIR = (tmpdir, max(10 * fsstats.f_bsize, 256 * 4096))
    return _TMPDIR
class BearerTokenAuth(AuthBase):
    """Attach a bearer token 'Authorization' header to each request.

    Parameters
    ----------
    token : `str`
        Either the token value itself, or the path to a local protected
        file which contains the value of the token.
    """

    def __init__(self, token: str):
        self._token = self._path = None
        self._mtime: float = -1.0
        if not token:
            return

        self._token = token
        if os.path.isfile(token):
            # The token lives in a file: record its absolute path and make
            # sure only the owner can read it before loading its contents.
            self._path = os.path.abspath(token)
            if not _is_protected(self._path):
                raise PermissionError(
                    f"Bearer token file at {self._path} must be protected for access only by its owner"
                )
            self._refresh()

    def _refresh(self) -> None:
        """Re-read the token file (if any) when its modification time is
        more recent than the last time we read it.
        """
        if not self._path:
            return

        mtime = os.stat(self._path).st_mtime
        if mtime <= self._mtime:
            return

        log.debug("Reading bearer token file at %s", self._path)
        self._mtime = mtime
        with open(self._path) as f:
            self._token = f.read().rstrip("\n")

    def __call__(self, req: requests.PreparedRequest) -> requests.PreparedRequest:
        if self._token:
            self._refresh()
            req.headers["Authorization"] = f"Bearer {self._token}"
        return req
class SessionStore:
    """Cache a reusable HTTP client session per endpoint."""

    def __init__(self, num_pools: int = 10, max_persistent_connections: int = 1) -> None:
        # Maps a root URI (str) to the session serving that endpoint.
        self._sessions: dict[str, requests.Session] = {}

        # Number of connection pools to keep: one pool per remote host.
        # See documentation of urllib3 PoolManager class:
        # https://urllib3.readthedocs.io
        self._num_pools = num_pools

        # Maximum number of connections per remote host to persist in each
        # connection pool. See urllib3 Advanced Usage documentation:
        # https://urllib3.readthedocs.io/en/stable/advanced-usage.html
        self._max_persistent_connections = max_persistent_connections

    def clear(self) -> None:
        """Destroy all previously created sessions and attempt to close
        underlying idle network connections.
        """
        # Closing the sessions should close their idle network connections
        # as a consequence; the Requests API exposes no way to force the
        # underlying sockets shut.
        for session in self._sessions.values():
            session.close()
        self._sessions.clear()

    def get(self, rpath: ResourcePath) -> requests.Session:
        """Retrieve a session for accessing the remote resource at rpath.

        Parameters
        ----------
        rpath : `ResourcePath`
            URL to a resource at the remote server for which a session is
            to be retrieved.

        Notes
        -----
        Sessions are cached per endpoint: the same session is returned for
        any path under a given root URI, e.g. for both
        "https://www.example.org/path/to/file" and
        "https://www.example.org/any/other/path". Note that
        "https://www.example.org" and "https://www.example.org:12345" get
        different sessions since their ports differ.

        The session is configured from these environment variables:

        - LSST_HTTP_CACERT_BUNDLE: path to a .pem file containing the CA
          certificates to trust when verifying the server's certificate.

        - LSST_HTTP_AUTH_BEARER_TOKEN: value of a bearer token or path to a
          local file containing a bearer token, used as the client
          authentication mechanism with all requests. The token file must
          be readable only by its owner. If initialized, takes precedence
          over LSST_HTTP_AUTH_CLIENT_CERT and LSST_HTTP_AUTH_CLIENT_KEY.

        - LSST_HTTP_AUTH_CLIENT_CERT: path to a .pem file containing the
          client certificate for authenticating to the server. Requires
          LSST_HTTP_AUTH_CLIENT_KEY to point at the client private key
          file, which must be readable only by its owner.
        """
        root_uri = str(rpath.root_uri())
        if root_uri not in self._sessions:
            # No session yet for this endpoint: create and cache a new one.
            self._sessions[root_uri] = self._make_session(rpath)
        return self._sessions[root_uri]

    def _make_session(self, rpath: ResourcePath) -> requests.Session:
        """Make a new session configured from values from the environment."""
        session = requests.Session()
        root_uri = str(rpath.root_uri())
        log.debug("Creating new HTTP session for endpoint %s ...", root_uri)

        # Retry policy shared by the adapters mounted below. Only idempotent
        # requests are retried automatically.
        retries = Retry(
            # Total number of retries to allow; takes precedence over the
            # other counts.
            total=6,
            # How many connection-related errors to retry on.
            connect=3,
            # How many times to retry on read errors.
            read=3,
            # Backoff factor applied between attempts after the second try
            # (seconds); randomized so simultaneous clients do not retry in
            # lockstep.
            backoff_factor=30 * (1 + random.random()),
            # How many times to retry on bad status codes.
            status=5,
            # Uppercased HTTP verbs we should retry on (idempotent only).
            allowed_methods=frozenset(
                ["COPY", "DELETE", "GET", "HEAD", "MKCOL", "OPTIONS", "PROPFIND", "PUT"]
            ),
            # HTTP status codes that force a retry.
            status_forcelist=frozenset(
                [
                    requests.codes.too_many_requests,  # 429
                    requests.codes.internal_server_error,  # 500
                    requests.codes.bad_gateway,  # 502
                    requests.codes.service_unavailable,  # 503
                    requests.codes.gateway_timeout,  # 504
                ]
            ),
            # Respect the Retry-After header on the status codes above.
            respect_retry_after_header=True,
        )

        # Persist the configured number of connections to the front end
        # server.
        session.mount(
            root_uri,
            HTTPAdapter(
                pool_connections=self._num_pools,
                pool_maxsize=self._max_persistent_connections,
                pool_block=False,
                max_retries=retries,
            ),
        )

        # Do not persist connections to back end servers, which may vary
        # from request to request: systematically keeping them open could
        # exhaust their capacity when there are thousands of simultaneous
        # clients.
        session.mount(
            f"{rpath.scheme}://",
            HTTPAdapter(
                pool_connections=self._num_pools,
                pool_maxsize=0,
                pool_block=False,
                max_retries=retries,
            ),
        )

        # Without secure HTTP there is no server verification and no bearer
        # token to protect.
        if rpath.scheme != "https":
            return session

        # Server verification: use a specific CA bundle when configured.
        session.verify = True
        if ca_bundle := os.getenv("LSST_HTTP_CACERT_BUNDLE"):
            session.verify = ca_bundle

        # Prefer bearer-token client authentication when configured.
        if token := os.getenv("LSST_HTTP_AUTH_BEARER_TOKEN"):
            log.debug("... using bearer token authentication")
            session.auth = BearerTokenAuth(token)
            return session

        # Otherwise try client certificate authentication, which requires
        # both LSST_HTTP_AUTH_CLIENT_CERT and LSST_HTTP_AUTH_CLIENT_KEY.
        client_cert = os.getenv("LSST_HTTP_AUTH_CLIENT_CERT")
        client_key = os.getenv("LSST_HTTP_AUTH_CLIENT_KEY")
        if client_cert and client_key:
            if not _is_protected(client_key):
                raise PermissionError(
                    f"Private key file at {client_key} must be protected for access only by its owner"
                )
            log.debug("... using client certificate authentication.")
            session.cert = (client_cert, client_key)
            return session

        if client_cert:
            # Only the client certificate was provided.
            raise ValueError(
                "Environment variable LSST_HTTP_AUTH_CLIENT_KEY must be set to client private key file path"
            )

        if client_key:
            # Only the client private key was provided.
            raise ValueError(
                "Environment variable LSST_HTTP_AUTH_CLIENT_CERT must be set to client certificate file path"
            )

        log.debug(
            "Neither LSST_HTTP_AUTH_BEARER_TOKEN nor (LSST_HTTP_AUTH_CLIENT_CERT and "
            "LSST_HTTP_AUTH_CLIENT_KEY) are initialized. Client authentication is disabled."
        )
        return session
480class HttpResourcePath(ResourcePath):
481 """General HTTP(S) resource.
483 Notes
484 -----
485 In order to configure the behavior of instances of this class, the
486 environment variables below are inspected:
488 - LSST_HTTP_PUT_SEND_EXPECT_HEADER: if set (with any value), a
489 "Expect: 100-Continue" header will be added to all HTTP PUT requests.
490 This header is required by some servers to detect if the client
491 knows how to handle redirections. In case of redirection, the body
492 of the PUT request is sent to the redirected location and not to
493 the front end server.
495 - LSST_HTTP_TIMEOUT_CONNECT and LSST_HTTP_TIMEOUT_READ: if set to a
496 numeric value, they are interpreted as the number of seconds to wait
497 for establishing a connection with the server and for reading its
498 response, respectively.
500 - LSST_HTTP_FRONTEND_PERSISTENT_CONNECTIONS and
501 LSST_HTTP_BACKEND_PERSISTENT_CONNECTIONS: contain the maximum number
502 of connections to attempt to persist with both the front end servers
503 and the back end servers.
504 Default values: DEFAULT_FRONTEND_PERSISTENT_CONNECTIONS and
505 DEFAULT_BACKEND_PERSISTENT_CONNECTIONS.
507 - LSST_HTTP_DIGEST: case-insensitive name of the digest algorithm to
508 ask the server to compute for every file's content sent to the server
509 via a PUT request. No digest is requested if this variable is not set
510 or is set to an invalid value.
511 Valid values are those in ACCEPTED_DIGESTS.
512 """
    # Cached result of the WebDAV-capability probe for this URI; ``None``
    # until the ``is_webdav_endpoint`` property has been evaluated once.
    _is_webdav: Optional[bool] = None

    # Configuration items for this class instances.
    _config = HttpResourcePathConfig()

    # The session for metadata requests is used for interacting with
    # the front end servers for requests such as PROPFIND, HEAD, etc. Those
    # interactions are typically served by the front end servers. We want to
    # keep the connection to the front end servers open, to reduce the cost
    # associated to TCP and TLS handshaking for each new request.
    _metadata_session_store = SessionStore(
        num_pools=5,
        max_persistent_connections=_config.front_end_connections,
    )

    # The data session is used for interaction with the front end servers
    # which typically redirect to the back end servers for serving our PUT
    # and GET requests. We attempt to keep a single connection open with the
    # front end server, if possible. This depends on how the server behaves
    # and the kind of request. Some servers close the connection when
    # redirecting the client to a back end server, for instance when serving
    # a PUT request.
    _data_session_store = SessionStore(
        num_pools=25,
        max_persistent_connections=_config.back_end_connections,
    )

    # Process ID which created the sessions above. We need to store this
    # to replace sessions created by a parent process and inherited by a
    # child process after a fork, to avoid confusing the SSL layer.
    _pid: int = -1
546 @property
547 def metadata_session(self) -> requests.Session:
548 """Client session to send requests which do not require upload or
549 download of data, i.e. mostly metadata requests.
550 """
552 if hasattr(self, "_metadata_session") and self._pid == os.getpid():
553 return self._metadata_session
555 # Reset the store in case it was created by another process and
556 # retrieve a session.
557 self._metadata_session_store.clear()
558 self._pid = os.getpid()
559 self._metadata_session: requests.Session = self._metadata_session_store.get(self)
560 return self._metadata_session
562 @property
563 def data_session(self) -> requests.Session:
564 """Client session for uploading and downloading data."""
566 if hasattr(self, "_data_session") and self._pid == os.getpid():
567 return self._data_session
569 # Reset the store in case it was created by another process and
570 # retrieve a session.
571 self._data_session_store.clear()
572 self._pid = os.getpid()
573 self._data_session: requests.Session = self._data_session_store.get(self)
574 return self._data_session
576 def _clear_sessions(self) -> None:
577 """Internal method to close the socket connections still open. Used
578 only in test suites to avoid warnings.
579 """
580 self._metadata_session_store.clear()
581 self._data_session_store.clear()
583 if hasattr(self, "_metadata_session"):
584 delattr(self, "_metadata_session")
586 if hasattr(self, "_data_session"):
587 delattr(self, "_data_session")
589 @property
590 def is_webdav_endpoint(self) -> bool:
591 """Check if the current endpoint implements WebDAV features.
593 This is stored per URI but cached by root so there is
594 only one check per hostname.
595 """
596 if self._is_webdav is not None:
597 return self._is_webdav
599 self._is_webdav = _is_webdav_endpoint(self.root_uri())
600 return self._is_webdav
602 def exists(self) -> bool:
603 """Check that a remote HTTP resource exists."""
604 log.debug("Checking if resource exists: %s", self.geturl())
605 if not self.is_webdav_endpoint:
606 # The remote is a plain HTTP server. Let's attempt a HEAD
607 # request, even if the behavior for such a request against a
608 # directory is not specified, so it depends on the server
609 # implementation.
610 resp = self.metadata_session.head(
611 self.geturl(), timeout=self._config.timeout, allow_redirects=True, stream=False
612 )
613 return resp.status_code == requests.codes.ok # 200
615 # The remote endpoint is a webDAV server: send a PROPFIND request
616 # to determine if it exists.
617 resp = self._propfind()
618 if resp.status_code == requests.codes.multi_status: # 207
619 prop = _parse_propfind_response_body(resp.text)[0]
620 return prop.exists
621 else: # 404 Not Found
622 return False
624 def size(self) -> int:
625 """Return the size of the remote resource in bytes."""
626 if self.dirLike:
627 return 0
629 if not self.is_webdav_endpoint:
630 # The remote is a plain HTTP server. Send a HEAD request to
631 # retrieve the size of the resource.
632 resp = self.metadata_session.head(
633 self.geturl(), timeout=self._config.timeout, allow_redirects=True, stream=False
634 )
635 if resp.status_code == requests.codes.ok: # 200
636 if "Content-Length" in resp.headers:
637 return int(resp.headers["Content-Length"])
638 else:
639 raise ValueError(
640 f"Response to HEAD request to {self} does not contain 'Content-Length' header"
641 )
642 elif resp.status_code == requests.codes.not_found:
643 raise FileNotFoundError(
644 f"Resource {self} does not exist, status: {resp.status_code} {resp.reason}"
645 )
646 else:
647 raise ValueError(
648 f"Unexpected response for HEAD request for {self}, status: {resp.status_code} "
649 f"{resp.reason}"
650 )
652 # The remote is a webDAV server: send a PROPFIND request to retrieve
653 # the size of the resource. Sizes are only meaningful for files.
654 resp = self._propfind()
655 if resp.status_code == requests.codes.multi_status: # 207
656 prop = _parse_propfind_response_body(resp.text)[0]
657 if prop.is_file:
658 return prop.size
659 elif prop.is_directory:
660 raise IsADirectoryError(
661 f"Resource {self} is reported by server as a directory but has a file path"
662 )
663 else:
664 raise FileNotFoundError(f"Resource {self} does not exist")
665 else: # 404 Not Found
666 raise FileNotFoundError(
667 f"Resource {self} does not exist, status: {resp.status_code} {resp.reason}"
668 )
670 def mkdir(self) -> None:
671 """Create the directory resource if it does not already exist."""
672 # Creating directories is only available on WebDAV back ends.
673 if not self.is_webdav_endpoint:
674 raise NotImplementedError(
675 f"Creation of directory {self} is not implemented by plain HTTP servers"
676 )
678 if not self.dirLike:
679 raise NotADirectoryError(f"Can not create a 'directory' for file-like URI {self}")
681 # Check if the target directory already exists.
682 resp = self._propfind()
683 if resp.status_code == requests.codes.multi_status: # 207
684 prop = _parse_propfind_response_body(resp.text)[0]
685 if prop.exists:
686 if prop.is_directory:
687 return
688 else:
689 # A file exists at this path
690 raise NotADirectoryError(
691 f"Can not create a directory for {self} because a file already exists at that path"
692 )
694 # Target directory does not exist. Create it and its ancestors as
695 # needed. We need to test if parent URL is different from self URL,
696 # otherwise we could be stuck in a recursive loop
697 # where self == parent.
698 if self.geturl() != self.parent().geturl():
699 self.parent().mkdir()
701 log.debug("Creating new directory: %s", self.geturl())
702 self._mkcol()
    def remove(self) -> None:
        """Remove the resource.

        Delegates to ``_delete()``.
        """
        self._delete()
708 def read(self, size: int = -1) -> bytes:
709 """Open the resource and return the contents in bytes.
711 Parameters
712 ----------
713 size : `int`, optional
714 The number of bytes to read. Negative or omitted indicates
715 that all data should be read.
716 """
718 # Use the data session as a context manager to ensure that the
719 # network connections to both the front end and back end servers are
720 # closed after downloading the data.
721 log.debug("Reading from remote resource: %s", self.geturl())
722 stream = True if size > 0 else False
723 with self.data_session as session:
724 with time_this(log, msg="GET %s", args=(self,)):
725 resp = session.get(self.geturl(), stream=stream, timeout=self._config.timeout)
727 if resp.status_code != requests.codes.ok: # 200
728 raise FileNotFoundError(
729 f"Unable to read resource {self}; status: {resp.status_code} {resp.reason}"
730 )
731 if not stream:
732 return resp.content
733 else:
734 return next(resp.iter_content(chunk_size=size))
736 def write(self, data: bytes, overwrite: bool = True) -> None:
737 """Write the supplied bytes to the new resource.
739 Parameters
740 ----------
741 data : `bytes`
742 The bytes to write to the resource. The entire contents of the
743 resource will be replaced.
744 overwrite : `bool`, optional
745 If `True` the resource will be overwritten if it exists. Otherwise
746 the write will fail.
747 """
748 log.debug("Writing to remote resource: %s", self.geturl())
749 if not overwrite:
750 if self.exists():
751 raise FileExistsError(f"Remote resource {self} exists and overwrite has been disabled")
753 # Ensure the parent directory exists.
754 self.parent().mkdir()
756 # Upload the data.
757 log.debug("Writing data to remote resource: %s", self.geturl())
758 self._put(data=data)
760 def transfer_from(
761 self,
762 src: ResourcePath,
763 transfer: str = "copy",
764 overwrite: bool = False,
765 transaction: Optional[TransactionProtocol] = None,
766 ) -> None:
767 """Transfer the current resource to a Webdav repository.
769 Parameters
770 ----------
771 src : `ResourcePath`
772 Source URI.
773 transfer : `str`
774 Mode to use for transferring the resource. Supports the following
775 options: copy.
776 transaction : `~lsst.resources.utils.TransactionProtocol`, optional
777 Currently unused.
778 """
779 # Fail early to prevent delays if remote resources are requested.
780 if transfer not in self.transferModes:
781 raise ValueError(f"Transfer mode {transfer} not supported by URI scheme {self.scheme}")
783 # Existence checks cost time so do not call this unless we know
784 # that debugging is enabled.
785 if log.isEnabledFor(logging.DEBUG):
786 log.debug(
787 "Transferring %s [exists: %s] -> %s [exists: %s] (transfer=%s)",
788 src,
789 src.exists(),
790 self,
791 self.exists(),
792 transfer,
793 )
795 # Short circuit immediately if the URIs are identical.
796 if self == src:
797 log.debug(
798 "Target and destination URIs are identical: %s, returning immediately."
799 " No further action required.",
800 self,
801 )
802 return
804 if not overwrite and self.exists():
805 raise FileExistsError(f"Destination path {self} already exists.")
807 if transfer == "auto":
808 transfer = self.transferDefault
810 # We can use webDAV 'COPY' or 'MOVE' if both the current and source
811 # resources are located in the same server.
812 if isinstance(src, type(self)) and self.root_uri() == src.root_uri() and self.is_webdav_endpoint:
813 log.debug("Transfer from %s to %s directly", src, self)
814 return self._move(src) if transfer == "move" else self._copy(src)
816 # For resources of different classes or for plain HTTP resources we can
817 # perform the copy or move operation by downloading to a local file
818 # and uploading to the destination.
819 self._copy_via_local(src)
821 # This was an explicit move, try to remove the source.
822 if transfer == "move":
823 src.remove()
825 def walk(
826 self, file_filter: Optional[Union[str, re.Pattern]] = None
827 ) -> Iterator[Union[List, Tuple[ResourcePath, List[str], List[str]]]]:
828 """Walk the directory tree returning matching files and directories.
829 Parameters
830 ----------
831 file_filter : `str` or `re.Pattern`, optional
832 Regex to filter out files from the list before it is returned.
833 Yields
834 ------
835 dirpath : `ResourcePath`
836 Current directory being examined.
837 dirnames : `list` of `str`
838 Names of subdirectories within dirpath.
839 filenames : `list` of `str`
840 Names of all the files within dirpath.
841 """
842 if not self.dirLike:
843 raise ValueError("Can not walk a non-directory URI")
845 # Walking directories is only available on WebDAV back ends.
846 if not self.is_webdav_endpoint:
847 raise NotImplementedError(f"Walking directory {self} is not implemented by plain HTTP servers")
849 if isinstance(file_filter, str):
850 file_filter = re.compile(file_filter)
852 resp = self._propfind(depth="1")
853 if resp.status_code == requests.codes.multi_status: # 207
854 files: List[str] = []
855 dirs: List[str] = []
857 for prop in _parse_propfind_response_body(resp.text):
858 if prop.is_file:
859 files.append(prop.name)
860 elif not self.path.endswith(prop.href):
861 # Only include the names of sub-directories not the
862 # directory being walked.
863 dirs.append(prop.name)
865 if file_filter is not None:
866 files = [f for f in files if file_filter.search(f)]
868 if not dirs and not files:
869 return
870 else:
871 yield type(self)(self, forceAbsolute=False, forceDirectory=True), dirs, files
873 for dir in dirs:
874 new_uri = self.join(dir, forceDirectory=True)
875 yield from new_uri.walk(file_filter)
    def _as_local(self) -> Tuple[str, bool]:
        """Download object over HTTP and place in temporary directory.

        Returns
        -------
        path : `str`
            Path to local temporary file.
        temporary : `bool`
            Always returns `True`. This is always a temporary file.

        Raises
        ------
        FileNotFoundError
            Raised when the server does not answer with a 200 status.
        ValueError
            Raised when the number of bytes received differs from the
            server-advertised Content-Length.
        """
        # Use the session as a context manager to ensure that connections
        # to both the front end and back end servers are closed after the
        # download operation is finished.
        with self.data_session as session:
            resp = session.get(self.geturl(), stream=True, timeout=self._config.timeout)
            if resp.status_code != requests.codes.ok:
                raise FileNotFoundError(
                    f"Unable to download resource {self}; status: {resp.status_code} {resp.reason}"
                )

            content_length = 0
            # -1 when the server did not advertise a Content-Length header.
            expected_length = int(resp.headers.get("Content-Length", "-1"))
            tmpdir, buffering = _get_temp_dir()

            # delete=False: the caller takes ownership of the file, which
            # therefore outlives this context manager.
            with tempfile.NamedTemporaryFile(
                suffix=self.getExtension(), buffering=buffering, dir=tmpdir, delete=False
            ) as tmpFile:
                with time_this(
                    log,
                    msg="GET %s [length=%d] to local file %s [chunk_size=%d]",
                    args=(self, expected_length, tmpFile.name, buffering),
                    mem_usage=True,
                    mem_unit=u.mebibyte,
                ):
                    # Stream the body in chunks sized to the temporary
                    # directory's file system, counting received bytes.
                    for chunk in resp.iter_content(chunk_size=buffering):
                        tmpFile.write(chunk)
                        content_length += len(chunk)

            # Check that the expected and actual content lengths match
            if expected_length >= 0 and expected_length != content_length:
                raise ValueError(
                    f"Size of downloaded file does not match value in Content-Length header for {self}: "
                    f"expecting {expected_length} and got {content_length} bytes"
                )

            return tmpFile.name, True
925 def _send_webdav_request(
926 self,
927 method: str,
928 url: Optional[str] = None,
929 headers: dict[str, str] = {},
930 body: Optional[str] = None,
931 session: Optional[requests.Session] = None,
932 timeout: Optional[tuple[int, int]] = None,
933 ) -> requests.Response:
934 """Send a webDAV request and correctly handle redirects.
936 Parameters
937 ----------
938 method : `str`
939 The mthod of the HTTP request to be sent, e.g. PROPFIND, MKCOL.
940 headers : `dict`, optional
941 A dictionary of key-value pairs (both strings) to include as
942 headers in the request.
943 body: `str`, optional
944 The body of the request.
946 Notes
947 -----
948 This way of sending webDAV requests is necessary for handling
949 redirection ourselves, since the 'requests' package changes the method
950 of the redirected request when the server responds with status 302 and
951 the method of the original request is not HEAD (which is the case for
952 webDAV requests).
954 That means that when the webDAV server we interact with responds with
955 a redirection to a PROPFIND or MKCOL request, the request gets
956 converted to a GET request when sent to the redirected location.
958 See `requests.sessions.SessionRedirectMixin.rebuild_method()` in
959 https://github.com/psf/requests/blob/main/requests/sessions.py
961 This behavior of the 'requests' package is meant to be compatible with
962 what is specified in RFC 9110:
964 https://www.rfc-editor.org/rfc/rfc9110#name-302-found
966 For our purposes, we do need to follow the redirection and send a new
967 request using the same HTTP verb.
968 """
969 if url is None:
970 url = self.geturl()
972 if session is None:
973 session = self.metadata_session
975 if timeout is None:
976 timeout = self._config.timeout
978 with time_this(
979 log,
980 msg="%s %s",
981 args=(
982 method,
983 url,
984 ),
985 mem_usage=True,
986 mem_unit=u.mebibyte,
987 ):
988 for _ in range(max_redirects := 5):
989 resp = session.request(
990 method,
991 url,
992 data=body,
993 headers=headers,
994 stream=False,
995 timeout=timeout,
996 allow_redirects=False,
997 )
998 if resp.is_redirect:
999 url = resp.headers["Location"]
1000 else:
1001 return resp
1003 # We reached the maximum allowed number of redirects.
1004 # Stop trying.
1005 raise ValueError(
1006 f"Could not get a response to {method} request for {self} after "
1007 f"{max_redirects} redirections"
1008 )
1010 def _propfind(self, body: Optional[str] = None, depth: str = "0") -> requests.Response:
1011 """Send a PROPFIND webDAV request and return the response.
1013 Parameters
1014 ----------
1015 body : `str`, optional
1016 The body of the PROPFIND request to send to the server. If
1017 provided, it is expected to be a XML document.
1018 depth : `str`, optional
1019 The value of the 'Depth' header to include in the request.
1021 Returns
1022 -------
1023 response : `requests.Response`
1024 Response to the PROPFIND request.
1026 Notes
1027 -----
1028 It raises `ValueError` if the status code of the PROPFIND request
1029 is different from "207 Multistatus" or "404 Not Found".
1030 """
1031 if body is None:
1032 # Request only the DAV live properties we are explicitly interested
1033 # in namely 'resourcetype', 'getcontentlength', 'getlastmodified'
1034 # and 'displayname'.
1035 body = (
1036 """<?xml version="1.0" encoding="utf-8" ?>"""
1037 """<D:propfind xmlns:D="DAV:"><D:prop>"""
1038 """<D:resourcetype/><D:getcontentlength/><D:getlastmodified/><D:displayname/>"""
1039 """</D:prop></D:propfind>"""
1040 )
1041 headers = {
1042 "Depth": depth,
1043 "Content-Type": 'application/xml; charset="utf-8"',
1044 "Content-Length": str(len(body)),
1045 }
1046 resp = self._send_webdav_request("PROPFIND", headers=headers, body=body)
1047 if resp.status_code in (requests.codes.multi_status, requests.codes.not_found):
1048 return resp
1049 else:
1050 raise ValueError(
1051 f"Unexpected response for PROPFIND request for {self}, status: {resp.status_code} "
1052 f"{resp.reason}"
1053 )
1055 def _options(self) -> requests.Response:
1056 """Send a OPTIONS webDAV request for this resource."""
1058 return self._send_webdav_request("OPTIONS")
1060 def _head(self) -> requests.Response:
1061 """Send a HEAD webDAV request for this resource."""
1063 return self._send_webdav_request("HEAD")
1065 def _mkcol(self) -> None:
1066 """Send a MKCOL webDAV request to create a collection. The collection
1067 may already exist.
1068 """
1069 resp = self._send_webdav_request("MKCOL")
1070 if resp.status_code == requests.codes.created: # 201
1071 return
1073 if resp.status_code == requests.codes.method_not_allowed: # 405
1074 # The remote directory already exists
1075 log.debug("Can not create directory: %s may already exist: skipping.", self.geturl())
1076 else:
1077 raise ValueError(f"Can not create directory {self}, status: {resp.status_code} {resp.reason}")
1079 def _delete(self) -> None:
1080 """Send a DELETE webDAV request for this resource."""
1082 log.debug("Deleting %s ...", self.geturl())
1084 # If this is a directory, ensure the remote is a webDAV server because
1085 # plain HTTP servers don't support DELETE requests on non-file
1086 # paths.
1087 if self.dirLike and not self.is_webdav_endpoint:
1088 raise NotImplementedError(
1089 f"Deletion of directory {self} is not implemented by plain HTTP servers"
1090 )
1092 # Deleting non-empty directories may take some time, so increase
1093 # the timeout for getting a response from the server.
1094 timeout = self._config.timeout
1095 if self.dirLike:
1096 timeout = (timeout[0], timeout[1] * 100)
1097 resp = self._send_webdav_request("DELETE", timeout=timeout)
1098 if resp.status_code in (
1099 requests.codes.ok,
1100 requests.codes.accepted,
1101 requests.codes.no_content,
1102 requests.codes.not_found,
1103 ):
1104 # We can get a "404 Not Found" error when the file or directory
1105 # does not exist or when the DELETE request was retried several
1106 # times and a previous attempt actually deleted the resource.
1107 # Therefore we consider that a "Not Found" response is not an
1108 # error since we reached the state desired by the user.
1109 return
1110 else:
1111 # TODO: the response to a DELETE request against a webDAV server
1112 # may be multistatus. If so, we need to parse the reponse body to
1113 # determine more precisely the reason of the failure (e.g. a lock)
1114 # and provide a more helpful error message.
1115 raise ValueError(f"Unable to delete resource {self}; status: {resp.status_code} {resp.reason}")
1117 def _copy_via_local(self, src: ResourcePath) -> None:
1118 """Replace the contents of this resource with the contents of a remote
1119 resource by using a local temporary file.
1121 Parameters
1122 ----------
1123 src : `HttpResourcePath`
1124 The source of the contents to copy to `self`.
1125 """
1126 with src.as_local() as local_uri:
1127 log.debug("Transfer from %s to %s via local file %s", src, self, local_uri)
1128 with open(local_uri.ospath, "rb") as f:
1129 self._put(data=f)
1131 def _copy_or_move(self, method: str, src: HttpResourcePath) -> None:
1132 """Send a COPY or MOVE webDAV request to copy or replace the contents
1133 of this resource with the contents of another resource located in the
1134 same server.
1136 Parameters
1137 ----------
1138 method : `str`
1139 The method to perform. Valid values are "COPY" or "MOVE" (in
1140 uppercase).
1142 src : `HttpResourcePath`
1143 The source of the contents to move to `self`.
1144 """
1145 headers = {"Destination": self.geturl()}
1146 resp = self._send_webdav_request(method, url=src.geturl(), headers=headers, session=self.data_session)
1147 if resp.status_code in (requests.codes.created, requests.codes.no_content):
1148 return
1150 if resp.status_code == requests.codes.multi_status:
1151 tree = eTree.fromstring(resp.content)
1152 status_element = tree.find("./{DAV:}response/{DAV:}status")
1153 status = status_element.text if status_element is not None else "unknown"
1154 error = tree.find("./{DAV:}response/{DAV:}error")
1155 raise ValueError(f"{method} returned multistatus reponse with status {status} and error {error}")
1156 else:
1157 raise ValueError(
1158 f"{method} operation from {src} to {self} failed, status: {resp.status_code} {resp.reason}"
1159 )
1161 def _copy(self, src: HttpResourcePath) -> None:
1162 """Send a COPY webDAV request to replace the contents of this resource
1163 (if any) with the contents of another resource located in the same
1164 server.
1166 Parameters
1167 ----------
1168 src : `HttpResourcePath`
1169 The source of the contents to copy to `self`.
1170 """
1171 # Neither dCache nor XrootD currently implement the COPY
1172 # webDAV method as documented in
1173 # http://www.webdav.org/specs/rfc4918.html#METHOD_COPY
1174 # (See issues DM-37603 and DM-37651 for details)
1175 #
1176 # For the time being, we use a temporary local file to
1177 # perform the copy client side.
1178 # TODO: when those 2 issues above are solved remove the 3 lines below.
1179 must_use_local = True
1180 if must_use_local:
1181 return self._copy_via_local(src)
1183 return self._copy_or_move("COPY", src)
1185 def _move(self, src: HttpResourcePath) -> None:
1186 """Send a MOVE webDAV request to replace the contents of this resource
1187 with the contents of another resource located in the same server.
1189 Parameters
1190 ----------
1191 src : `HttpResourcePath`
1192 The source of the contents to move to `self`.
1193 """
1194 return self._copy_or_move("MOVE", src)
1196 def _put(self, data: Union[BinaryIO, bytes]) -> None:
1197 """Perform an HTTP PUT request and handle redirection.
1199 Parameters
1200 ----------
1201 data : `Union[BinaryIO, bytes]`
1202 The data to be included in the body of the PUT request.
1203 """
1204 # Retrieve the final URL for this upload by sending a PUT request with
1205 # no content. Follow a single server redirection to retrieve the
1206 # final URL.
1207 headers = {"Content-Length": "0"}
1208 if self._config.send_expect_on_put:
1209 headers["Expect"] = "100-continue"
1211 url = self.geturl()
1213 # Use the session as a context manager to ensure the underlying
1214 # connections are closed after finishing uploading the data.
1215 with self.data_session as session:
1216 # Send an empty PUT request to get redirected to the final
1217 # destination.
1218 log.debug("Sending empty PUT request to %s", url)
1219 with time_this(log, msg="PUT (no data) %s", args=(url,), mem_usage=True, mem_unit=u.mebibyte):
1220 resp = session.request(
1221 "PUT",
1222 url,
1223 data=None,
1224 headers=headers,
1225 stream=False,
1226 timeout=self._config.timeout,
1227 allow_redirects=False,
1228 )
1229 if resp.is_redirect:
1230 url = resp.headers["Location"]
1232 # Upload the data to the final destination.
1233 log.debug("Uploading data to %s", url)
1235 # Ask the server to compute and record a checksum of the uploaded
1236 # file contents, for later integrity checks. Since we don't compute
1237 # the digest ourselves while uploading the data, we cannot control
1238 # after the request is complete that the data we uploaded is
1239 # identical to the data recorded by the server, but at least the
1240 # server has recorded a digest of the data it stored.
1241 #
1242 # See RFC-3230 for details and
1243 # https://www.iana.org/assignments/http-dig-alg/http-dig-alg.xhtml
1244 # for the list of supported digest algorithhms.
1245 # In addition, note that not all servers implement this RFC so
1246 # the checksum may not be computed by the server.
1247 put_headers: Optional[dict[str, str]] = None
1248 if digest := self._config.digest_algorithm:
1249 put_headers = {"Want-Digest": digest}
1251 with time_this(log, msg="PUT %s", args=(url,), mem_usage=True, mem_unit=u.mebibyte):
1252 resp = session.request(
1253 "PUT",
1254 url,
1255 data=data,
1256 headers=put_headers,
1257 stream=False,
1258 timeout=self._config.timeout,
1259 allow_redirects=False,
1260 )
1261 if resp.status_code in (
1262 requests.codes.ok,
1263 requests.codes.created,
1264 requests.codes.no_content,
1265 ):
1266 return
1267 else:
1268 raise ValueError(f"Can not write file {self}, status: {resp.status_code} {resp.reason}")
1270 @contextlib.contextmanager
1271 def _openImpl(
1272 self,
1273 mode: str = "r",
1274 *,
1275 encoding: Optional[str] = None,
1276 ) -> Iterator[ResourceHandleProtocol]:
1277 resp = self._head()
1278 accepts_range = resp.status_code == requests.codes.ok and resp.headers.get("Accept-Ranges") == "bytes"
1279 handle: ResourceHandleProtocol
1280 if mode in ("rb", "r") and accepts_range:
1281 handle = HttpReadResourceHandle(
1282 mode, log, url=self.geturl(), session=self.data_session, timeout=self._config.timeout
1283 )
1284 if mode == "r":
1285 # cast because the protocol is compatible, but does not have
1286 # BytesIO in the inheritance tree
1287 yield io.TextIOWrapper(cast(io.BytesIO, handle), encoding=encoding)
1288 else:
1289 yield handle
1290 else:
1291 with super()._openImpl(mode, encoding=encoding) as http_handle:
1292 yield http_handle
def _dump_response(resp: requests.Response) -> None:
    """Log the contents of a HTTP or webDAV request and its response.

    Parameters
    ----------
    resp : `requests.Response`
        The response to log.

    Notes
    -----
    Intended for development purposes only.
    """
    request = resp.request

    log.debug("-----------------------------------------------")
    log.debug("Request")
    log.debug(" method=%s", request.method)
    log.debug(" URL=%s", request.url)
    log.debug(" headers=%s", request.headers)
    if request.method == "PUT":
        # The PUT body is arbitrary data: do not log it.
        log.debug(" body=<data>")
    elif request.body is None:
        log.debug(" body=<empty>")
    else:
        log.debug(" body=%r", request.body[:120])

    log.debug("Response:")
    log.debug(" status_code=%d", resp.status_code)
    log.debug(" headers=%s", resp.headers)
    if not resp.content:
        log.debug(" body=<empty>")
    elif resp.headers.get("Content-Type") == "text/plain":
        log.debug(" body=%r", resp.content)
    else:
        # Truncate potentially-binary bodies.
        log.debug(" body=%r", resp.content[:80])
1330def _is_protected(filepath: str) -> bool:
1331 """Return true if the permissions of file at filepath only allow for access
1332 by its owner.
1334 Parameters
1335 ----------
1336 filepath : `str`
1337 Path of a local file.
1338 """
1339 if not os.path.isfile(filepath):
1340 return False
1341 mode = stat.S_IMODE(os.stat(filepath).st_mode)
1342 owner_accessible = bool(mode & stat.S_IRWXU)
1343 group_accessible = bool(mode & stat.S_IRWXG)
1344 other_accessible = bool(mode & stat.S_IRWXO)
1345 return owner_accessible and not group_accessible and not other_accessible
1348def _parse_propfind_response_body(body: str) -> List[DavProperty]:
1349 """Parse the XML-encoded contents of the response body to a webDAV PROPFIND
1350 request.
1352 Parameters
1353 ----------
1354 body : `str`
1355 XML-encoded response body to a PROPFIND request
1357 Returns
1358 -------
1359 responses : `List[DavProperty]`
1361 Notes
1362 -----
1363 Is is expected that there is at least one reponse in `body`, otherwise
1364 this function raises.
1365 """
1366 # A response body to a PROPFIND request is of the form (indented for
1367 # readability):
1368 #
1369 # <?xml version="1.0" encoding="UTF-8"?>
1370 # <D:multistatus xmlns:D="DAV:">
1371 # <D:response>
1372 # <D:href>path/to/resource</D:href>
1373 # <D:propstat>
1374 # <D:prop>
1375 # <D:resourcetype>
1376 # <D:collection xmlns:D="DAV:"/>
1377 # </D:resourcetype>
1378 # <D:getlastmodified>
1379 # Fri, 27 Jan 2 023 13:59:01 GMT
1380 # </D:getlastmodified>
1381 # <D:getcontentlength>
1382 # 12345
1383 # </D:getcontentlength>
1384 # </D:prop>
1385 # <D:status>
1386 # HTTP/1.1 200 OK
1387 # </D:status>
1388 # </D:propstat>
1389 # </D:response>
1390 # <D:response>
1391 # ...
1392 # </D:response>
1393 # <D:response>
1394 # ...
1395 # </D:response>
1396 # </D:multistatus>
1398 # Scan all the 'response' elements and extract the relevant properties
1399 responses = []
1400 multistatus = eTree.fromstring(body.strip())
1401 for response in multistatus.findall("./{DAV:}response"):
1402 responses.append(DavProperty(response))
1404 if responses:
1405 return responses
1406 else:
1407 # Could not parse the body
1408 raise ValueError(f"Unable to parse response for PROPFIND request: {response}")
class DavProperty:
    """Helper class encapsulating the live DAV properties of interest for a
    single resource, as extracted from the response to a PROPFIND request.
    """

    # Matches the 'status' element of a successful 'propstat' element in a
    # PROPFIND response.
    _status_ok_rex = re.compile(r"^HTTP/.* 200 .*$", re.IGNORECASE)

    def __init__(self, response: Optional[eTree.Element]):
        # Defaults describe a resource about which nothing is known.
        self._href: str = ""
        self._displayname: str = ""
        self._collection: bool = False
        self._getlastmodified: str = ""
        self._getcontentlength: int = -1

        if response is not None:
            self._parse(response)

    def _parse(self, response: eTree.Element) -> None:
        # Extract 'href'.
        href_element = response.find("./{DAV:}href")
        if href_element is not None:
            # str() around element.text keeps mypy happy about Optional[str].
            self._href = str(href_element.text).strip()

        for propstat in response.findall("./{DAV:}propstat"):
            # Skip any 'propstat' whose status is not a success.
            status = propstat.find("./{DAV:}status")
            if status is None or not self._status_ok_rex.match(str(status.text)):
                continue

            for prop in propstat.findall("./{DAV:}prop"):
                # A 'collection' child of 'resourcetype' marks a directory.
                if prop.find("./{DAV:}resourcetype/{DAV:}collection") is not None:
                    self._collection = True

                modified = prop.find("./{DAV:}getlastmodified")
                if modified is not None:
                    self._getlastmodified = str(modified.text)

                length = prop.find("./{DAV:}getcontentlength")
                if length is not None:
                    self._getcontentlength = int(str(length.text))

                display = prop.find("./{DAV:}displayname")
                if display is not None:
                    self._displayname = str(display.text)

    @property
    def exists(self) -> bool:
        """Whether the resource exists: either a directory or a file with a
        length of at least zero.
        """
        return self._collection or self._getcontentlength >= 0

    @property
    def is_directory(self) -> bool:
        """Whether the resource is a collection (directory)."""
        return self._collection

    @property
    def is_file(self) -> bool:
        """Whether the resource is a file (its content length is known)."""
        return self._getcontentlength >= 0

    @property
    def size(self) -> int:
        """Size of the resource in bytes; only valid when `is_file` is
        `True`.
        """
        return self._getcontentlength

    @property
    def name(self) -> str:
        """Display name of the resource."""
        return self._displayname

    @property
    def href(self) -> str:
        """Href of the resource, as returned by the server."""
        return self._href