Coverage for python/lsst/daf/butler/core/_butlerUri/s3.py : 75%

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

import logging
import tempfile

__all__ = ('ButlerS3URI',)

from typing import (
    TYPE_CHECKING,
    Optional,
    Any,
    Callable,
    Tuple,
    Union,
)

from .utils import NoTransaction
from ._butlerUri import ButlerURI
from .s3utils import getS3Client, s3CheckFileExists, bucketExists

from botocore.exceptions import ClientError
from http.client import ImproperConnectionState, HTTPException
from urllib3.exceptions import RequestError, HTTPError

if TYPE_CHECKING:
    try:
        import boto3
    except ImportError:
        pass
    from ..datastore import DatastoreTransaction
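
# boto3 is imported only under TYPE_CHECKING so the annotations below can
# refer to boto3.client; with ``from __future__ import annotations`` those
# annotations are never evaluated at runtime, and the actual client object is
# obtained lazily via getS3Client().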

# https://pypi.org/project/backoff/
try:
    import backoff
except ImportError:
    class Backoff():
        @staticmethod
        def expo(func: Callable, *args: Any, **kwargs: Any) -> Callable:
            return func

        @staticmethod
        def on_exception(func: Callable, *args: Any, **kwargs: Any) -> Callable:
            return func

    backoff = Backoff
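
# When the optional "backoff" package is missing, the stand-in above turns
# ``@backoff.on_exception(backoff.expo, ..., max_time=...)`` into a no-op:
# on_exception() returns ``expo`` unchanged and ``expo`` then returns the
# decorated function unchanged, so the methods below still work, just without
# the extra retry layer.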

# Settings for the "backoff" retry decorators. These retries are
# belt-and-suspenders along with the retries built into Boto3, to account for
# semantic differences in errors between S3-like providers.
retryable_io_errors = (
    # http.client
    ImproperConnectionState, HTTPException,
    # urllib3.exceptions
    RequestError, HTTPError,
    # built-ins
    TimeoutError, ConnectionError)
retryable_client_errors = (
    # botocore.exceptions
    ClientError,
    # built-ins
    PermissionError)
all_retryable_errors = retryable_client_errors + retryable_io_errors
max_retry_time = 60


log = logging.getLogger(__name__)


class ButlerS3URI(ButlerURI):
    """S3 URI"""

    @property
    def client(self) -> boto3.client:
        """Client object to address remote resource."""
        # Defer import for circular dependencies
        return getS3Client()

    @backoff.on_exception(backoff.expo, retryable_client_errors, max_time=max_retry_time)
    def exists(self) -> bool:
        if self.is_root:
            # Only check for the bucket since the path is irrelevant
            return bucketExists(self.netloc)
        exists, _ = s3CheckFileExists(self, client=self.client)
        return exists

    @backoff.on_exception(backoff.expo, retryable_client_errors, max_time=max_retry_time)
    def size(self) -> int:
        if self.dirLike:
            return 0
        _, sz = s3CheckFileExists(self, client=self.client)
        return sz

    @backoff.on_exception(backoff.expo, retryable_client_errors, max_time=max_retry_time)
    def remove(self) -> None:
        """Remove the resource."""

        # https://github.com/boto/boto3/issues/507 - there is no way of
        # knowing whether the file was actually deleted short of checking
        # all the keys again; the response is always HTTP 204, even if
        # the key did not exist.
        self.client.delete_object(Bucket=self.netloc, Key=self.relativeToPathRoot)

    @backoff.on_exception(backoff.expo, all_retryable_errors, max_time=max_retry_time)
    def read(self, size: int = -1) -> bytes:
        args = {}
        if size > 0:
            args["Range"] = f"bytes=0-{size-1}"
        try:
            response = self.client.get_object(Bucket=self.netloc,
                                               Key=self.relativeToPathRoot,
                                               **args)
        except (self.client.exceptions.NoSuchKey, self.client.exceptions.NoSuchBucket) as err:
            raise FileNotFoundError(f"No such resource: {self}") from err
        body = response["Body"].read()
        response["Body"].close()
        return body
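
    # Illustrative note: a positive ``size`` becomes an S3 byte-range request
    # ("bytes=0-{size-1}"), so a sketch such as
    #
    #     uri = ButlerURI("s3://some-bucket/data/file.bin")  # hypothetical URI
    #     header = uri.read(size=16)
    #
    # would fetch only the first 16 bytes, assuming the generic ButlerURI
    # constructor dispatches "s3://" URIs to this class.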

    @backoff.on_exception(backoff.expo, all_retryable_errors, max_time=max_retry_time)
    def write(self, data: bytes, overwrite: bool = True) -> None:
        if not overwrite:
            if self.exists():
                raise FileExistsError(f"Remote resource {self} exists and overwrite has been disabled")
        self.client.put_object(Bucket=self.netloc, Key=self.relativeToPathRoot,
                               Body=data)

    @backoff.on_exception(backoff.expo, all_retryable_errors, max_time=max_retry_time)
    def mkdir(self) -> None:
        if not bucketExists(self.netloc):
            raise ValueError(f"Bucket {self.netloc} does not exist for {self}!")

        if not self.dirLike:
            raise ValueError(f"Cannot create a 'directory' for file-like URI {self}")

        # Don't create an S3 key when the root is at the top level of a bucket
        if not self.path == "/":
            self.client.put_object(Bucket=self.netloc, Key=self.relativeToPathRoot)
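
    # S3 has no real directories; the put_object call above stores a
    # zero-length object under the directory-like key so that later
    # existence checks can find the "directory".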

    @backoff.on_exception(backoff.expo, all_retryable_errors, max_time=max_retry_time)
    def _as_local(self) -> Tuple[str, bool]:
        """Download object from S3 and place in temporary directory.

        Returns
        -------
        path : `str`
            Path to local temporary file.
        temporary : `bool`
            Always returns `True`. This is always a temporary file.
        """
        with tempfile.NamedTemporaryFile(suffix=self.getExtension(), delete=False) as tmpFile:
            self.client.download_fileobj(self.netloc, self.relativeToPathRoot, tmpFile)
        return tmpFile.name, True
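
    # The temporary file is created with delete=False because its lifetime is
    # managed by the caller: returning ``True`` marks it as temporary, and the
    # base-class ``as_local()`` context manager is expected to delete it once
    # the caller is finished with it.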

    @backoff.on_exception(backoff.expo, all_retryable_errors, max_time=max_retry_time)
    def transfer_from(self, src: ButlerURI, transfer: str = "copy",
                      overwrite: bool = False,
                      transaction: Optional[Union[DatastoreTransaction, NoTransaction]] = None) -> None:
        """Transfer the current resource to an S3 bucket.

        Parameters
        ----------
        src : `ButlerURI`
            Source URI.
        transfer : `str`
            Mode to use for transferring the resource. Supports the following
            options: copy.
        overwrite : `bool`, optional
            Allow an existing file to be overwritten. Defaults to `False`.
        transaction : `DatastoreTransaction`, optional
            Currently unused.
        """
        # Fail early to prevent delays if remote resources are requested
        if transfer not in self.transferModes:
            raise ValueError(f"Transfer mode '{transfer}' not supported by URI scheme {self.scheme}")

        log.debug(f"Transferring {src} [exists: {src.exists()}] -> "
                  f"{self} [exists: {self.exists()}] (transfer={transfer})")

        if not overwrite and self.exists():
            raise FileExistsError(f"Destination path '{self}' already exists.")

        if transfer == "auto":
            transfer = self.transferDefault

        if isinstance(src, type(self)):
            # Looks like an S3 remote URI, so we can use direct copy.
            # Note that boto3.resource.meta.copy is cleverer than the
            # low-level copy_object.
            copy_source = {
                "Bucket": src.netloc,
                "Key": src.relativeToPathRoot,
            }
            self.client.copy_object(CopySource=copy_source, Bucket=self.netloc, Key=self.relativeToPathRoot)
        else:
            # Use a local file and upload it
            with src.as_local() as local_uri:

                # resource.meta.upload_file seems like the right thing
                # but we have a low-level client
                with open(local_uri.ospath, "rb") as fh:
                    self.client.put_object(Bucket=self.netloc,
                                           Key=self.relativeToPathRoot, Body=fh)

        # This was an explicit move requested from a remote resource;
        # try to remove that resource.
        if transfer == "move":
            # Transactions do not work here
            src.remove()
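

# Illustrative usage sketch; the bucket, key and local path are hypothetical,
# and this assumes the ButlerURI constructor dispatches "s3://" URIs to
# ButlerS3URI:
#
#     local = ButlerURI("file:///tmp/example.fits")
#     remote = ButlerURI("s3://my-bucket/repo/example.fits")
#     remote.transfer_from(local, transfer="copy")
#     assert remote.exists()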