Coverage for python/lsst/daf/butler/core/_butlerUri/s3.py : 75%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24import os
25import os.path
26import logging
27import tempfile
29__all__ = ('ButlerS3URI',)
31from typing import (
32 TYPE_CHECKING,
33 Optional,
34 Any,
35 Callable,
36 Tuple,
37 Union,
38)
40from .utils import NoTransaction
41from ._butlerUri import ButlerURI
42from .s3utils import getS3Client, s3CheckFileExists, bucketExists
44from botocore.exceptions import ClientError
45from http.client import ImproperConnectionState, HTTPException
46from urllib3.exceptions import RequestError, HTTPError
48if TYPE_CHECKING: 48 ↛ 49line 48 didn't jump to line 49, because the condition on line 48 was never true
49 try:
50 import boto3
51 except ImportError:
52 pass
53 from ..datastore import DatastoreTransaction
# https://pypi.org/project/backoff/
try:
    import backoff
except ImportError:
    class Backoff:
        """No-op stand-in for the ``backoff`` package.

        Provides just enough of the ``backoff`` API surface for the
        decorators used in this module; the wrapped callables are
        returned unchanged, so no retries happen.
        """

        @staticmethod
        def expo(func: Callable, *args: Any, **kwargs: Any) -> Callable:
            # Pass the callable straight through; no wait generator.
            return func

        @staticmethod
        def on_exception(func: Callable, *args: Any, **kwargs: Any) -> Callable:
            # ``backoff.on_exception(...)`` returns a decorator; here we
            # return the first argument, which when used as a decorator
            # also leaves the target function untouched.
            return func

    backoff = Backoff
# Settings for "backoff" retry decorators. These retries are belt-and-
# suspenders along with the retries built into Boto3, to account for
# semantic differences in errors between S3-like providers.

# Transport/connection-level errors worth retrying on any operation.
retryable_io_errors = (
    # http.client
    ImproperConnectionState, HTTPException,
    # urllib3.exceptions
    RequestError, HTTPError,
    # built-ins
    TimeoutError, ConnectionError)

# Client-reported errors that are retried as potentially transient.
retryable_client_errors = (
    # botocore.exceptions
    ClientError,
    # built-ins
    PermissionError)

# Every error class the combined retry decorators should catch.
all_retryable_errors = retryable_client_errors + retryable_io_errors

# Maximum total time (seconds) a backoff decorator keeps retrying.
max_retry_time = 60


log = logging.getLogger(__name__)
class ButlerS3URI(ButlerURI):
    """S3 URI"""

    @property
    def client(self) -> boto3.client:
        """Client object to address remote resource."""
        # Defer to the helper so a shared client can be reused and
        # circular imports are avoided.
        return getS3Client()

    @backoff.on_exception(backoff.expo, retryable_client_errors, max_time=max_retry_time)
    def exists(self) -> bool:
        """Report whether this remote resource exists.

        Returns
        -------
        exists : `bool`
            `True` if the bucket (for a root URI) or the key exists.
        """
        if self.is_root:
            # Only check for the bucket since the path is irrelevant
            return bucketExists(self.netloc)
        exists, _ = s3CheckFileExists(self, client=self.client)
        return exists

    @backoff.on_exception(backoff.expo, retryable_client_errors, max_time=max_retry_time)
    def size(self) -> int:
        """Return the size of the remote resource.

        Returns
        -------
        size : `int`
            Size in bytes; 0 for directory-like URIs.
        """
        if self.dirLike:
            return 0
        _, sz = s3CheckFileExists(self, client=self.client)
        return sz

    @backoff.on_exception(backoff.expo, retryable_client_errors, max_time=max_retry_time)
    def remove(self) -> None:
        """Remove the resource."""
        # https://github.com/boto/boto3/issues/507 - there is no
        # way of knowing if the file was actually deleted except
        # for checking all the keys again; the response is HTTP 204
        # all the time.
        self.client.delete_object(Bucket=self.netloc, Key=self.relativeToPathRoot)

    @backoff.on_exception(backoff.expo, all_retryable_errors, max_time=max_retry_time)
    def read(self, size: int = -1) -> bytes:
        """Read the contents of the remote object.

        Parameters
        ----------
        size : `int`, optional
            Number of bytes to read from the start of the object.
            A negative value (the default) reads the whole object.

        Returns
        -------
        body : `bytes`
            Contents of the object.

        Raises
        ------
        FileNotFoundError
            Raised if the bucket or key does not exist.
        """
        args = {}
        if size > 0:
            # S3 byte ranges are inclusive, hence size - 1.
            args["Range"] = f"bytes=0-{size-1}"
        try:
            response = self.client.get_object(Bucket=self.netloc,
                                              Key=self.relativeToPathRoot,
                                              **args)
        except (self.client.exceptions.NoSuchKey, self.client.exceptions.NoSuchBucket) as err:
            raise FileNotFoundError(f"No such resource: {self}") from err
        # Ensure the streaming body is closed even if read() raises, so
        # the underlying HTTP connection is released.
        stream = response["Body"]
        try:
            body = stream.read()
        finally:
            stream.close()
        return body

    @backoff.on_exception(backoff.expo, all_retryable_errors, max_time=max_retry_time)
    def write(self, data: bytes, overwrite: bool = True) -> None:
        """Write the supplied bytes to the remote resource.

        Parameters
        ----------
        data : `bytes`
            Contents to upload.
        overwrite : `bool`, optional
            If `False` and the resource already exists, raise
            `FileExistsError` instead of replacing it.
        """
        if not overwrite:
            if self.exists():
                raise FileExistsError(f"Remote resource {self} exists and overwrite has been disabled")
        self.client.put_object(Bucket=self.netloc, Key=self.relativeToPathRoot,
                               Body=data)

    @backoff.on_exception(backoff.expo, all_retryable_errors, max_time=max_retry_time)
    def mkdir(self) -> None:
        """Create a directory-like key for this URI.

        Raises
        ------
        ValueError
            Raised if the bucket does not exist or this URI is
            file-like rather than directory-like.
        """
        if not bucketExists(self.netloc):
            raise ValueError(f"Bucket {self.netloc} does not exist for {self}!")

        if not self.dirLike:
            raise ValueError(f"Can not create a 'directory' for file-like URI {self}")

        # don't create S3 key when root is at the top-level of a bucket
        if not self.path == "/":
            self.client.put_object(Bucket=self.netloc, Key=self.relativeToPathRoot)

    @backoff.on_exception(backoff.expo, all_retryable_errors, max_time=max_retry_time)
    def as_local(self) -> Tuple[str, bool]:
        """Download object from S3 and place in temporary directory.

        Returns
        -------
        path : `str`
            Path to local temporary file.
        temporary : `bool`
            Always returns `True`. This is always a temporary file.
        """
        with tempfile.NamedTemporaryFile(suffix=self.getExtension(), delete=False) as tmpFile:
            self.client.download_fileobj(self.netloc, self.relativeToPathRoot, tmpFile)
        # File handle is closed by the context manager; the file itself
        # persists (delete=False) for the caller to consume and remove.
        return tmpFile.name, True

    @backoff.on_exception(backoff.expo, all_retryable_errors, max_time=max_retry_time)
    def transfer_from(self, src: ButlerURI, transfer: str = "copy",
                      overwrite: bool = False,
                      transaction: Optional[Union[DatastoreTransaction, NoTransaction]] = None) -> None:
        """Transfer the current resource to an S3 bucket.

        Parameters
        ----------
        src : `ButlerURI`
            Source URI.
        transfer : `str`
            Mode to use for transferring the resource. Supports the following
            options: copy.
        overwrite : `bool`, optional
            Allow an existing file to be overwritten. Defaults to `False`.
        transaction : `DatastoreTransaction`, optional
            Currently unused.
        """
        # Fail early to prevent delays if remote resources are requested
        if transfer not in self.transferModes:
            raise ValueError(f"Transfer mode '{transfer}' not supported by URI scheme {self.scheme}")

        # The existence checks trigger remote round trips, so only pay
        # for them when debug logging is actually enabled.
        if log.isEnabledFor(logging.DEBUG):
            log.debug("Transferring %s [exists: %s] -> %s [exists: %s] (transfer=%s)",
                      src, src.exists(), self, self.exists(), transfer)

        if not overwrite and self.exists():
            raise FileExistsError(f"Destination path '{self}' already exists.")

        if transfer == "auto":
            transfer = self.transferDefault

        if isinstance(src, type(self)):
            # Looks like an S3 remote uri so we can use direct copy
            # note that boto3.resource.meta.copy is cleverer than the low
            # level copy_object
            copy_source = {
                "Bucket": src.netloc,
                "Key": src.relativeToPathRoot,
            }
            self.client.copy_object(CopySource=copy_source, Bucket=self.netloc,
                                    Key=self.relativeToPathRoot)
        else:
            # Use local file and upload it
            local_src, is_temporary = src.as_local()

            try:
                # resource.meta.upload_file seems like the right thing
                # but we have a low level client
                with open(local_src, "rb") as fh:
                    self.client.put_object(Bucket=self.netloc,
                                           Key=self.relativeToPathRoot, Body=fh)
            finally:
                # Remove the temporary even if the upload raised, so
                # retries/failures do not leak local files.
                if is_temporary:
                    os.remove(local_src)

        # This was an explicit move requested from a remote resource
        # try to remove that resource
        if transfer == "move":
            # Transactions do not work here
            src.remove()