Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24import os 

25import os.path 

26import logging 

27import tempfile 

28 

29__all__ = ('ButlerS3URI',) 

30 

31from typing import ( 

32 TYPE_CHECKING, 

33 Optional, 

34 Any, 

35 Callable, 

36 Tuple, 

37 Union, 

38) 

39 

40from .utils import NoTransaction 

41from ._butlerUri import ButlerURI 

42from .s3utils import getS3Client, s3CheckFileExists, bucketExists 

43 

44from botocore.exceptions import ClientError 

45from http.client import ImproperConnectionState, HTTPException 

46from urllib3.exceptions import RequestError, HTTPError 

47 

48if TYPE_CHECKING: 48 ↛ 49line 48 didn't jump to line 49, because the condition on line 48 was never true

49 try: 

50 import boto3 

51 except ImportError: 

52 pass 

53 from ..datastore import DatastoreTransaction 

54 

# Optional dependency: https://pypi.org/project/backoff/
try:
    import backoff
except ImportError:
    class Backoff:
        """No-op stand-in used when the "backoff" package is not installed.

        Both static methods hand back their first positional argument
        untouched and ignore everything else.  As a consequence
        ``@backoff.on_exception(backoff.expo, ...)`` evaluates to ``expo``
        itself, and applying that to the decorated function returns the
        function unchanged, i.e. no retries are performed.
        """

        @staticmethod
        def on_exception(func: Callable, *args: Any, **kwargs: Any) -> Callable:
            """Return ``func`` as-is; remaining arguments are ignored."""
            return func

        @staticmethod
        def expo(func: Callable, *args: Any, **kwargs: Any) -> Callable:
            """Return ``func`` as-is; remaining arguments are ignored."""
            return func

    # Expose the stand-in under the name the rest of the module uses.
    backoff = Backoff

69 

# Settings for the "backoff" retry decorators.  These retries are
# belt-and-suspenders on top of the retries built into Boto3, to account
# for semantic differences in errors between S3-like providers.

# I/O related exceptions.
retryable_io_errors = (
    ImproperConnectionState,  # http.client
    HTTPException,  # http.client
    RequestError,  # urllib3.exceptions
    HTTPError,  # urllib3.exceptions
    TimeoutError,  # built-in
    ConnectionError,  # built-in
)

# Exceptions raised at the client level.
retryable_client_errors = (
    ClientError,  # botocore.exceptions
    PermissionError,  # built-in
)

# Client errors first, then I/O errors.
all_retryable_errors = retryable_client_errors + retryable_io_errors

# Handed to the retry decorators as ``max_time`` (seconds, per the backoff
# package documentation).
max_retry_time = 60


log = logging.getLogger(__name__)

90 

91 

class ButlerS3URI(ButlerURI):
    """URI referring to a resource held in an S3 bucket.

    The bucket name is taken from ``netloc`` and the object key from
    ``relativeToPathRoot``.  Every method that talks to the remote service
    is wrapped in a "backoff" retry decorator limited to ``max_retry_time``.
    """

    @property
    def client(self) -> boto3.client:
        """Client object to address remote resource."""
        # Obtained from the shared helper on every access; nothing is
        # stored on the instance.
        return getS3Client()

    @backoff.on_exception(backoff.expo, retryable_client_errors, max_time=max_retry_time)
    def exists(self) -> bool:
        """Report whether the remote resource exists.

        Returns
        -------
        exists : `bool`
            For a root URI the result of the bucket check, otherwise the
            existence flag returned by ``s3CheckFileExists``.
        """
        if self.is_root:
            # Only check for the bucket since the path is irrelevant
            return bucketExists(self.netloc)
        exists, _ = s3CheckFileExists(self, client=self.client)
        return exists

    @backoff.on_exception(backoff.expo, retryable_client_errors, max_time=max_retry_time)
    def size(self) -> int:
        """Return the size of the remote resource.

        Returns
        -------
        sz : `int`
            ``0`` for a directory-like URI, otherwise the size reported by
            ``s3CheckFileExists``.
        """
        if self.dirLike:
            return 0
        _, sz = s3CheckFileExists(self, client=self.client)
        return sz

    @backoff.on_exception(backoff.expo, retryable_client_errors, max_time=max_retry_time)
    def remove(self) -> None:
        """Remove the resource."""

        # https://github.com/boto/boto3/issues/507 - there is no
        # way of knowing if the file was actually deleted except
        # for checking all the keys again, response is HTTP 204 OK
        # response all the time
        self.client.delete_object(Bucket=self.netloc, Key=self.relativeToPathRoot)

    @backoff.on_exception(backoff.expo, all_retryable_errors, max_time=max_retry_time)
    def read(self, size: int = -1) -> bytes:
        """Read the contents of the remote object.

        Parameters
        ----------
        size : `int`, optional
            Number of leading bytes to request.  Values of zero or less
            request the whole object.

        Returns
        -------
        body : `bytes`
            The bytes read from the response body.

        Raises
        ------
        FileNotFoundError
            Raised if the client reports that the key or the bucket does
            not exist.
        """
        args = {}
        if size > 0:
            # HTTP byte ranges are inclusive, hence ``size - 1``.
            args["Range"] = f"bytes=0-{size-1}"

        # Use one client for both the request and the exception classes
        # named in the ``except`` clause.
        client = self.client
        try:
            response = client.get_object(Bucket=self.netloc,
                                         Key=self.relativeToPathRoot,
                                         **args)
        except (client.exceptions.NoSuchKey, client.exceptions.NoSuchBucket) as err:
            raise FileNotFoundError(f"No such resource: {self}") from err

        stream = response["Body"]
        try:
            return stream.read()
        finally:
            # Close the stream even if reading fails part way through.
            stream.close()

    @backoff.on_exception(backoff.expo, all_retryable_errors, max_time=max_retry_time)
    def write(self, data: bytes, overwrite: bool = True) -> None:
        """Write the supplied bytes to the remote object.

        Parameters
        ----------
        data : `bytes`
            Content to store.
        overwrite : `bool`, optional
            If `False`, refuse to write when the resource already exists.

        Raises
        ------
        FileExistsError
            Raised if ``overwrite`` is `False` and the resource exists.
        """
        if not overwrite and self.exists():
            raise FileExistsError(f"Remote resource {self} exists and overwrite has been disabled")
        self.client.put_object(Bucket=self.netloc, Key=self.relativeToPathRoot,
                               Body=data)

    @backoff.on_exception(backoff.expo, all_retryable_errors, max_time=max_retry_time)
    def mkdir(self) -> None:
        """Create a key standing in for a "directory" in the bucket.

        Raises
        ------
        ValueError
            Raised if the bucket does not exist or if this URI is not
            directory-like.
        """
        if not bucketExists(self.netloc):
            raise ValueError(f"Bucket {self.netloc} does not exist for {self}!")

        if not self.dirLike:
            raise ValueError(f"Can not create a 'directory' for file-like URI {self}")

        # don't create S3 key when root is at the top-level of a Bucket
        if self.path != "/":
            self.client.put_object(Bucket=self.netloc, Key=self.relativeToPathRoot)

    @backoff.on_exception(backoff.expo, all_retryable_errors, max_time=max_retry_time)
    def as_local(self) -> Tuple[str, bool]:
        """Download object from S3 and place in temporary directory.

        Returns
        -------
        path : `str`
            Path to local temporary file.
        temporary : `bool`
            Always returns `True`. This is always a temporary file.
        """
        tmpFile = tempfile.NamedTemporaryFile(suffix=self.getExtension(), delete=False)
        try:
            with tmpFile:
                self.client.download_fileobj(self.netloc, self.relativeToPathRoot, tmpFile)
        except BaseException:
            # The file was created with delete=False, so a failed download
            # (possibly one of several retries) would otherwise leave it
            # behind.  Cleanup only; the original error is re-raised.
            try:
                os.remove(tmpFile.name)
            except OSError:
                pass
            raise
        return tmpFile.name, True

    @backoff.on_exception(backoff.expo, all_retryable_errors, max_time=max_retry_time)
    def transfer_from(self, src: ButlerURI, transfer: str = "copy",
                      overwrite: bool = False,
                      transaction: Optional[Union[DatastoreTransaction, NoTransaction]] = None) -> None:
        """Transfer a resource into the S3 location described by this URI.

        Parameters
        ----------
        src : `ButlerURI`
            Source URI.
        transfer : `str`
            Mode to use for transferring the resource.  Must be one of
            ``self.transferModes``.  ``"auto"`` is replaced by
            ``self.transferDefault`` and ``"move"`` additionally removes
            the source once the data has been transferred.
        overwrite : `bool`, optional
            Allow an existing file to be overwritten. Defaults to `False`.
        transaction : `DatastoreTransaction`, optional
            Currently unused.

        Raises
        ------
        ValueError
            Raised if the transfer mode is not supported.
        FileExistsError
            Raised if the destination exists and ``overwrite`` is `False`.
        """
        # Fail early to prevent delays if remote resources are requested
        if transfer not in self.transferModes:
            raise ValueError(f"Transfer mode '{transfer}' not supported by URI scheme {self.scheme}")

        # Existence checks are remote calls, so only make them for the log
        # message when debug output is actually enabled.
        if log.isEnabledFor(logging.DEBUG):
            log.debug("Transferring %s [exists: %s] -> %s [exists: %s] (transfer=%s)",
                      src, src.exists(), self, self.exists(), transfer)

        if not overwrite and self.exists():
            raise FileExistsError(f"Destination path '{self}' already exists.")

        if transfer == "auto":
            transfer = self.transferDefault

        if isinstance(src, type(self)):
            # Looks like an S3 remote uri so we can use direct copy
            # note that boto3.resource.meta.copy is cleverer than the low
            # level copy_object
            copy_source = {
                "Bucket": src.netloc,
                "Key": src.relativeToPathRoot,
            }
            self.client.copy_object(CopySource=copy_source, Bucket=self.netloc,
                                    Key=self.relativeToPathRoot)
        else:
            # Use local file and upload it
            local_src, is_temporary = src.as_local()

            # resource.meta.upload_file seems like the right thing
            # but we have a low level client
            try:
                with open(local_src, "rb") as fh:
                    self.client.put_object(Bucket=self.netloc,
                                           Key=self.relativeToPathRoot, Body=fh)
            finally:
                # Remove the temporary copy even when the upload fails so
                # that retries do not accumulate stray files.
                if is_temporary:
                    os.remove(local_src)

        # This was an explicit move requested from a remote resource
        # try to remove that resource
        if transfer == "move":
            # Transactions do not work here
            src.remove()