Coverage for python/lsst/daf/butler/core/_butlerUri/s3.py : 77%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24import os
25import os.path
26import logging
27import tempfile
29__all__ = ('ButlerS3URI',)
31from typing import (
32 TYPE_CHECKING,
33 Optional,
34 Tuple,
35 Union,
36)
38from .utils import NoTransaction
39from ._butlerUri import ButlerURI
40from .s3utils import getS3Client, s3CheckFileExists, bucketExists
43if TYPE_CHECKING: 43 ↛ 44line 43 didn't jump to line 44, because the condition on line 43 was never true
44 try:
45 import boto3
46 except ImportError:
47 pass
48 from ..datastore import DatastoreTransaction
50log = logging.getLogger(__name__)
class ButlerS3URI(ButlerURI):
    """URI referring to a resource held in an S3 bucket.

    The bucket name is taken from ``netloc`` and the object key from
    ``relativeToPathRoot``.
    """

    @property
    def client(self) -> boto3.client:
        """Client object to address remote resource."""
        return getS3Client()

    def exists(self) -> bool:
        """Report whether the remote resource exists.

        Returns
        -------
        exists : `bool`
            For a root URI this is the result of the bucket existence
            check; otherwise it is the result of the key existence check.
        """
        if self.is_root:
            # Only check for the bucket since the path is irrelevant
            return bucketExists(self.netloc)
        found, _ = s3CheckFileExists(self, client=self.client)
        return found

    def size(self) -> int:
        """Return the size of the remote resource.

        Returns
        -------
        sz : `int`
            ``0`` for a directory-like URI, otherwise the size reported
            by `s3CheckFileExists`.
        """
        if self.dirLike:
            return 0
        _, sz = s3CheckFileExists(self, client=self.client)
        return sz

    def remove(self) -> None:
        """Remove the resource."""
        # https://github.com/boto/boto3/issues/507 - there is no
        # way of knowing if the file was actually deleted except
        # for checking all the keys again; the response is HTTP 204
        # all the time.
        self.client.delete_object(Bucket=self.netloc, Key=self.relativeToPathRoot)

    def read(self, size: int = -1) -> bytes:
        """Read the contents of the remote object.

        Parameters
        ----------
        size : `int`, optional
            Number of bytes to read, counted from the start of the object.
            A value that is not positive reads the whole object.

        Returns
        -------
        body : `bytes`
            The bytes that were read.

        Raises
        ------
        FileNotFoundError
            Raised if the client reports that the key or the bucket does
            not exist.
        """
        args = {}
        if size > 0:
            # Byte ranges are inclusive at both ends, hence size - 1.
            args["Range"] = f"bytes=0-{size-1}"
        try:
            response = self.client.get_object(Bucket=self.netloc,
                                              Key=self.relativeToPathRoot,
                                              **args)
        except (self.client.exceptions.NoSuchKey, self.client.exceptions.NoSuchBucket) as err:
            raise FileNotFoundError(f"No such resource: {self}") from err
        stream = response["Body"]
        try:
            return stream.read()
        finally:
            # Always release the response body, even if the read fails.
            stream.close()

    def write(self, data: bytes, overwrite: bool = True) -> None:
        """Write the supplied bytes to the remote object.

        Parameters
        ----------
        data : `bytes`
            Content to store.
        overwrite : `bool`, optional
            If `False`, refuse to replace a resource that already exists.

        Raises
        ------
        FileExistsError
            Raised if ``overwrite`` is `False` and the resource exists.
        """
        if not overwrite and self.exists():
            raise FileExistsError(f"Remote resource {self} exists and overwrite has been disabled")
        self.client.put_object(Bucket=self.netloc, Key=self.relativeToPathRoot,
                               Body=data)

    def mkdir(self) -> None:
        """Create an empty key standing in for a "directory".

        Raises
        ------
        ValueError
            Raised if the bucket does not exist or if this URI is not
            directory-like.
        """
        if not bucketExists(self.netloc):
            raise ValueError(f"Bucket {self.netloc} does not exist for {self}!")

        if not self.dirLike:
            raise ValueError(f"Can not create a 'directory' for file-like URI {self}")

        # Don't create an S3 key when the root is at the top level of a bucket
        if self.path != "/":
            self.client.put_object(Bucket=self.netloc, Key=self.relativeToPathRoot)

    def as_local(self) -> Tuple[str, bool]:
        """Download object from S3 and place in temporary directory.

        Returns
        -------
        path : `str`
            Path to local temporary file.
        temporary : `bool`
            Always returns `True`. This is always a temporary file.
        """
        with tempfile.NamedTemporaryFile(suffix=self.getExtension(), delete=False) as tmpFile:
            try:
                self.client.download_fileobj(self.netloc, self.relativeToPathRoot, tmpFile)
            except Exception:
                # The file was created with delete=False, so nothing else
                # will remove it if the download fails part way through.
                tmpFile.close()
                os.remove(tmpFile.name)
                raise
        return tmpFile.name, True

    def transfer_from(self, src: ButlerURI, transfer: str = "copy",
                      overwrite: bool = False,
                      transaction: Optional[Union[DatastoreTransaction, NoTransaction]] = None) -> None:
        """Transfer the current resource to an S3 bucket.

        Parameters
        ----------
        src : `ButlerURI`
            Source URI.
        transfer : `str`
            Mode to use for transferring the resource. Must be one of
            ``self.transferModes``; ``auto`` is replaced by
            ``self.transferDefault``.
        overwrite : `bool`, optional
            Allow an existing file to be overwritten. Defaults to `False`.
        transaction : `DatastoreTransaction`, optional
            Currently unused.

        Raises
        ------
        ValueError
            Raised if the transfer mode is not in ``self.transferModes``.
        FileExistsError
            Raised if the destination exists and ``overwrite`` is `False`.
        """
        # Fail early to prevent delays if remote resources are requested
        if transfer not in self.transferModes:
            raise ValueError(f"Transfer mode '{transfer}' not supported by URI scheme {self.scheme}")

        # The existence checks can involve remote requests, so only pay for
        # them when the debug message will actually be emitted.
        if log.isEnabledFor(logging.DEBUG):
            log.debug("Transferring %s [exists: %s] -> %s [exists: %s] (transfer=%s)",
                      src, src.exists(), self, self.exists(), transfer)

        if not overwrite and self.exists():
            raise FileExistsError(f"Destination path '{self}' already exists.")

        if transfer == "auto":
            transfer = self.transferDefault

        if isinstance(src, type(self)):
            # Looks like an S3 remote uri so we can use direct copy
            # note that boto3.resource.meta.copy is cleverer than the low
            # level copy_object
            copy_source = {
                "Bucket": src.netloc,
                "Key": src.relativeToPathRoot,
            }
            self.client.copy_object(CopySource=copy_source, Bucket=self.netloc, Key=self.relativeToPathRoot)
        else:
            # Use local file and upload it
            local_src, is_temporary = src.as_local()

            # resource.meta.upload_file seems like the right thing
            # but we have a low level client
            try:
                with open(local_src, "rb") as fh:
                    self.client.put_object(Bucket=self.netloc,
                                           Key=self.relativeToPathRoot, Body=fh)
            finally:
                # Clean up the temporary copy whether or not the upload
                # succeeded.
                if is_temporary:
                    os.remove(local_src)

        # This was an explicit move requested from a remote resource
        # try to remove that resource
        if transfer == "move":
            # Transactions do not work here
            src.remove()