# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

"""S3 datastore."""

__all__ = ("S3Datastore",)

import logging

from botocore.exceptions import ClientError
from http.client import ImproperConnectionState, HTTPException
from urllib3.exceptions import RequestError, HTTPError

from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    Optional,
    Union,
)

# https://pypi.org/project/backoff/
try:
    import backoff
except ImportError:
    # The optional "backoff" package is not available; install a no-op
    # stand-in so the retry decorators below degrade gracefully.
    class Backoff:
        @staticmethod
        def expo(func: Callable, *args: Any, **kwargs: Any) -> Callable:
            return func

        @staticmethod
        def on_exception(func: Callable, *args: Any, **kwargs: Any) -> Callable:
            return func

    backoff = Backoff
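
# With the stand-in above, ``backoff.on_exception(backoff.expo, errors,
# max_time=t)`` simply returns its first argument (``expo``), and applying
# ``expo`` to a method returns the method unchanged, so every
# ``@backoff.on_exception(...)`` decoration below becomes a no-op when the
# real package is absent.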

from lsst.daf.butler import (
    DatasetRef,
    Location,
    StoredFileInfo,
)

from .remoteFileDatastore import RemoteFileDatastore
from lsst.daf.butler.core._butlerUri.s3utils import getS3Client, bucketExists

if TYPE_CHECKING:
    from .fileLikeDatastore import DatastoreFileGetInformation
    from lsst.daf.butler import DatastoreConfig
    from lsst.daf.butler.registry.interfaces import DatastoreRegistryBridgeManager

log = logging.getLogger(__name__)

# Settings for the "backoff" retry decorators. These retries are
# belt-and-suspenders along with the retries built into Boto3, to account
# for semantic differences in errors between S3-like providers.
retryable_io_errors = (
    # http.client
    ImproperConnectionState, HTTPException,
    # urllib3.exceptions
    RequestError, HTTPError,
    # built-ins
    TimeoutError, ConnectionError)
retryable_client_errors = (
    # botocore.exceptions
    ClientError,
    # built-ins
    PermissionError)
all_retryable_errors = retryable_client_errors + retryable_io_errors
max_retry_time = 60  # seconds; upper bound on total time spent retrying
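
# For reference, the Boto3 built-in retries mentioned above are configured
# through botocore. A minimal sketch (illustrative only; this module obtains
# its client via ``getS3Client``):
#
#     import boto3
#     from botocore.config import Config
#
#     s3 = boto3.client(
#         "s3",
#         config=Config(retries={"max_attempts": 10, "mode": "standard"}),
#     )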


class S3Datastore(RemoteFileDatastore):
    """Basic S3 Object Storage backed Datastore.

    Parameters
    ----------
    config : `DatastoreConfig` or `str`
        Configuration. A string should refer to the name of the config file.
    bridgeManager : `DatastoreRegistryBridgeManager`
        Object that manages the interface between `Registry` and datastores.
    butlerRoot : `str`, optional
        New datastore root to use to override the configuration value.

    Raises
    ------
    ValueError
        Raised if the root location does not exist and ``create`` is `False`
        in the configuration.

    Notes
    -----
    S3Datastore supports non-link transfer modes for file-based ingest:
    `"move"`, `"copy"`, and `None` (no transfer).
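
    Examples
    --------
    A minimal sketch of obtaining a butler backed by this datastore (the
    bucket name and repository path are illustrative and assume an
    existing, already-configured repository):

    >>> from lsst.daf.butler import Butler
    >>> butler = Butler("s3://my-bucket/repo")  # doctest: +SKIP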

    """

    defaultConfigFile = "datastores/s3Datastore.yaml"
    """Path to configuration defaults. Accessed within the ``configs``
    resource or relative to a search path. Can be `None` if no defaults
    are specified.
    """

    def __init__(self, config: Union[DatastoreConfig, str],
                 bridgeManager: DatastoreRegistryBridgeManager,
                 butlerRoot: Optional[str] = None):
        super().__init__(config, bridgeManager, butlerRoot)

        self.client = getS3Client()
        if not bucketExists(self.locationFactory.netloc):
            # PosixDatastore creates the root directory if one does not
            # exist. Calling s3 client.create_bucket is possible but also
            # requires ACL LocationConstraints, Permissions and other
            # configuration parameters, so for now we do not create a
            # bucket if one is missing. Further discussion can make this
            # happen though.
            raise IOError(f"Bucket {self.locationFactory.netloc} does not exist!")

    @backoff.on_exception(backoff.expo, retryable_client_errors, max_time=max_retry_time)
    def _artifact_exists(self, location: Location) -> bool:
        """Check that an artifact exists in this datastore at the specified
        location.

        Parameters
        ----------
        location : `Location`
            Expected location of the artifact associated with this datastore.

        Returns
        -------
        exists : `bool`
            `True` if the location can be found, `False` otherwise.
        """
        # Exists to allow backoff retry.
        return super()._artifact_exists(location)

    @backoff.on_exception(backoff.expo, retryable_client_errors, max_time=max_retry_time)
    def _delete_artifact(self, location: Location) -> None:
        """Delete the artifact from the datastore.

        Parameters
        ----------
        location : `Location`
            Location of the artifact associated with this datastore.
        """
        # Exists to allow backoff retry.
        return super()._delete_artifact(location)

    @backoff.on_exception(backoff.expo, all_retryable_errors, max_time=max_retry_time)
    def _read_artifact_into_memory(self, getInfo: DatastoreFileGetInformation,
                                   ref: DatasetRef, isComponent: bool = False) -> Any:
        # Exists to allow backoff retry.
        return super()._read_artifact_into_memory(getInfo, ref, isComponent)

    @backoff.on_exception(backoff.expo, all_retryable_errors, max_time=max_retry_time)
    def _write_in_memory_to_artifact(self, inMemoryDataset: Any, ref: DatasetRef) -> StoredFileInfo:
        # Exists to allow backoff retry.
        return super()._write_in_memory_to_artifact(inMemoryDataset, ref)
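
# Note: the method overrides above exist solely to attach the "backoff"
# retry decorators to the inherited implementations. A subclass wanting the
# same behavior for another I/O method could follow the same pattern, e.g.
# (hypothetical method name):
#
#     @backoff.on_exception(backoff.expo, all_retryable_errors,
#                           max_time=max_retry_time)
#     def _some_io_method(self, *args: Any, **kwargs: Any) -> Any:
#         return super()._some_io_method(*args, **kwargs)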