# NOTE(review): coverage-report residue removed from this extraction
# (python/lsst/daf/butler/datastores/s3Datastore.py : 73% line coverage).
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

"""S3 datastore."""

__all__ = ("S3Datastore", )

import logging

from botocore.exceptions import ClientError
from http.client import ImproperConnectionState, HTTPException
from urllib3.exceptions import RequestError, HTTPError

from typing import (
    TYPE_CHECKING,
    Any,
    Union,
    Callable
)

# https://pypi.org/project/backoff/
try:
    import backoff
except ImportError:
    class Backoff:
        """No-op stand-in for the optional ``backoff`` package.

        When ``backoff`` is unavailable, both decorators degrade to
        identity pass-throughs: the wrapped callable is returned
        unchanged and no retrying takes place.
        """

        @staticmethod
        def expo(func: Callable, *args: Any, **kwargs: Any) -> Callable:
            # Hand the callable back untouched — no wait-generator needed
            # when nothing retries.
            return func

        @staticmethod
        def on_exception(func: Callable, *args: Any, **kwargs: Any) -> Callable:
            # Mirrors backoff.on_exception's call shape (wait generator
            # first, then exception spec and keyword options) but simply
            # returns the wait-generator argument, which in turn acts as
            # an identity decorator.
            return func

    backoff = Backoff
from lsst.daf.butler import (
    DatasetRef,
    Location,
    StoredFileInfo,
)

from .remoteFileDatastore import RemoteFileDatastore
from lsst.daf.butler.core.s3utils import getS3Client, bucketExists

if TYPE_CHECKING:
    from .fileLikeDatastore import DatastoreFileGetInformation
    from lsst.daf.butler import DatastoreConfig
    from lsst.daf.butler.registry.interfaces import DatastoreRegistryBridgeManager

log = logging.getLogger(__name__)
# Retry configuration for the "backoff" decorators used below.  These
# retries act as belt-and-suspenders alongside the retries built into
# Boto3, to account for semantic differences in errors between S3-like
# providers.
retryable_io_errors = (
    # http.client
    ImproperConnectionState,
    HTTPException,
    # urllib3.exceptions
    RequestError,
    HTTPError,
    # built-ins
    TimeoutError,
    ConnectionError,
)
retryable_client_errors = (
    # botocore.exceptions
    ClientError,
    # built-ins
    PermissionError,
)
# Anything in either category is retried on read/write operations.
all_retryable_errors = retryable_client_errors + retryable_io_errors
# Upper bound (seconds) on total retry time for any one operation.
max_retry_time = 60
class S3Datastore(RemoteFileDatastore):
    """Basic S3 Object Storage backed Datastore.

    Parameters
    ----------
    config : `DatastoreConfig` or `str`
        Configuration. A string should refer to the name of the config file.
    bridgeManager : `DatastoreRegistryBridgeManager`
        Object that manages the interface between `Registry` and datastores.
    butlerRoot : `str`, optional
        New datastore root to use to override the configuration value.

    Raises
    ------
    ValueError
        If root location does not exist and ``create`` is `False` in the
        configuration.

    Notes
    -----
    S3Datastore supports non-link transfer modes for file-based ingest:
    `"move"`, `"copy"`, and `None` (no transfer).
    """

    defaultConfigFile = "datastores/s3Datastore.yaml"
    """Path to configuration defaults. Accessed within the ``configs`` resource
    or relative to a search path. Can be None if no defaults specified.
    """

    def __init__(self, config: Union[DatastoreConfig, str],
                 bridgeManager: DatastoreRegistryBridgeManager,
                 butlerRoot: str | None = None):
        super().__init__(config, bridgeManager, butlerRoot)

        self.client = getS3Client()
        if not bucketExists(self.locationFactory.netloc):
            # PosixDatastore creates the root directory if one does not exist.
            # Calling s3 client.create_bucket is possible but also requires
            # ACL LocationConstraints, Permissions and other configuration
            # parameters, so for now we do not create a bucket if one is
            # missing. Further discussion can make this happen though.
            raise IOError(f"Bucket {self.locationFactory.netloc} does not exist!")

    @backoff.on_exception(backoff.expo, retryable_client_errors, max_time=max_retry_time)
    def _artifact_exists(self, location: Location) -> bool:
        """Check that an artifact exists in this datastore at the specified
        location.

        Parameters
        ----------
        location : `Location`
            Expected location of the artifact associated with this datastore.

        Returns
        -------
        exists : `bool`
            True if the location can be found, false otherwise.
        """
        # Exists to allow backoff retry
        return super()._artifact_exists(location)

    @backoff.on_exception(backoff.expo, retryable_client_errors, max_time=max_retry_time)
    def _delete_artifact(self, location: Location) -> None:
        """Delete the artifact from the datastore.

        Parameters
        ----------
        location : `Location`
            Location of the artifact associated with this datastore.
        """
        # Exists to allow backoff retry
        return super()._delete_artifact(location)

    @backoff.on_exception(backoff.expo, all_retryable_errors, max_time=max_retry_time)
    def _read_artifact_into_memory(self, getInfo: DatastoreFileGetInformation,
                                   ref: DatasetRef, isComponent: bool = False) -> Any:
        """Delegate to `RemoteFileDatastore._read_artifact_into_memory`,
        wrapped so transient I/O and client errors are retried.

        Parameters
        ----------
        getInfo : `DatastoreFileGetInformation`
            Information describing how to locate and read the artifact.
        ref : `DatasetRef`
            Reference to the dataset being read.
        isComponent : `bool`, optional
            Whether the requested dataset is a component of a composite.

        Returns
        -------
        inMemoryDataset : `object`
            Whatever the base-class implementation returns.
        """
        # Exists to allow backoff retry
        return super()._read_artifact_into_memory(getInfo, ref, isComponent)

    @backoff.on_exception(backoff.expo, all_retryable_errors, max_time=max_retry_time)
    def _write_in_memory_to_artifact(self, inMemoryDataset: Any, ref: DatasetRef) -> StoredFileInfo:
        """Delegate to `RemoteFileDatastore._write_in_memory_to_artifact`,
        wrapped so transient I/O and client errors are retried.

        Parameters
        ----------
        inMemoryDataset : `object`
            The dataset to store.
        ref : `DatasetRef`
            Reference to the dataset being stored.

        Returns
        -------
        info : `StoredFileInfo`
            Metadata about the stored artifact, as produced by the base
            class.
        """
        # Exists to allow backoff retry
        return super()._write_in_memory_to_artifact(inMemoryDataset, ref)