Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

21 

from __future__ import annotations

import logging
import os.path
import tempfile

from typing import (
    TYPE_CHECKING,
    Any,
    Optional,
    Union,
)

from .fileLikeDatastore import FileLikeDatastore

from lsst.daf.butler import (
    DatasetRef,
    Location,
    StoredFileInfo,
)

if TYPE_CHECKING:
    from .fileLikeDatastore import DatastoreFileGetInformation
    from lsst.daf.butler import DatastoreConfig
    from lsst.daf.butler.registry.interfaces import DatastoreRegistryBridgeManager

46 

# Module-level logger following the standard `getLogger(__name__)` pattern.
log = logging.getLogger(__name__)

48 

49 

class RemoteFileDatastore(FileLikeDatastore):
    """A datastore designed for files at remote locations.

    Parameters
    ----------
    config : `DatastoreConfig` or `str`
        Configuration. A string should refer to the name of the config file.
    bridgeManager : `DatastoreRegistryBridgeManager`
        Object that manages the interface between `Registry` and datastores.
    butlerRoot : `str`, optional
        New datastore root to use to override the configuration value.

    Raises
    ------
    ValueError
        If root location does not exist and ``create`` is `False` in the
        configuration.

    Notes
    -----
    Datastore supports non-link transfer modes for file-based ingest:
    `"move"`, `"copy"`, and `None` (no transfer).
    """

    def __init__(self, config: Union[DatastoreConfig, str],
                 bridgeManager: DatastoreRegistryBridgeManager,
                 butlerRoot: Optional[str] = None):
        super().__init__(config, bridgeManager, butlerRoot)
        if not self.root.exists():
            # Only create the root if the configuration explicitly allows it.
            if "create" not in self.config or not self.config["create"]:
                raise ValueError(f"No valid root and not allowed to create one at: {self.root}")
            try:
                self.root.mkdir()
            except ValueError as e:
                raise ValueError(f"Can not create datastore root '{self.root}', check permissions.") from e

    def _read_artifact_into_memory(self, getInfo: DatastoreFileGetInformation,
                                   ref: DatasetRef, isComponent: bool = False) -> Any:
        """Download a stored artifact and deserialize it into a Python object.

        Parameters
        ----------
        getInfo : `DatastoreFileGetInformation`
            Collated information about the artifact (location, formatter,
            stored file metadata, component and storage class details).
        ref : `DatasetRef`
            Reference to the dataset being read; used for error reporting.
        isComponent : `bool`, optional
            If `True`, read only the named component of the dataset.

        Returns
        -------
        result : `object`
            The dataset after post-processing by the storage class machinery.

        Raises
        ------
        RuntimeError
            If the downloaded size does not match the size recorded in
            the registry (integrity check).
        ValueError
            If the formatter fails to deserialize the downloaded bytes.
        """
        location = getInfo.location

        log.debug("Downloading data from %s", location.uri)
        serializedDataset = location.uri.read()

        # Integrity check: the byte count must match what was recorded
        # when the artifact was written.
        storedFileInfo = getInfo.info
        if len(serializedDataset) != storedFileInfo.file_size:
            raise RuntimeError("Integrity failure in Datastore. "
                               f"Size of file {location.path} ({len(serializedDataset)}) "
                               f"does not match recorded size of {storedFileInfo.file_size}")

        # Format the downloaded bytes into an appropriate object directly,
        # or via a temporary file (when the formatter does not support
        # to/from bytes). This is the equivalent of the PosixDatastore
        # formatter.read try-except block.
        formatter = getInfo.formatter
        try:
            result = formatter.fromBytes(serializedDataset,
                                         component=getInfo.component if isComponent else None)
        except NotImplementedError:
            # The formatter might not always have an extension, so derive a
            # suitable one via a formatter-updated temporary location.
            tmpLoc = Location(".", "temp")
            tmpLoc = formatter.makeUpdatedLocation(tmpLoc)
            with tempfile.NamedTemporaryFile(suffix=tmpLoc.getExtension()) as tmpFile:
                tmpFile.write(serializedDataset)
                # Flush the write. Do not close the file because that
                # will delete it.
                tmpFile.flush()
                formatter._fileDescriptor.location = Location(*os.path.split(tmpFile.name))
                result = formatter.read(component=getInfo.component if isComponent else None)
        except Exception as e:
            raise ValueError(f"Failure from formatter '{formatter.name()}' for dataset {ref.id}"
                             f" ({ref.datasetType.name} from {location.uri}): {e}") from e

        return self._post_process_get(result, getInfo.readStorageClass, getInfo.assemblerParams,
                                      isComponent=isComponent)

    def _write_in_memory_to_artifact(self, inMemoryDataset: Any, ref: DatasetRef) -> StoredFileInfo:
        """Serialize an in-memory dataset and upload it to the datastore.

        Parameters
        ----------
        inMemoryDataset : `object`
            The dataset to store.
        ref : `DatasetRef`
            Reference identifying the dataset; determines the target
            location and formatter via `_prepare_for_put`.

        Returns
        -------
        info : `StoredFileInfo`
            Metadata describing the stored artifact, as produced by
            `_extractIngestInfo`.

        Raises
        ------
        RuntimeError
            If no transaction is active; a transaction is required so the
            upload can be undone if the subsequent ingest fails.
        """
        location, formatter = self._prepare_for_put(inMemoryDataset, ref)

        if location.uri.exists():
            # Assume that by this point if registry thinks the file should
            # not exist then the file should not exist and therefore we can
            # overwrite it. This can happen if a put was interrupted by
            # an external interrupt. The only time this could be problematic is
            # if the file template is incomplete and multiple dataset refs
            # result in identical filenames.
            # Eventually we should remove the check completely (it takes
            # non-zero time for network).
            log.warning("Object %s exists in datastore for ref %s", location.uri, ref)

        if not location.uri.dirname().exists():
            log.debug("Folder %s does not exist yet.", location.uri.dirname())
            location.uri.dirname().mkdir()

        if self._transaction is None:
            raise RuntimeError("Attempting to write artifact without transaction enabled")

        # Upload the file directly from bytes, or by using a temporary file
        # if toBytes is not implemented by the formatter.
        try:
            serializedDataset = formatter.toBytes(inMemoryDataset)
            log.debug("Writing bytes directly to %s", location.uri)
            location.uri.write(serializedDataset, overwrite=True)
            log.debug("Successfully wrote bytes directly to %s", location.uri)
        except NotImplementedError:
            with tempfile.NamedTemporaryFile(suffix=location.getExtension()) as tmpFile:
                tmpLocation = Location(*os.path.split(tmpFile.name))
                formatter._fileDescriptor.location = tmpLocation
                # Bug fix: the original format string lacked a "%s"
                # placeholder, so the URI argument was never interpolated
                # and logging raised a formatting error.
                log.debug("Writing dataset to temporary directory at %s", tmpLocation.uri)
                formatter.write(inMemoryDataset)
                # Transfer must happen before the with-block exits, because
                # closing the NamedTemporaryFile deletes it.
                location.uri.transfer_from(tmpLocation.uri, transfer="copy", overwrite=True)
            log.debug("Successfully wrote dataset to %s via a temporary file.", location.uri)

        # Register a callback to try to delete the uploaded data if
        # the ingest fails below
        self._transaction.registerUndo("remoteWrite", location.uri.remove)

        # URI is needed to resolve what ingest case are we dealing with
        return self._extractIngestInfo(location.uri, ref, formatter=formatter)