Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

21 

"""Support for reading and writing files to a POSIX file system."""

# Public API of this module: only the formatter base class is exported.
__all__ = ("FileFormatter",)

25 

26from abc import abstractmethod 

27 

28from lsst.daf.butler import Formatter 

29 

30 

class FileFormatter(Formatter):
    """Interface for reading and writing files on a POSIX file system.

    Subclasses must implement `_readFile` and `_writeFile`.  They may
    additionally define ``_fromBytes`` and ``_toBytes`` methods; `fromBytes`
    and `toBytes` raise `NotImplementedError` when those are absent.
    """

    extension = None
    """Default file extension to use for writing files. None means that no
    modifications will be made to the supplied file extension. (`str`)"""

    @abstractmethod
    def _readFile(self, path, pytype=None):
        """Read a file from the path in the correct format.

        Parameters
        ----------
        path : `str`
            Path to use to open the file.
        pytype : `class`, optional
            Class to use to read the file.

        Returns
        -------
        data : `object`
            Data read from file. Returns `None` if the file can not be
            found at the given path.

        Raises
        ------
        Exception
            Some problem reading the file.
        """
        pass

    @abstractmethod
    def _writeFile(self, inMemoryDataset):
        """Write the in memory dataset to file on disk.

        Parameters
        ----------
        inMemoryDataset : `object`
            Object to serialize.

        Raises
        ------
        Exception
            The file could not be written.
        """
        pass

    def _assembleDataset(self, data, component=None):
        """Assemble and coerce the dataset, or one of its components,
        into an appropriate python type and return it.

        Parameters
        ----------
        data : `dict` or `object`
            Composite or a dict that, or which component, needs to be
            coerced to the python type specified in "fileDescriptor".
        component : `str`, optional
            Component to read from the file. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object. The type of object
            is controlled by the specific formatter.  Can be `None` if the
            requested component could not be extracted.

        Raises
        ------
        ValueError
            The read and write storage classes differ but no component
            was requested.
        """
        fileDescriptor = self.fileDescriptor
        storageClass = fileDescriptor.storageClass
        readStorageClass = fileDescriptor.readStorageClass

        # If read and write storage classes differ, more work is required:
        # the caller must be asking for a component of what was written.
        if readStorageClass != storageClass:
            if component is None:
                raise ValueError(
                    f"Storage class inconsistency ({readStorageClass.name} vs"
                    f" {storageClass.name}) but no component requested"
                )

            # Concrete composite written as a single file (we hope)
            try:
                data = storageClass.assembler().getComponent(data, component)
            except AttributeError:
                # Defer the complaint: the public entry points decide
                # whether a None result is an error.
                data = None

        # Coerce to the requested type (not necessarily the type that was
        # written)
        return self._coerceType(data, readStorageClass,
                                pytype=readStorageClass.pytype)

    def _coerceType(self, inMemoryDataset, storageClass, pytype=None):
        """Coerce the supplied inMemoryDataset to type `pytype`.

        Usually a no-op; this base implementation returns the dataset
        unchanged.

        Parameters
        ----------
        inMemoryDataset : `object`
            Object to coerce to expected type.
        storageClass : `StorageClass`
            StorageClass associated with ``inMemoryDataset``.
        pytype : `class`, optional
            Override type to use for conversion.

        Returns
        -------
        inMemoryDataset : `object`
            Object of expected type `pytype`.
        """
        return inMemoryDataset

    def read(self, component=None):
        """Read data from a file.

        The file to read is identified by ``self.fileDescriptor``.

        Parameters
        ----------
        component : `str`, optional
            Component to read from the file. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object. The type of object
            is controlled by the specific formatter.  May be `None` when a
            component was requested but is missing.

        Raises
        ------
        ValueError
            The read and write storage classes differ but no component was
            requested, or no data could be read and no component was
            requested.
        """
        fileDescriptor = self.fileDescriptor

        # Read the file naively
        path = fileDescriptor.location.path
        data = self._readFile(path, fileDescriptor.storageClass.pytype)

        # Assemble the requested dataset and potentially return only its
        # component coercing it to its appropriate pytype
        data = self._assembleDataset(data, component)

        # Special case components by allowing a formatter to return None
        # to indicate that the component was understood but is missing
        if data is None and component is None:
            raise ValueError(f"Unable to read data with URI {self.fileDescriptor.location.uri}")

        return data

    def fromBytes(self, serializedDataset, component=None):
        """Read serialized data into a Dataset or its component.

        The read type is identified by ``self.fileDescriptor``.

        Parameters
        ----------
        serializedDataset : `bytes`
            Bytes object to unserialize.
        component : `str`, optional
            Component to read from the Dataset. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object. The type of object
            is controlled by the specific formatter.  May be `None` when a
            component was requested but is missing.

        Raises
        ------
        NotImplementedError
            Formatter does not support reading from bytes.
        ValueError
            The read and write storage classes differ but no component was
            requested, or no data could be read and no component was
            requested.
        """
        if not hasattr(self, '_fromBytes'):
            raise NotImplementedError("Type does not support reading from bytes.")

        data = self._fromBytes(serializedDataset,
                               self.fileDescriptor.storageClass.pytype)

        # Assemble the requested dataset and potentially return only its
        # component coercing it to its appropriate pytype
        data = self._assembleDataset(data, component)

        # Same rule as read(): a None result is only an error when the
        # whole dataset was requested; a missing component is reported to
        # the caller as None.
        if data is None and component is None:
            raise ValueError(f"Unable to read data with URI {self.fileDescriptor.location.uri}")

        return data

    def write(self, inMemoryDataset):
        """Write a Python object to a file.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Python object to store.

        Returns
        -------
        path : `str`
            The path where the primary file is stored within the datastore.
        """
        fileDescriptor = self.fileDescriptor

        # Update the location with the formatter-preferred file extension
        # before writing so that the returned path reflects it.
        fileDescriptor.location.updateExtension(self.extension)

        self._writeFile(inMemoryDataset)

        return fileDescriptor.location.pathInStore

    def toBytes(self, inMemoryDataset):
        """Serialize the Dataset to bytes based on formatter.

        Parameters
        ----------
        inMemoryDataset : `object`
            Object to serialize.

        Returns
        -------
        serializedDataset : `bytes`
            Bytes representing the serialized dataset.

        Raises
        ------
        NotImplementedError
            Formatter does not support writing to bytes.
        """
        if not hasattr(self, '_toBytes'):
            raise NotImplementedError("Type does not support writing to bytes.")

        return self._toBytes(inMemoryDataset)