Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22"""Support for reading and writing files to a POSIX file system.""" 

23 

# Public API of this module: only the formatter base class is exported.
__all__ = ("FileFormatter",)

25 

26from abc import abstractmethod 

27 

28from lsst.daf.butler import Formatter 

29 

30 

class FileFormatter(Formatter):
    """Interface for reading and writing files on a POSIX file system.

    Subclasses must implement `_readFile` and `_writeFile`. They may also
    define ``_fromBytes`` and ``_toBytes`` methods to enable `fromBytes`
    and `toBytes`, and may override `_coerceType` if the data read needs
    converting to the requested Python type.
    """

    extension = None
    """Default file extension to use for writing files. None means that no
    modifications will be made to the supplied file extension. (`str`)"""

    @abstractmethod
    def _readFile(self, path, pytype=None):
        """Read a file from the path in the correct format.

        Parameters
        ----------
        path : `str`
            Path to use to open the file.
        pytype : `class`, optional
            Class to use to read the file.

        Returns
        -------
        data : `object`
            Data read from file. Returns `None` if the file can not be
            found at the given path.

        Raises
        ------
        Exception
            Some problem reading the file.
        """

    @abstractmethod
    def _writeFile(self, inMemoryDataset):
        """Write the in memory dataset to file on disk.

        Parameters
        ----------
        inMemoryDataset : `object`
            Object to serialize.

        Raises
        ------
        Exception
            The file could not be written.
        """

    def _assembleDataset(self, data, component=None):
        """Assemble and coerce the dataset, or one of its components,
        into an appropriate python type and return it.

        Parameters
        ----------
        data : `dict` or `object`
            Composite or a dict that, or which component, needs to be
            coerced to the python type specified in "fileDescriptor".
        component : `str`, optional
            Component to read from the file. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object. The type of object
            is controlled by the specific formatter.

        Raises
        ------
        ValueError
            The read and write storage classes differ but no component
            was requested.
        """
        fileDescriptor = self.fileDescriptor
        readStorageClass = fileDescriptor.readStorageClass
        storageClass = fileDescriptor.storageClass

        # If read and write storage classes differ, the caller wants a
        # component of what was written, so it has to be extracted first.
        if readStorageClass != storageClass:
            if component is None:
                raise ValueError("Storage class inconsistency ({} vs {}) but no"
                                 " component requested".format(readStorageClass.name,
                                                               storageClass.name))

            # Concrete composite written as a single file (we hope)
            try:
                data = storageClass.assembler().getComponent(data, component)
            except AttributeError:
                # Defer the complaint: callers treat a `None` result as a
                # failed read and raise from there.
                data = None

        # Coerce to the requested type (not necessarily the type that was
        # written)
        return self._coerceType(data, readStorageClass,
                                pytype=readStorageClass.pytype)

    def _coerceType(self, inMemoryDataset, storageClass, pytype=None):
        """Coerce the supplied inMemoryDataset to type `pytype`.

        Usually a no-op: this base implementation returns the object
        unchanged.

        Parameters
        ----------
        inMemoryDataset : `object`
            Object to coerce to expected type.
        storageClass : `StorageClass`
            StorageClass associated with ``inMemoryDataset``.
        pytype : `class`, optional
            Override type to use for conversion.

        Returns
        -------
        inMemoryDataset : `object`
            Object of expected type `pytype`.
        """
        return inMemoryDataset

    def read(self, component=None):
        """Read data from a file.

        The file to read and the type to read it into are taken from
        ``self.fileDescriptor``.

        Parameters
        ----------
        component : `str`, optional
            Component to read from the file. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object. The type of object
            is controlled by the specific formatter.

        Raises
        ------
        ValueError
            The read and write storage classes differ but no component was
            requested, or no data could be obtained (for example the file
            was not found or the component could not be extracted).
        """
        fileDescriptor = self.fileDescriptor

        # Read the file naively, using the type it was written with.
        data = self._readFile(fileDescriptor.location.path,
                              fileDescriptor.storageClass.pytype)

        # Assemble the requested dataset and potentially return only its
        # component coercing it to its appropriate pytype
        data = self._assembleDataset(data, component)

        if data is None:
            raise ValueError(f"Unable to read data with URI {self.fileDescriptor.location.uri}")

        return data

    def fromBytes(self, serializedDataset, component=None):
        """Read serialized data into a Dataset or its component.

        The type to deserialize into is taken from ``self.fileDescriptor``.

        Parameters
        ----------
        serializedDataset : `bytes`
            Bytes object to unserialize.
        component : `str`, optional
            Component to read from the Dataset. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object. The type of object
            is controlled by the specific formatter.

        Raises
        ------
        NotImplementedError
            Formatter does not support reading from bytes.
        ValueError
            The read and write storage classes differ but no component was
            requested, or no data could be obtained from the bytes.
        """
        # Byte support is optional: it exists only if the subclass defines
        # a ``_fromBytes`` method.
        if not hasattr(self, '_fromBytes'):
            raise NotImplementedError("Type does not support reading from bytes.")

        data = self._fromBytes(serializedDataset,
                               self.fileDescriptor.storageClass.pytype)

        # Assemble the requested dataset and potentially return only its
        # component coercing it to its appropriate pytype
        data = self._assembleDataset(data, component)

        if data is None:
            raise ValueError(f"Unable to read data with URI {self.fileDescriptor.location.uri}")

        return data

    def write(self, inMemoryDataset):
        """Write a Python object to a file.

        The location held by ``self.fileDescriptor`` is updated in place
        with the formatter's preferred `extension` before writing.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Python object to store.

        Returns
        -------
        path : `str`
            The path where the primary file is stored within the datastore.
        """
        fileDescriptor = self.fileDescriptor
        # Update the location with the formatter-preferred file extension
        fileDescriptor.location.updateExtension(self.extension)

        self._writeFile(inMemoryDataset)

        return fileDescriptor.location.pathInStore

    def toBytes(self, inMemoryDataset):
        """Serialize the Dataset to bytes based on formatter.

        Parameters
        ----------
        inMemoryDataset : `object`
            Object to serialize.

        Returns
        -------
        serializedDataset : `bytes`
            Bytes representing the serialized dataset.

        Raises
        ------
        NotImplementedError
            Formatter does not support writing to bytes.
        """
        # Byte support is optional: it exists only if the subclass defines
        # a ``_toBytes`` method.
        if not hasattr(self, '_toBytes'):
            raise NotImplementedError("Type does not support writing to bytes.")

        return self._toBytes(inMemoryDataset)