Coverage for python/lsst/daf/butler/formatters/fileFormatter.py : 25%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22"""Support for reading and writing files to a POSIX file system."""
24__all__ = ("FileFormatter",)
26from abc import abstractmethod
28from lsst.daf.butler import Formatter
class FileFormatter(Formatter):
    """Interface for reading and writing files on a POSIX file system.

    Subclasses must implement `_readFile` and `_writeFile`. A subclass
    may additionally define ``_fromBytes(serializedDataset, pytype)``
    and ``_toBytes(inMemoryDataset)`` methods; `fromBytes` and `toBytes`
    look these up at call time and raise `NotImplementedError` when they
    are absent.

    All public methods obtain the file descriptor from
    ``self.fileDescriptor``.
    """

    extension = None
    """Default file extension to use for writing files. None means that no
    modifications will be made to the supplied file extension. (`str`)"""

    @abstractmethod
    def _readFile(self, path, pytype=None):
        """Read a file from the path in the correct format.

        Parameters
        ----------
        path : `str`
            Path to use to open the file.
        pytype : `class`, optional
            Class to use to read the file.

        Returns
        -------
        data : `object`
            Data read from file. Returns `None` if the file can not be
            found at the given path.

        Raises
        ------
        Exception
            Some problem reading the file.
        """

    @abstractmethod
    def _writeFile(self, inMemoryDataset):
        """Write the in memory dataset to file on disk.

        Parameters
        ----------
        inMemoryDataset : `object`
            Object to serialize.

        Raises
        ------
        Exception
            The file could not be written.
        """

    def _assembleDataset(self, data, component=None):
        """Assemble and coerce the dataset, or one of its components,
        into an appropriate python type and return it.

        Parameters
        ----------
        data : `dict` or `object`
            Composite, or a dict, that (or a component of which) needs to
            be coerced to the python type of the read storage class of
            ``self.fileDescriptor``.
        component : `str`, optional
            Component to extract. Only used if the `StorageClass` for
            reading differs from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object. The type of object
            is controlled by the specific formatter. `None` if the
            component could not be extracted from ``data``.

        Raises
        ------
        ValueError
            The read and write storage classes differ but no component
            was requested.
        """
        fileDescriptor = self.fileDescriptor
        readStorageClass = fileDescriptor.readStorageClass
        writeStorageClass = fileDescriptor.storageClass

        # If read and write storage classes differ, more work is required:
        # the caller wants one component of what was written.
        if readStorageClass != writeStorageClass:
            if component is None:
                raise ValueError(f"Storage class inconsistency ({readStorageClass.name} vs"
                                 f" {writeStorageClass.name}) but no component requested")

            # Concrete composite written as a single file (we hope).
            try:
                data = writeStorageClass.assembler().getComponent(data, component)
            except AttributeError:
                # Deliberately best-effort: defer the complaint to the
                # caller, which decides whether a missing value is an error.
                data = None

        # Coerce to the requested type (not necessarily the type that was
        # written).
        return self._coerceType(data, readStorageClass, pytype=readStorageClass.pytype)

    def _coerceType(self, inMemoryDataset, storageClass, pytype=None):
        """Coerce the supplied inMemoryDataset to type `pytype`.

        The base implementation is a no-op that returns its input
        unchanged; subclasses override it when a conversion is needed.

        Parameters
        ----------
        inMemoryDataset : `object`
            Object to coerce to expected type.
        storageClass : `StorageClass`
            StorageClass associated with ``inMemoryDataset``.
        pytype : `class`, optional
            Override type to use for conversion.

        Returns
        -------
        inMemoryDataset : `object`
            Object of expected type `pytype`.
        """
        return inMemoryDataset

    def read(self, component=None):
        """Read data from a file.

        The file to read is identified by ``self.fileDescriptor``.

        Parameters
        ----------
        component : `str`, optional
            Component to read from the file. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object. The type of object
            is controlled by the specific formatter. May be `None` when a
            component was requested but is missing.

        Raises
        ------
        ValueError
            No data could be read and no component was requested, or the
            read and write storage classes differ but no component was
            requested.
        """
        fileDescriptor = self.fileDescriptor

        # Read the file naively, using the type it was written with.
        data = self._readFile(fileDescriptor.location.path,
                              fileDescriptor.storageClass.pytype)

        # Assemble the requested dataset and potentially return only its
        # component, coercing it to its appropriate pytype.
        data = self._assembleDataset(data, component)

        # Special case components by allowing a formatter to return None
        # to indicate that the component was understood but is missing.
        if data is None and component is None:
            raise ValueError(f"Unable to read data with URI {fileDescriptor.location.uri}")

        return data

    def fromBytes(self, serializedDataset, component=None):
        """Read serialized data into a Dataset or its component.

        The read type is taken from ``self.fileDescriptor``.

        Parameters
        ----------
        serializedDataset : `bytes`
            Bytes object to unserialize.
        component : `str`, optional
            Component to read from the Dataset. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object. The type of object
            is controlled by the specific formatter. May be `None` when a
            component was requested but is missing.

        Raises
        ------
        NotImplementedError
            Formatter does not support reading from bytes.
        ValueError
            No data could be read and no component was requested, or the
            read and write storage classes differ but no component was
            requested.
        """
        if not hasattr(self, "_fromBytes"):
            raise NotImplementedError("Type does not support reading from bytes.")

        fileDescriptor = self.fileDescriptor
        data = self._fromBytes(serializedDataset, fileDescriptor.storageClass.pytype)

        # Assemble the requested dataset and potentially return only its
        # component, coercing it to its appropriate pytype.
        data = self._assembleDataset(data, component)

        # Same rule as `read`: a missing component may legitimately come
        # back as None, but a missing whole dataset is an error.
        if data is None and component is None:
            raise ValueError(f"Unable to read data with URI {fileDescriptor.location.uri}")

        return data

    def write(self, inMemoryDataset):
        """Write a Python object to a file.

        The destination is the location held by ``self.fileDescriptor``;
        that location is asked to adopt this formatter's `extension`
        before the file is written.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Python object to store.

        Returns
        -------
        path : `str`
            The path where the primary file is stored within the datastore.
        """
        location = self.fileDescriptor.location

        # Update the location with the formatter-preferred file extension.
        location.updateExtension(self.extension)

        self._writeFile(inMemoryDataset)

        return location.pathInStore

    def toBytes(self, inMemoryDataset):
        """Serialize the Dataset to bytes based on formatter.

        Parameters
        ----------
        inMemoryDataset : `object`
            Object to serialize.

        Returns
        -------
        serializedDataset : `bytes`
            Bytes representing the serialized dataset.

        Raises
        ------
        NotImplementedError
            Formatter does not support writing to bytes.
        """
        if not hasattr(self, "_toBytes"):
            raise NotImplementedError("Type does not support writing to bytes.")

        return self._toBytes(inMemoryDataset)