Coverage for python/lsst/daf/butler/formatters/file.py: 29%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24"""Support for reading and writing files to a POSIX file system."""
26__all__ = ("FileFormatter",)
28from abc import abstractmethod
29from typing import TYPE_CHECKING, Any, Optional, Type
31from lsst.daf.butler import Formatter
33if TYPE_CHECKING: 33 ↛ 34line 33 didn't jump to line 34, because the condition on line 33 was never true
34 from lsst.daf.butler import StorageClass
class FileFormatter(Formatter):
    """Interface for reading and writing files on a POSIX file system.

    Subclasses must implement `_readFile` and `_writeFile`. A subclass may
    additionally define ``_fromBytes`` and ``_toBytes`` methods; their
    presence is what enables `fromBytes` and `toBytes` respectively.
    """

    extension: Optional[str] = None
    """Default file extension to use for writing files. None means that no
    modifications will be made to the supplied file extension. (`str`)"""

    @abstractmethod
    def _readFile(self, path: str, pytype: Optional[Type[Any]] = None) -> Any:
        """Read a file from the path in the correct format.

        Parameters
        ----------
        path : `str`
            Path to use to open the file.
        pytype : `class`, optional
            Class to use to read the file.

        Returns
        -------
        data : `object`
            Data read from file. Returns `None` if the file can not be
            found at the given path.

        Raises
        ------
        Exception
            Some problem reading the file.
        """

    @abstractmethod
    def _writeFile(self, inMemoryDataset: Any) -> None:
        """Write the in memory dataset to file on disk.

        Parameters
        ----------
        inMemoryDataset : `object`
            Object to serialize.

        Raises
        ------
        Exception
            The file could not be written.
        """

    def _assembleDataset(self, data: Any, component: Optional[str] = None) -> Any:
        """Assemble and coerce the dataset, or one of its components,
        into an appropriate python type and return it.

        Parameters
        ----------
        data : `dict` or `object`
            Composite or a dict that, or which component, needs to be
            coerced to the python type specified in "fileDescriptor".
        component : `str`, optional
            Component to read from the file. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object. The type of object
            is controlled by the specific formatter.

        Raises
        ------
        ValueError
            The read and write storage classes differ, no component was
            requested, and the read storage class reports that it can not
            convert from the write storage class.
        """
        fileDescriptor = self.fileDescriptor
        readStorageClass = fileDescriptor.readStorageClass
        writeStorageClass = fileDescriptor.storageClass

        # If read and write storage classes differ, more work is required.
        if readStorageClass != writeStorageClass:
            if component is None:
                # This likely means that type conversion is required but
                # it will be an error if no valid converter is available
                # for this pytype.
                if not readStorageClass.can_convert(writeStorageClass):
                    raise ValueError(
                        f"Storage class inconsistency ({readStorageClass.name} vs"
                        f" {writeStorageClass.name}) but no"
                        " component requested or converter registered"
                    )
            else:
                # Concrete composite written as a single file (we hope).
                try:
                    data = writeStorageClass.delegate().getComponent(data, component)
                except AttributeError:
                    # Deliberately deferred: a missing component becomes
                    # None here and is handled after coercion.
                    data = None

        # Coerce to the requested type (not necessarily the type that was
        # written).
        return self._coerceType(data, writeStorageClass, readStorageClass)

    def _coerceType(
        self, inMemoryDataset: Any, writeStorageClass: StorageClass, readStorageClass: StorageClass
    ) -> Any:
        """Coerce the supplied inMemoryDataset to the correct python type.

        Parameters
        ----------
        inMemoryDataset : `object`
            Object to coerce to expected type.
        writeStorageClass : `StorageClass`
            Storage class used to serialize this data. Not used by this
            implementation; the coercion is delegated entirely to
            ``readStorageClass``.
        readStorageClass : `StorageClass`
            Storage class requested as the outcome.

        Returns
        -------
        inMemoryDataset : `object`
            Object of expected type ``readStorageClass.pytype``.
        """
        return readStorageClass.coerce_type(inMemoryDataset)

    def read(self, component: Optional[str] = None) -> Any:
        """Read data from a file.

        The file to read and the storage classes involved are taken from
        ``self.fileDescriptor``.

        Parameters
        ----------
        component : `str`, optional
            Component to read from the file. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object. The type of object
            is controlled by the specific formatter.

        Raises
        ------
        ValueError
            No data could be read and no component was requested, or the
            storage classes are inconsistent and can not be converted.
        NotImplementedError
            Formatter does not implement a method to read from files.
        """
        fileDescriptor = self.fileDescriptor

        # Read the file naively.
        path = fileDescriptor.location.path
        data = self._readFile(path, fileDescriptor.storageClass.pytype)

        # Assemble the requested dataset and potentially return only its
        # component coercing it to its appropriate pytype.
        data = self._assembleDataset(data, component)

        # Special case components by allowing a formatter to return None
        # to indicate that the component was understood but is missing.
        if data is None and component is None:
            raise ValueError(f"Unable to read data with URI {self.fileDescriptor.location.uri}")

        return data

    def fromBytes(self, serializedDataset: bytes, component: Optional[str] = None) -> Any:
        """Read serialized data into a Dataset or its component.

        Parameters
        ----------
        serializedDataset : `bytes`
            Bytes object to unserialize.
        component : `str`, optional
            Component to read from the Dataset. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object. The type of object
            is controlled by the specific formatter.

        Raises
        ------
        NotImplementedError
            Formatter does not support reading from bytes.
        ValueError
            Nothing could be unpersisted and no component was requested.
        """
        if not hasattr(self, "_fromBytes"):
            raise NotImplementedError("Type does not support reading from bytes.")

        # mypy can not understand that the previous line protects this call
        data = self._fromBytes(serializedDataset, self.fileDescriptor.storageClass.pytype)  # type: ignore

        # Assemble the requested dataset and potentially return only its
        # component coercing it to its appropriate pytype.
        data = self._assembleDataset(data, component)

        # Special case components by allowing a formatter to return None
        # to indicate that the component was understood but is missing.
        if data is None and component is None:
            nbytes = len(serializedDataset)
            s = "s" if nbytes != 1 else ""
            raise ValueError(
                f"Unable to unpersist {nbytes} byte{s} from URI {self.fileDescriptor.location.uri}"
            )

        return data

    def write(self, inMemoryDataset: Any) -> None:
        """Write a Python object to a file.

        The location held by ``self.fileDescriptor`` is updated in place
        with the formatter-preferred file extension before the dataset is
        written. Nothing is returned.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Python object to store.
        """
        # Update the location with the formatter-preferred file extension.
        self.fileDescriptor.location.updateExtension(self.extension)

        self._writeFile(inMemoryDataset)

    def toBytes(self, inMemoryDataset: Any) -> bytes:
        """Serialize the Dataset to bytes based on formatter.

        Parameters
        ----------
        inMemoryDataset : `object`
            Object to serialize.

        Returns
        -------
        serializedDataset : `bytes`
            Bytes representing the serialized dataset.

        Raises
        ------
        NotImplementedError
            Formatter does not support writing to bytes.
        """
        if not hasattr(self, "_toBytes"):
            raise NotImplementedError("Type does not support writing to bytes.")

        # mypy can not understand that the previous line protects this call
        return self._toBytes(inMemoryDataset)  # type: ignore