Coverage for python/lsst/daf/butler/formatters/file.py: 28%
68 statements
coverage.py v7.2.7, created at 2023-08-12 09:20 +0000
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Support for reading and writing files to a POSIX file system."""

from __future__ import annotations

__all__ = ("FileFormatter",)

import dataclasses
from abc import abstractmethod
from typing import TYPE_CHECKING, Any

from lsst.daf.butler import Formatter

if TYPE_CHECKING:
    from lsst.daf.butler import StorageClass


class FileFormatter(Formatter):
    """Interface for reading and writing files on a POSIX file system."""

    extension: str | None = None
    """Default file extension to use for writing files. None means that no
    modifications will be made to the supplied file extension. (`str`)"""

    @abstractmethod
    def _readFile(self, path: str, pytype: type[Any] | None = None) -> Any:
        """Read a file from the path in the correct format.

        Parameters
        ----------
        path : `str`
            Path to use to open the file.
        pytype : `class`, optional
            Class to use to read the file.

        Returns
        -------
        data : `object`
            Data read from file. Returns `None` if the file can not be
            found at the given path.

        Raises
        ------
        Exception
            Some problem reading the file.
        """
        pass

    @abstractmethod
    def _writeFile(self, inMemoryDataset: Any) -> None:
        """Write the in memory dataset to file on disk.

        Parameters
        ----------
        inMemoryDataset : `object`
            Object to serialize.

        Raises
        ------
        Exception
            The file could not be written.
        """
        pass

    def _assembleDataset(self, data: Any, component: str | None = None) -> Any:
        """Assemble and coerce the dataset, or one of its components,
        into an appropriate python type and return it.

        Parameters
        ----------
        data : `dict` or `object`
            Composite object, or a dict form of it, that (or whose requested
            component) needs to be coerced to the python type specified in
            ``fileDescriptor``.
        component : `str`, optional
            Component to read from the file. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object. The type of object
            is controlled by the specific formatter.
        """
        fileDescriptor = self.fileDescriptor

        # Get the read and write storage classes.
        readStorageClass = fileDescriptor.readStorageClass
        writeStorageClass = fileDescriptor.storageClass

        if component is not None:
            # Requesting a component implies that we need to first ensure
            # that the composite is the correct python type. Lie to the
            # coercion routine since the read StorageClass is not relevant
            # if we want the original.
            data = self._coerceType(data, writeStorageClass, writeStorageClass)

            # Concrete composite written as a single file (we hope)
            # so try to get the component.
            try:
                data = fileDescriptor.storageClass.delegate().getComponent(data, component)
            except AttributeError:
                # Defer the complaint
                data = None

            # Update the write storage class to match that of the component.
            # It should be safe to use the component storage class directly
            # since that should match what was returned from getComponent
            # (else we could create a temporary storage class guaranteed to
            # match the python type we have).
            writeStorageClass = writeStorageClass.allComponents()[component]

        # Coerce to the requested type.
        data = self._coerceType(data, writeStorageClass, readStorageClass)

        return data

    def _coerceBuiltinType(self, inMemoryDataset: Any, writeStorageClass: StorageClass) -> Any:
        """Coerce the supplied inMemoryDataset to the written python type if
        it is currently a built-in type.

        Parameters
        ----------
        inMemoryDataset : `object`
            Object to coerce to expected type.
        writeStorageClass : `StorageClass`
            Storage class used to serialize this data.

        Returns
        -------
        inMemoryDataset : `object`
            Object of expected type ``writeStorageClass.pytype``.

        Notes
        -----
        This method only modifies the supplied object if the object:

        * Is not already the required type.
        * Is not `None`.
        * Looks like a built-in type.

        It is intended to be used as a helper for file formats that do not
        store the original Python type information in serialized form and
        instead return built-in types such as `dict` and `list` that need
        to be converted to the required form. This happens before
        `StorageClass` converters trigger, so that constructors that can
        build the original type are called first, before the requested
        Python type is checked. This is important for Pydantic models where
        the internal structure of the model may not match the `dict` form
        in a scenario where the user has requested a `dict`.
        """
        if (
            inMemoryDataset is not None
            and not isinstance(inMemoryDataset, writeStorageClass.pytype)
            and type(inMemoryDataset).__module__ == "builtins"
        ):
            # Try different ways of converting to the required type.
            # Pydantic v1 uses parse_obj and some non-pydantic classes
            # use that convention. Pydantic v2 uses model_validate.
            for method_name in ("model_validate", "parse_obj"):
                if method := getattr(writeStorageClass.pytype, method_name, None):
                    return method(inMemoryDataset)
            if isinstance(inMemoryDataset, dict):
                if dataclasses.is_dataclass(writeStorageClass.pytype):
                    # Dataclasses accept key/value parameters.
                    inMemoryDataset = writeStorageClass.pytype(**inMemoryDataset)
                elif writeStorageClass.isComposite():
                    # Assume that this type can be constructed
                    # using the registered assembler from a dict.
                    inMemoryDataset = writeStorageClass.delegate().assemble(
                        inMemoryDataset, pytype=writeStorageClass.pytype
                    )
                else:
                    # Unpack the dict and hope that works.
                    inMemoryDataset = writeStorageClass.pytype(**inMemoryDataset)
            else:
                # Hope that we can pass the arguments in directly.
                inMemoryDataset = writeStorageClass.pytype(inMemoryDataset)

        return inMemoryDataset

    def _coerceType(
        self, inMemoryDataset: Any, writeStorageClass: StorageClass, readStorageClass: StorageClass
    ) -> Any:
        """Coerce the supplied inMemoryDataset to the correct python type.

        Parameters
        ----------
        inMemoryDataset : `object`
            Object to coerce to expected type.
        writeStorageClass : `StorageClass`
            Storage class used to serialize this data.
        readStorageClass : `StorageClass`
            Storage class requested as the outcome.

        Returns
        -------
        inMemoryDataset : `object`
            Object of expected type ``readStorageClass.pytype``.
        """
        inMemoryDataset = self._coerceBuiltinType(inMemoryDataset, writeStorageClass)
        return readStorageClass.coerce_type(inMemoryDataset)

    def read(self, component: str | None = None) -> Any:
        """Read data from a file.

        Parameters
        ----------
        component : `str`, optional
            Component to read from the file. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object. The type of object
            is controlled by the specific formatter.

        Raises
        ------
        ValueError
            Component requested but this file does not seem to be a concrete
            composite.
        NotImplementedError
            Formatter does not implement a method to read from files.
        """
        # Read the file naively
        path = self.fileDescriptor.location.path
        data = self._readFile(path, self.fileDescriptor.storageClass.pytype)

        # Assemble the requested dataset and potentially return only its
        # component coercing it to its appropriate pytype
        data = self._assembleDataset(data, component)

        # Special case components by allowing a formatter to return None
        # to indicate that the component was understood but is missing
        if data is None and component is None:
            raise ValueError(f"Unable to read data with URI {self.fileDescriptor.location.uri}")

        return data

    def fromBytes(self, serializedDataset: bytes, component: str | None = None) -> Any:
        """Read serialized data into a Dataset or its component.

        Parameters
        ----------
        serializedDataset : `bytes`
            Bytes object to unserialize.
        component : `str`, optional
            Component to read from the Dataset. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object. The type of object
            is controlled by the specific formatter.

        Raises
        ------
        NotImplementedError
            Formatter does not support reading from bytes.
        """
        if not hasattr(self, "_fromBytes"):
            raise NotImplementedError("Type does not support reading from bytes.")

        data = self._fromBytes(serializedDataset, self.fileDescriptor.storageClass.pytype)

        # Assemble the requested dataset and potentially return only its
        # component coercing it to its appropriate pytype
        data = self._assembleDataset(data, component)

        # Special case components by allowing a formatter to return None
        # to indicate that the component was understood but is missing
        if data is None and component is None:
            nbytes = len(serializedDataset)
            s = "s" if nbytes != 1 else ""
            raise ValueError(
                f"Unable to unpersist {nbytes} byte{s} from URI {self.fileDescriptor.location.uri}"
            )

        return data

    def write(self, inMemoryDataset: Any) -> None:
        """Write a Python object to a file.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Python object to store.
        """
        fileDescriptor = self.fileDescriptor
        # Update the location with the formatter-preferred file extension
        fileDescriptor.location.updateExtension(self.extension)

        self._writeFile(inMemoryDataset)

    def toBytes(self, inMemoryDataset: Any) -> bytes:
        """Serialize the Dataset to bytes based on formatter.

        Parameters
        ----------
        inMemoryDataset : `object`
            Object to serialize.

        Returns
        -------
        serializedDataset : `bytes`
            Bytes representing the serialized dataset.

        Raises
        ------
        NotImplementedError
            Formatter does not support conversion to bytes.
        """
        if not hasattr(self, "_toBytes"):
            raise NotImplementedError("Type does not support conversion to bytes.")

        return self._toBytes(inMemoryDataset)
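

# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original file.py): a minimal concrete
# subclass showing how the FileFormatter interface above is intended to be
# implemented. The class name ``ExampleJsonFormatter`` and its JSON handling
# are assumptions for illustration only; this is not the formatter that
# daf_butler actually registers for JSON files.

import json


class ExampleJsonFormatter(FileFormatter):
    """Hypothetical formatter that stores Python objects as JSON files."""

    extension = ".json"

    def _readFile(self, path: str, pytype: type[Any] | None = None) -> Any:
        # Return None when the file is missing, as the base class expects.
        # Coercion of the resulting built-in types to ``pytype`` is handled
        # later by _assembleDataset/_coerceBuiltinType.
        try:
            with open(path, "r") as fd:
                return json.load(fd)
        except FileNotFoundError:
            return None

    def _writeFile(self, inMemoryDataset: Any) -> None:
        # write() has already applied the ``.json`` extension to the location.
        # This sketch assumes the in-memory dataset is directly JSON
        # serializable (e.g. a dict); a real formatter would convert it first.
        with open(self.fileDescriptor.location.path, "w") as fd:
            json.dump(inMemoryDataset, fd)

    def _fromBytes(self, serializedDataset: bytes, pytype: type[Any] | None = None) -> Any:
        # Providing _fromBytes/_toBytes enables fromBytes() and toBytes(),
        # which otherwise raise NotImplementedError.
        return json.loads(serializedDataset)

    def _toBytes(self, inMemoryDataset: Any) -> bytes:
        return json.dumps(inMemoryDataset).encode()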