python/lsst/daf/butler/formatters/file.py
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

"""Support for reading and writing files to a POSIX file system."""

__all__ = ("FileFormatter",)

import dataclasses
from abc import abstractmethod
from typing import TYPE_CHECKING, Any, Optional, Type

from lsst.daf.butler import Formatter

if TYPE_CHECKING:
    from lsst.daf.butler import StorageClass


class FileFormatter(Formatter):
    """Interface for reading and writing files on a POSIX file system."""

    extension: Optional[str] = None
    """Default file extension to use for writing files. None means that no
    modifications will be made to the supplied file extension. (`str`)"""

    @abstractmethod
    def _readFile(self, path: str, pytype: Optional[Type[Any]] = None) -> Any:
        """Read a file from the path in the correct format.

        Parameters
        ----------
        path : `str`
            Path to use to open the file.
        pytype : `class`, optional
            Class to use to read the file.

        Returns
        -------
        data : `object`
            Data read from file. Returns `None` if the file cannot be
            found at the given path.

        Raises
        ------
        Exception
            Some problem reading the file.
        """
        pass

    @abstractmethod
    def _writeFile(self, inMemoryDataset: Any) -> None:
        """Write the in-memory dataset to a file on disk.

        Parameters
        ----------
        inMemoryDataset : `object`
            Object to serialize.

        Raises
        ------
        Exception
            The file could not be written.
        """
        pass

    def _assembleDataset(self, data: Any, component: Optional[str] = None) -> Any:
        """Assemble and coerce the dataset, or one of its components,
        into an appropriate Python type and return it.

        Parameters
        ----------
        data : `dict` or `object`
            The composite, or a `dict` form of it, whose content (or the
            named component) needs to be coerced to the Python type
            specified in ``fileDescriptor``.
        component : `str`, optional
            Component to read from the file. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object. The type of object
            is controlled by the specific formatter.
        """
        fileDescriptor = self.fileDescriptor

        # Get the read and write storage classes.
        readStorageClass = fileDescriptor.readStorageClass
        writeStorageClass = fileDescriptor.storageClass

        if component is not None:
            # Requesting a component implies that we need to first ensure
            # that the composite is the correct python type. Lie to the
            # coercion routine since the read StorageClass is not relevant
            # if we want the original.
            data = self._coerceType(data, writeStorageClass, writeStorageClass)

            # Concrete composite written as a single file (we hope)
            # so try to get the component.
            try:
                data = fileDescriptor.storageClass.delegate().getComponent(data, component)
            except AttributeError:
                # Defer the complaint
                data = None

            # Update the write storage class to match that of the component.
            # It should be safe to use the component storage class directly
            # since that should match what was returned from getComponent
            # (else we could create a temporary storage class guaranteed to
            # match the python type we have).
            writeStorageClass = writeStorageClass.allComponents()[component]

        # Coerce to the requested type.
        data = self._coerceType(data, writeStorageClass, readStorageClass)

        return data
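
    # Illustrative sketch (not part of the class API): how a component read
    # flows through _assembleDataset. The formatter and component names below
    # are hypothetical and only show the call order, assuming a composite
    # StorageClass with a registered delegate.
    #
    #     formatter = SomeConcreteFileFormatter(fileDescriptor)  # hypothetical subclass
    #     raw = formatter._readFile(fileDescriptor.location.path)
    #     # Full composite requested: coerce straight to the read StorageClass.
    #     composite = formatter._assembleDataset(raw, component=None)
    #     # Single component requested: the composite is rebuilt first, then the
    #     # delegate's getComponent() extracts the piece before final coercion.
    #     piece = formatter._assembleDataset(raw, component="wcs")  # "wcs" is illustrative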

    def _coerceBuiltinType(self, inMemoryDataset: Any, writeStorageClass: StorageClass) -> Any:
        """Coerce the supplied inMemoryDataset to the written python type if it
        is currently a built-in type.

        Parameters
        ----------
        inMemoryDataset : `object`
            Object to coerce to expected type.
        writeStorageClass : `StorageClass`
            Storage class used to serialize this data.

        Returns
        -------
        inMemoryDataset : `object`
            Object of expected type ``writeStorageClass.pytype``.

        Notes
        -----
        This method only modifies the supplied object if the object is:

        * Not already the required type.
        * Not `None`.
        * Looks like a built-in type.

        It is intended to be used as a helper for file formats that do not
        store the original Python type information in serialized form and
        instead return built-in types such as `dict` and `list` that need
        to be converted to the required form. This happens before
        `StorageClass` converters trigger so that constructors can be
        called that can build the original type first before checking the
        requested Python type. This is important for Pydantic models where
        the internal structure of the model may not match the `dict` form
        in a scenario where the user has requested a `dict`.
        """
        if (
            inMemoryDataset is not None
            and not isinstance(inMemoryDataset, writeStorageClass.pytype)
            and type(inMemoryDataset).__module__ == "builtins"
        ):
            # Try different ways of converting to the required type.
            if hasattr(writeStorageClass.pytype, "parse_obj"):
                # This is for a Pydantic model.
                inMemoryDataset = writeStorageClass.pytype.parse_obj(inMemoryDataset)
            elif isinstance(inMemoryDataset, dict):
                if dataclasses.is_dataclass(writeStorageClass.pytype):
                    # Dataclasses accept key/value parameters.
                    inMemoryDataset = writeStorageClass.pytype(**inMemoryDataset)
                elif writeStorageClass.isComposite():
                    # Assume that this type can be constructed
                    # using the registered assembler from a dict.
                    inMemoryDataset = writeStorageClass.delegate().assemble(
                        inMemoryDataset, pytype=writeStorageClass.pytype
                    )
                else:
                    # Unpack the dict and hope that works.
                    inMemoryDataset = writeStorageClass.pytype(**inMemoryDataset)
            else:
                # Hope that we can pass the arguments in directly.
                inMemoryDataset = writeStorageClass.pytype(inMemoryDataset)

        return inMemoryDataset
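
    # Illustrative sketch (not part of the class API): how _coerceBuiltinType
    # handles a plain ``dict`` handed back by a format such as JSON or YAML.
    # ``ExampleRecord`` is a hypothetical dataclass used only for illustration;
    # the matching StorageClass is assumed to have ``pytype = ExampleRecord``.
    #
    #     @dataclasses.dataclass
    #     class ExampleRecord:
    #         a: int
    #         b: int
    #
    #     raw = {"a": 1, "b": 2}
    #     # parse_obj() is tried first (Pydantic models), then dataclass keyword
    #     # construction, then a composite delegate's assemble(), and finally a
    #     # plain pytype(**raw) / pytype(raw) call.
    #     record = formatter._coerceBuiltinType(raw, exampleStorageClass)
    #     assert isinstance(record, ExampleRecord)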

    def _coerceType(
        self, inMemoryDataset: Any, writeStorageClass: StorageClass, readStorageClass: StorageClass
    ) -> Any:
        """Coerce the supplied inMemoryDataset to the correct python type.

        Parameters
        ----------
        inMemoryDataset : `object`
            Object to coerce to expected type.
        writeStorageClass : `StorageClass`
            Storage class used to serialize this data.
        readStorageClass : `StorageClass`
            Storage class requested as the outcome.

        Returns
        -------
        inMemoryDataset : `object`
            Object of expected type ``readStorageClass.pytype``.
        """
        inMemoryDataset = self._coerceBuiltinType(inMemoryDataset, writeStorageClass)
        return readStorageClass.coerce_type(inMemoryDataset)

    def read(self, component: Optional[str] = None) -> Any:
        """Read data from a file.

        The file to read, the type to read it into, and the parameters to
        be used for reading are all taken from ``self.fileDescriptor``.

        Parameters
        ----------
        component : `str`, optional
            Component to read from the file. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object. The type of object
            is controlled by the specific formatter.

        Raises
        ------
        ValueError
            Component requested but this file does not seem to be a concrete
            composite.
        NotImplementedError
            Formatter does not implement a method to read from files.
        """
        # Read the file naively.
        path = self.fileDescriptor.location.path
        data = self._readFile(path, self.fileDescriptor.storageClass.pytype)

        # Assemble the requested dataset and potentially return only its
        # component, coercing it to its appropriate pytype.
        data = self._assembleDataset(data, component)

        # Special case components by allowing a formatter to return None
        # to indicate that the component was understood but is missing.
        if data is None and component is None:
            raise ValueError(f"Unable to read data with URI {self.fileDescriptor.location.uri}")

        return data

    def fromBytes(self, serializedDataset: bytes, component: Optional[str] = None) -> Any:
        """Read serialized data into a Dataset or one of its components.

        Parameters
        ----------
        serializedDataset : `bytes`
            Bytes object to deserialize.
        component : `str`, optional
            Component to read from the Dataset. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object. The type of object
            is controlled by the specific formatter.

        Raises
        ------
        NotImplementedError
            Formatter does not support reading from bytes.
        """
        if not hasattr(self, "_fromBytes"):
            raise NotImplementedError("Type does not support reading from bytes.")

        data = self._fromBytes(serializedDataset, self.fileDescriptor.storageClass.pytype)

        # Assemble the requested dataset and potentially return only its
        # component, coercing it to its appropriate pytype.
        data = self._assembleDataset(data, component)

        # Special case components by allowing a formatter to return None
        # to indicate that the component was understood but is missing.
        if data is None and component is None:
            nbytes = len(serializedDataset)
            s = "s" if nbytes != 1 else ""
            raise ValueError(
                f"Unable to unpersist {nbytes} byte{s} from URI {self.fileDescriptor.location.uri}"
            )

        return data
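
    # Note (illustrative, not part of the class API): fromBytes() and toBytes()
    # only work when a concrete subclass provides the optional ``_fromBytes``
    # and ``_toBytes`` hooks, which is what the ``hasattr`` checks above and
    # below test for. A minimal sketch of the expected shape, with signatures
    # inferred from how the hooks are called in this module:
    #
    #     def _fromBytes(self, serializedDataset: bytes,
    #                    pytype: Optional[Type[Any]] = None) -> Any:
    #         ...  # deserialize the bytes into an object of ``pytype``
    #
    #     def _toBytes(self, inMemoryDataset: Any) -> bytes:
    #         ...  # serialize the object to bytes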

    def write(self, inMemoryDataset: Any) -> None:
        """Write a Python object to a file.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Python object to store.
        """
        fileDescriptor = self.fileDescriptor

        # Update the location with the formatter-preferred file extension.
        fileDescriptor.location.updateExtension(self.extension)

        self._writeFile(inMemoryDataset)

    def toBytes(self, inMemoryDataset: Any) -> bytes:
        """Serialize the Dataset to bytes based on formatter.

        Parameters
        ----------
        inMemoryDataset : `object`
            Object to serialize.

        Returns
        -------
        serializedDataset : `bytes`
            Bytes representing the serialized dataset.

        Raises
        ------
        NotImplementedError
            Formatter does not support conversion to bytes.
        """
        if not hasattr(self, "_toBytes"):
            raise NotImplementedError("Type does not support conversion to bytes.")

        return self._toBytes(inMemoryDataset)
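

# Illustrative sketch (an assumption, not part of this module): a minimal
# concrete subclass. The JSON-based names below are hypothetical; real
# formatters in daf_butler define their own extension and their own
# _readFile/_writeFile implementations.
#
#     import json
#
#     class ExampleJsonFormatter(FileFormatter):
#         extension = ".json"
#
#         def _readFile(self, path: str, pytype: Optional[Type[Any]] = None) -> Any:
#             try:
#                 with open(path, "r") as fd:
#                     return json.load(fd)
#             except FileNotFoundError:
#                 # Contract from _readFile: return None if the file is absent.
#                 return None
#
#         def _writeFile(self, inMemoryDataset: Any) -> None:
#             with open(self.fileDescriptor.location.path, "w") as fd:
#                 json.dump(inMemoryDataset, fd)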