Coverage for python/lsst/daf/butler/formatters/file.py: 30%
58 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-10-26 15:15 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2023-10-26 15:15 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24"""Support for reading and writing files to a POSIX file system."""
26__all__ = ("FileFormatter",)
28from abc import abstractmethod
29from typing import TYPE_CHECKING, Any, Optional, Type
31from lsst.daf.butler import Formatter
32from lsst.utils.introspection import get_full_type_name
34if TYPE_CHECKING: 34 ↛ 35line 34 didn't jump to line 35, because the condition on line 34 was never true
35 from lsst.daf.butler import StorageClass
class FileFormatter(Formatter):
    """Interface for reading and writing files on a POSIX file system.

    Subclasses must implement `_readFile` and `_writeFile`. Support for
    in-memory (de)serialization is optional: `fromBytes` and `toBytes`
    only work if the subclass additionally defines ``_fromBytes`` and
    ``_toBytes`` methods respectively; their presence is detected at call
    time.
    """

    extension: Optional[str] = None
    """Default file extension to use for writing files. None means that no
    modifications will be made to the supplied file extension. (`str`)"""

    @abstractmethod
    def _readFile(self, path: str, pytype: Optional[Type[Any]] = None) -> Any:
        """Read a file from the path in the correct format.

        Parameters
        ----------
        path : `str`
            Path to use to open the file.
        pytype : `class`, optional
            Class to use to read the file.

        Returns
        -------
        data : `object`
            Data read from file. Returns `None` if the file can not be
            found at the given path.

        Raises
        ------
        Exception
            Some problem reading the file.
        """

    @abstractmethod
    def _writeFile(self, inMemoryDataset: Any) -> None:
        """Write the in memory dataset to file on disk.

        Parameters
        ----------
        inMemoryDataset : `object`
            Object to serialize.

        Raises
        ------
        Exception
            The file could not be written.
        """

    def _assembleDataset(self, data: Any, component: Optional[str] = None) -> Any:
        """Assemble and coerce the dataset, or one of its components,
        into an appropriate python type and return it.

        Parameters
        ----------
        data : `dict` or `object`
            Composite or a dict that, or which component, needs to be
            coerced to the python type specified in "fileDescriptor".
        component : `str`, optional
            Component to read from the file. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object. The type of object
            is controlled by the specific formatter. Can be `None` if a
            component was requested but could not be extracted.

        Raises
        ------
        ValueError
            Raised if the read and write storage classes differ, no
            component was requested, and the read storage class reports
            that it can not convert from the write storage class.
        """
        fileDescriptor = self.fileDescriptor

        # if read and write storage classes differ, more work is required
        readStorageClass = fileDescriptor.readStorageClass
        if readStorageClass != fileDescriptor.storageClass:
            if component is None:
                # This likely means that type conversion is required but
                # it will be an error if no valid converter is available
                # for this pytype.
                if not readStorageClass.can_convert(fileDescriptor.storageClass):
                    raise ValueError(
                        f"Storage class inconsistency ({readStorageClass.name} vs"
                        f" {fileDescriptor.storageClass.name}) but no"
                        " component requested or converter registered for"
                        f" converting type {get_full_type_name(fileDescriptor.storageClass.pytype)}"
                        f" to {get_full_type_name(readStorageClass.pytype)}."
                    )
            else:
                # Concrete composite written as a single file (we hope)
                try:
                    data = fileDescriptor.storageClass.delegate().getComponent(data, component)
                except AttributeError:
                    # Defer the complaint: callers treat a `None` component
                    # as "understood but missing" rather than as a failure.
                    data = None

        # Coerce to the requested type (not necessarily the type that was
        # written)
        data = self._coerceType(data, fileDescriptor.storageClass, readStorageClass)

        return data

    def _coerceType(
        self, inMemoryDataset: Any, writeStorageClass: StorageClass, readStorageClass: StorageClass
    ) -> Any:
        """Coerce the supplied inMemoryDataset to the correct python type.

        Parameters
        ----------
        inMemoryDataset : `object`
            Object to coerce to expected type.
        writeStorageClass : `StorageClass`
            Storage class used to serialize this data. Not consulted by
            this default implementation.
        readStorageClass : `StorageClass`
            Storage class requested as the outcome.

        Returns
        -------
        inMemoryDataset : `object`
            Object of expected type ``readStorageClass.pytype``.
        """
        return readStorageClass.coerce_type(inMemoryDataset)

    def read(self, component: Optional[str] = None) -> Any:
        """Read data from a file.

        The file to read, and the type to read it into, are taken from
        ``self.fileDescriptor``.

        Parameters
        ----------
        component : `str`, optional
            Component to read from the file. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object. The type of object
            is controlled by the specific formatter.

        Raises
        ------
        ValueError
            Raised if no data could be read and no component was requested,
            or if the storage classes are inconsistent and no converter
            is available.
        NotImplementedError
            Formatter does not implement a method to read from files.
        """
        # Read the file naively
        path = self.fileDescriptor.location.path
        data = self._readFile(path, self.fileDescriptor.storageClass.pytype)

        # Assemble the requested dataset and potentially return only its
        # component coercing it to its appropriate pytype
        data = self._assembleDataset(data, component)

        # Special case components by allowing a formatter to return None
        # to indicate that the component was understood but is missing
        if data is None and component is None:
            raise ValueError(f"Unable to read data with URI {self.fileDescriptor.location.uri}")

        return data

    def fromBytes(self, serializedDataset: bytes, component: Optional[str] = None) -> Any:
        """Read serialized data into a Dataset or its component.

        Parameters
        ----------
        serializedDataset : `bytes`
            Bytes object to unserialize.
        component : `str`, optional
            Component to read from the Dataset. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object. The type of object
            is controlled by the specific formatter.

        Raises
        ------
        NotImplementedError
            Formatter does not support reading from bytes.
        ValueError
            Raised if nothing could be unpersisted and no component was
            requested.
        """
        # Byte-level support is optional and is advertised by the subclass
        # defining a ``_fromBytes`` method.
        if not hasattr(self, "_fromBytes"):
            raise NotImplementedError("Type does not support reading from bytes.")

        data = self._fromBytes(serializedDataset, self.fileDescriptor.storageClass.pytype)

        # Assemble the requested dataset and potentially return only its
        # component coercing it to its appropriate pytype
        data = self._assembleDataset(data, component)

        # Special case components by allowing a formatter to return None
        # to indicate that the component was understood but is missing
        if data is None and component is None:
            nbytes = len(serializedDataset)
            s = "s" if nbytes != 1 else ""
            raise ValueError(
                f"Unable to unpersist {nbytes} byte{s} from URI {self.fileDescriptor.location.uri}"
            )

        return data

    def write(self, inMemoryDataset: Any) -> None:
        """Write a Python object to a file.

        The location held by ``self.fileDescriptor`` is first updated to
        use the formatter-preferred file extension, then the dataset is
        handed to `_writeFile`.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Python object to store.
        """
        fileDescriptor = self.fileDescriptor
        # Update the location with the formatter-preferred file extension
        fileDescriptor.location.updateExtension(self.extension)

        self._writeFile(inMemoryDataset)

    def toBytes(self, inMemoryDataset: Any) -> bytes:
        """Serialize the Dataset to bytes based on formatter.

        Parameters
        ----------
        inMemoryDataset : `object`
            Object to serialize.

        Returns
        -------
        serializedDataset : `bytes`
            Bytes representing the serialized dataset.

        Raises
        ------
        NotImplementedError
            Formatter does not support writing to bytes.
        """
        # Byte-level support is optional and is advertised by the subclass
        # defining a ``_toBytes`` method.
        if not hasattr(self, "_toBytes"):
            raise NotImplementedError("Type does not support writing to bytes.")

        return self._toBytes(inMemoryDataset)