Coverage for python/lsst/daf/butler/formatters/file.py: 31%
57 statements
« prev ^ index » next coverage.py v6.4.4, created at 2022-08-19 12:04 -0700
« prev ^ index » next coverage.py v6.4.4, created at 2022-08-19 12:04 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""Support for reading and writing files to a POSIX file system."""

# The module docstring must be the first statement in the file to be picked
# up as ``__doc__``; a ``from __future__`` import is allowed to follow it.
from __future__ import annotations

__all__ = ("FileFormatter",)
28from abc import abstractmethod
29from typing import TYPE_CHECKING, Any, Optional, Type
31from lsst.daf.butler import Formatter
33if TYPE_CHECKING: 33 ↛ 34line 33 didn't jump to line 34, because the condition on line 33 was never true
34 from lsst.daf.butler import StorageClass
class FileFormatter(Formatter):
    """Interface for reading and writing files on a POSIX file system.

    Subclasses supply the actual I/O by implementing `_readFile` and
    `_writeFile`. A subclass may additionally define ``_fromBytes`` and
    ``_toBytes`` methods; when present they are used by `fromBytes` and
    `toBytes` respectively.
    """

    extension: Optional[str] = None
    """Default file extension to use for writing files. None means that no
    modifications will be made to the supplied file extension. (`str`)"""

    @abstractmethod
    def _readFile(self, path: str, pytype: Optional[Type[Any]] = None) -> Any:
        """Read a file from the path in the correct format.

        Parameters
        ----------
        path : `str`
            Path to use to open the file.
        pytype : `class`, optional
            Class to use to read the file.

        Returns
        -------
        data : `object`
            Data read from file. Returns `None` if the file can not be
            found at the given path.

        Raises
        ------
        Exception
            Some problem reading the file.
        """
        pass

    @abstractmethod
    def _writeFile(self, inMemoryDataset: Any) -> None:
        """Write the in memory dataset to file on disk.

        Parameters
        ----------
        inMemoryDataset : `object`
            Object to serialize.

        Raises
        ------
        Exception
            The file could not be written.
        """
        pass

    def _assembleDataset(self, data: Any, component: Optional[str] = None) -> Any:
        """Assemble and coerce the dataset, or one of its components,
        into an appropriate python type and return it.

        Parameters
        ----------
        data : `dict` or `object`
            The object that was read. Either the whole of it, or the
            requested component of it, is coerced using the read storage
            class of ``self.fileDescriptor``.
        component : `str`, optional
            Component to extract from ``data``. Only used if the
            `StorageClass` for reading differs from the `StorageClass` used
            to write the file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object. The type of object
            is controlled by the specific formatter.

        Raises
        ------
        ValueError
            Raised if the read and write storage classes differ, no
            component was requested, and the read storage class reports
            that it can not convert from the write storage class.
        """
        fileDescriptor = self.fileDescriptor

        # If read and write storage classes differ, more work is required.
        readStorageClass = fileDescriptor.readStorageClass
        if readStorageClass != fileDescriptor.storageClass:
            if component is None:
                # This likely means that type conversion is required but
                # it will be an error if no valid converter is available
                # for this pytype.
                if not readStorageClass.can_convert(fileDescriptor.storageClass):
                    raise ValueError(
                        f"Storage class inconsistency ({readStorageClass.name} vs"
                        f" {fileDescriptor.storageClass.name}) but no"
                        " component requested or converter registered for"
                        f" python type {type(data)}"
                    )
            else:
                # Concrete composite written as a single file (we hope).
                try:
                    data = fileDescriptor.storageClass.delegate().getComponent(data, component)
                except AttributeError:
                    # Defer the complaint: callers treat `None` for a
                    # requested component as "understood but missing".
                    data = None

        # Coerce to the requested type (not necessarily the type that was
        # written).
        data = self._coerceType(data, fileDescriptor.storageClass, readStorageClass)

        return data

    def _coerceType(
        self, inMemoryDataset: Any, writeStorageClass: StorageClass, readStorageClass: StorageClass
    ) -> Any:
        """Coerce the supplied inMemoryDataset to the correct python type.

        Parameters
        ----------
        inMemoryDataset : `object`
            Object to coerce to expected type.
        writeStorageClass : `StorageClass`
            Storage class used to serialize this data. Not used by this
            implementation.
        readStorageClass : `StorageClass`
            Storage class requested as the outcome.

        Returns
        -------
        inMemoryDataset : `object`
            Result of ``readStorageClass.coerce_type(inMemoryDataset)``.
        """
        return readStorageClass.coerce_type(inMemoryDataset)

    def read(self, component: Optional[str] = None) -> Any:
        """Read data from a file.

        The file to read is identified by ``self.fileDescriptor``.

        Parameters
        ----------
        component : `str`, optional
            Component to read from the file. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object. The type of object
            is controlled by the specific formatter.

        Raises
        ------
        ValueError
            Raised if no data could be obtained and no component was
            requested, or if the read and write storage classes are
            inconsistent and no conversion is available.
        """
        # Read the file naively.
        path = self.fileDescriptor.location.path
        data = self._readFile(path, self.fileDescriptor.storageClass.pytype)

        # Assemble the requested dataset and potentially return only its
        # component coercing it to its appropriate pytype.
        data = self._assembleDataset(data, component)

        # Special case components by allowing a formatter to return None
        # to indicate that the component was understood but is missing.
        if data is None and component is None:
            raise ValueError(f"Unable to read data with URI {self.fileDescriptor.location.uri}")

        return data

    def fromBytes(self, serializedDataset: bytes, component: Optional[str] = None) -> Any:
        """Read serialized data into a Dataset or its component.

        Parameters
        ----------
        serializedDataset : `bytes`
            Bytes object to unserialize.
        component : `str`, optional
            Component to read from the Dataset. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object. The type of object
            is controlled by the specific formatter.

        Raises
        ------
        NotImplementedError
            Formatter does not support reading from bytes.
        ValueError
            Raised if no data could be obtained and no component was
            requested, or if the read and write storage classes are
            inconsistent and no conversion is available.
        """
        if not hasattr(self, "_fromBytes"):
            raise NotImplementedError("Type does not support reading from bytes.")

        # mypy can not understand that the previous line protects this call
        data = self._fromBytes(serializedDataset, self.fileDescriptor.storageClass.pytype)  # type: ignore

        # Assemble the requested dataset and potentially return only its
        # component coercing it to its appropriate pytype.
        data = self._assembleDataset(data, component)

        # Special case components by allowing a formatter to return None
        # to indicate that the component was understood but is missing.
        if data is None and component is None:
            nbytes = len(serializedDataset)
            s = "s" if nbytes != 1 else ""
            raise ValueError(
                f"Unable to unpersist {nbytes} byte{s} from URI {self.fileDescriptor.location.uri}"
            )

        return data

    def write(self, inMemoryDataset: Any) -> None:
        """Write a Python object to a file.

        The location held by ``self.fileDescriptor`` is first updated with
        the formatter's `extension`, then the write is delegated to
        `_writeFile`.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Python object to store.
        """
        fileDescriptor = self.fileDescriptor
        # Update the location with the formatter-preferred file extension.
        fileDescriptor.location.updateExtension(self.extension)

        self._writeFile(inMemoryDataset)

    def toBytes(self, inMemoryDataset: Any) -> bytes:
        """Serialize the Dataset to bytes based on formatter.

        Parameters
        ----------
        inMemoryDataset : `object`
            Object to serialize.

        Returns
        -------
        serializedDataset : `bytes`
            Bytes representing the serialized dataset.

        Raises
        ------
        NotImplementedError
            Formatter does not support writing to bytes.
        """
        if not hasattr(self, "_toBytes"):
            # This is the serialization direction, so report it as such
            # rather than reusing the "reading" wording from fromBytes.
            raise NotImplementedError("Type does not support writing to bytes.")

        # mypy can not understand that the previous line protects this call
        return self._toBytes(inMemoryDataset)  # type: ignore