Coverage for python/lsst/daf/butler/formatters/file.py: 29%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24"""Support for reading and writing files to a POSIX file system."""
26__all__ = ("FileFormatter",)
28from abc import abstractmethod
30from typing import (
31 TYPE_CHECKING,
32 Any,
33 Optional,
34 Type,
35)
37from lsst.daf.butler import Formatter
39if TYPE_CHECKING: 39 ↛ 40line 39 didn't jump to line 40, because the condition on line 39 was never true
40 from lsst.daf.butler import StorageClass
class FileFormatter(Formatter):
    """Interface for reading and writing files on a POSIX file system.

    Subclasses must implement `_readFile` and `_writeFile`. A subclass may
    additionally define ``_fromBytes`` and ``_toBytes`` methods; their
    presence is detected at run time by `fromBytes` and `toBytes`.
    """

    extension: Optional[str] = None
    """Default file extension to use for writing files. None means that no
    modifications will be made to the supplied file extension. (`str`)"""

    @abstractmethod
    def _readFile(self, path: str, pytype: Optional[Type[Any]] = None) -> Any:
        """Read a file from the path in the correct format.

        Parameters
        ----------
        path : `str`
            Path to use to open the file.
        pytype : `class`, optional
            Class to use to read the file.

        Returns
        -------
        data : `object`
            Data read from file. Returns `None` if the file can not be
            found at the given path.

        Raises
        ------
        Exception
            Some problem reading the file.
        """

    @abstractmethod
    def _writeFile(self, inMemoryDataset: Any) -> None:
        """Write the in memory dataset to file on disk.

        Parameters
        ----------
        inMemoryDataset : `object`
            Object to serialize.

        Raises
        ------
        Exception
            The file could not be written.
        """

    def _assembleDataset(self, data: Any, component: Optional[str] = None) -> Any:
        """Assemble and coerce the dataset, or one of its components,
        into an appropriate python type and return it.

        Parameters
        ----------
        data : `dict` or `object`
            Composite or a dict that, or which component, needs to be
            coerced to the python type specified in "fileDescriptor".
        component : `str`, optional
            Component to read from the file. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object. The type of object
            is controlled by the specific formatter.

        Raises
        ------
        ValueError
            The read and write storage classes differ, no component was
            requested, and the read storage class reports that it can not
            convert from the write storage class.
        """
        fileDescriptor = self.fileDescriptor

        # If read and write storage classes differ, more work is required.
        readStorageClass = fileDescriptor.readStorageClass
        if readStorageClass != fileDescriptor.storageClass:
            if component is None:
                # This likely means that type conversion is required but
                # it will be an error if no valid converter is available
                # for this pytype.
                if not readStorageClass.can_convert(fileDescriptor.storageClass):
                    # Format the name itself; a trailing comma inside the
                    # replacement field would render a one-element tuple.
                    raise ValueError(f"Storage class inconsistency ({readStorageClass.name} vs"
                                     f" {fileDescriptor.storageClass.name}) but no"
                                     " component requested or converter registered")
            else:
                # Concrete composite written as a single file (we hope).
                try:
                    data = fileDescriptor.storageClass.delegate().getComponent(data, component)
                except AttributeError:
                    # Defer the complaint: the missing component is passed
                    # on as None rather than raising here.
                    data = None

        # Coerce to the requested type (not necessarily the type that was
        # written).
        data = self._coerceType(data, fileDescriptor.storageClass, readStorageClass)

        return data

    def _coerceType(self, inMemoryDataset: Any, writeStorageClass: StorageClass,
                    readStorageClass: StorageClass) -> Any:
        """Coerce the supplied inMemoryDataset to the correct python type.

        Parameters
        ----------
        inMemoryDataset : `object`
            Object to coerce to expected type.
        writeStorageClass : `StorageClass`
            Storage class used to serialize this data. Not used by this
            implementation.
        readStorageClass : `StorageClass`
            Storage class requested as the outcome.

        Returns
        -------
        inMemoryDataset : `object`
            Result of ``readStorageClass.coerce_type(inMemoryDataset)``.
        """
        return readStorageClass.coerce_type(inMemoryDataset)

    def read(self, component: Optional[str] = None) -> Any:
        """Read data from a file.

        The file to read is identified by ``self.fileDescriptor``.

        Parameters
        ----------
        component : `str`, optional
            Component to read from the file. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object. The type of object
            is controlled by the specific formatter.

        Raises
        ------
        ValueError
            No data could be read and no component was requested, or the
            read and write storage classes are inconsistent (see
            `_assembleDataset`).
        """
        # Read the file naively.
        path = self.fileDescriptor.location.path
        data = self._readFile(path, self.fileDescriptor.storageClass.pytype)

        # Assemble the requested dataset and potentially return only its
        # component coercing it to its appropriate pytype.
        data = self._assembleDataset(data, component)

        # Special case components by allowing a formatter to return None
        # to indicate that the component was understood but is missing.
        if data is None and component is None:
            raise ValueError(f"Unable to read data with URI {self.fileDescriptor.location.uri}")

        return data

    def fromBytes(self, serializedDataset: bytes, component: Optional[str] = None) -> Any:
        """Read serialized data into a Dataset or its component.

        Parameters
        ----------
        serializedDataset : `bytes`
            Bytes object to unserialize.
        component : `str`, optional
            Component to read from the Dataset. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object. The type of object
            is controlled by the specific formatter.

        Raises
        ------
        NotImplementedError
            Formatter does not support reading from bytes (it has no
            ``_fromBytes`` attribute).
        ValueError
            Nothing could be unpersisted and no component was requested.
        """
        if not hasattr(self, '_fromBytes'):
            raise NotImplementedError("Type does not support reading from bytes.")

        # mypy can not understand that the previous line protects this call
        data = self._fromBytes(serializedDataset,  # type: ignore
                               self.fileDescriptor.storageClass.pytype)

        # Assemble the requested dataset and potentially return only its
        # component coercing it to its appropriate pytype.
        data = self._assembleDataset(data, component)

        # Special case components by allowing a formatter to return None
        # to indicate that the component was understood but is missing.
        if data is None and component is None:
            nbytes = len(serializedDataset)
            s = "s" if nbytes != 1 else ""
            raise ValueError(f"Unable to unpersist {nbytes} byte{s} from "
                             f"URI {self.fileDescriptor.location.uri}")

        return data

    def write(self, inMemoryDataset: Any) -> None:
        """Write a Python object to a file.

        The location held by ``self.fileDescriptor`` is updated in place
        with the formatter-preferred file extension before the dataset is
        handed to `_writeFile`.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Python object to store.
        """
        fileDescriptor = self.fileDescriptor
        # Update the location with the formatter-preferred file extension.
        fileDescriptor.location.updateExtension(self.extension)

        self._writeFile(inMemoryDataset)

    def toBytes(self, inMemoryDataset: Any) -> bytes:
        """Serialize the Dataset to bytes based on formatter.

        Parameters
        ----------
        inMemoryDataset : `object`
            Object to serialize.

        Returns
        -------
        serializedDataset : `bytes`
            Bytes representing the serialized dataset.

        Raises
        ------
        NotImplementedError
            Formatter does not support writing to bytes (it has no
            ``_toBytes`` attribute).
        """
        if not hasattr(self, '_toBytes'):
            raise NotImplementedError("Type does not support writing to bytes.")

        # mypy can not understand that the previous line protects this call
        return self._toBytes(inMemoryDataset)  # type: ignore