Coverage for python/lsst/daf/butler/formatters/file.py : 28%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24"""Support for reading and writing files to a POSIX file system."""
26__all__ = ("FileFormatter",)
28from abc import abstractmethod
30from typing import (
31 TYPE_CHECKING,
32 Any,
33 Optional,
34 Type,
35)
37from lsst.daf.butler import Formatter
39if TYPE_CHECKING: 39 ↛ 40line 39 didn't jump to line 40, because the condition on line 39 was never true
40 from lsst.daf.butler import StorageClass
class FileFormatter(Formatter):
    """Interface for reading and writing files on a POSIX file system.

    Subclasses must implement `_readFile` and `_writeFile`. They may
    additionally define ``_fromBytes`` and ``_toBytes`` methods to enable
    `fromBytes` and `toBytes`; those hooks are looked up dynamically with
    `hasattr` and are not declared on this class.
    """

    extension: Optional[str] = None
    """Default file extension to use for writing files. None means that no
    modifications will be made to the supplied file extension. (`str`)"""

    @abstractmethod
    def _readFile(self, path: str, pytype: Optional[Type[Any]] = None) -> Any:
        """Read a file from the path in the correct format.

        Parameters
        ----------
        path : `str`
            Path to use to open the file.
        pytype : `class`, optional
            Class to use to read the file.

        Returns
        -------
        data : `object`
            Data read from file. Returns `None` if the file can not be
            found at the given path.

        Raises
        ------
        Exception
            Some problem reading the file.
        """
        pass

    @abstractmethod
    def _writeFile(self, inMemoryDataset: Any) -> None:
        """Write the in memory dataset to file on disk.

        Parameters
        ----------
        inMemoryDataset : `object`
            Object to serialize.

        Raises
        ------
        Exception
            The file could not be written.
        """
        pass

    def _assembleDataset(self, data: Any, component: Optional[str] = None) -> Any:
        """Assemble and coerce the dataset, or one of its components,
        into an appropriate python type and return it.

        Parameters
        ----------
        data : `dict` or `object`
            Composite or a dict that, or which component, needs to be
            coerced to the python type specified in "fileDescriptor"
        component : `str`, optional
            Component to read from the file. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object. The type of object
            is controlled by the specific formatter. Can be `None` if the
            component could not be extracted from ``data``.

        Raises
        ------
        ValueError
            The read and write storage classes differ but no component
            was requested.
        """
        fileDescriptor = self.fileDescriptor

        # If read and write storage classes differ, more work is required
        readStorageClass = fileDescriptor.readStorageClass
        writeStorageClass = fileDescriptor.storageClass
        if readStorageClass != writeStorageClass:
            if component is None:
                raise ValueError(
                    f"Storage class inconsistency ({readStorageClass.name} vs"
                    f" {writeStorageClass.name}) but no component requested"
                )

            # Concrete composite written as a single file (we hope)
            try:
                data = writeStorageClass.delegate().getComponent(data, component)
            except AttributeError:
                # Defer the complaint: callers decide whether a None result
                # is an error.
                data = None

        # Coerce to the requested type (not necessarily the type that was
        # written)
        return self._coerceType(data, readStorageClass, pytype=readStorageClass.pytype)

    def _coerceType(self, inMemoryDataset: Any, storageClass: StorageClass,
                    pytype: Optional[Type[Any]] = None) -> Any:
        """Coerce the supplied inMemoryDataset to type `pytype`.

        Usually a no-op. This base implementation returns the dataset
        unchanged.

        Parameters
        ----------
        inMemoryDataset : `object`
            Object to coerce to expected type.
        storageClass : `StorageClass`
            StorageClass associated with ``inMemoryDataset``.
        pytype : `class`, optional
            Override type to use for conversion.

        Returns
        -------
        inMemoryDataset : `object`
            Object of expected type `pytype`.
        """
        return inMemoryDataset

    def read(self, component: Optional[str] = None) -> Any:
        """Read data from a file.

        The file to read is identified by ``self.fileDescriptor``.

        Parameters
        ----------
        component : `str`, optional
            Component to read from the file. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object. The type of object
            is controlled by the specific formatter.

        Raises
        ------
        ValueError
            The read and write storage classes differ but no component was
            requested, or no data could be read and no component was
            requested.
        """
        fileDescriptor = self.fileDescriptor

        # Read the file naively
        path = fileDescriptor.location.path
        data = self._readFile(path, fileDescriptor.storageClass.pytype)

        # Assemble the requested dataset and potentially return only its
        # component coercing it to its appropriate pytype
        data = self._assembleDataset(data, component)

        # Special case components by allowing a formatter to return None
        # to indicate that the component was understood but is missing
        if data is None and component is None:
            raise ValueError(f"Unable to read data with URI {self.fileDescriptor.location.uri}")

        return data

    def fromBytes(self, serializedDataset: bytes, component: Optional[str] = None) -> Any:
        """Read serialized data into a Dataset or its component.

        Parameters
        ----------
        serializedDataset : `bytes`
            Bytes object to unserialize.
        component : `str`, optional
            Component to read from the Dataset. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object. The type of object
            is controlled by the specific formatter.

        Raises
        ------
        NotImplementedError
            Formatter does not support reading from bytes.
        ValueError
            The read and write storage classes differ but no component was
            requested, or the bytes could not be unpersisted and no
            component was requested.
        """
        if not hasattr(self, '_fromBytes'):
            raise NotImplementedError("Type does not support reading from bytes.")

        # mypy can not understand that the previous line protects this call
        data = self._fromBytes(serializedDataset,  # type: ignore
                               self.fileDescriptor.storageClass.pytype)

        # Assemble the requested dataset and potentially return only its
        # component coercing it to its appropriate pytype
        data = self._assembleDataset(data, component)

        # Special case components by allowing a formatter to return None
        # to indicate that the component was understood but is missing
        if data is None and component is None:
            nbytes = len(serializedDataset)
            s = "s" if nbytes != 1 else ""
            raise ValueError(f"Unable to unpersist {nbytes} byte{s} from "
                             f"URI {self.fileDescriptor.location.uri}")

        return data

    def write(self, inMemoryDataset: Any) -> None:
        """Write a Python object to a file.

        The location held by ``self.fileDescriptor`` is updated in place
        with the formatter-preferred file extension before the write is
        delegated to `_writeFile`.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Python object to store.
        """
        # Update the location with the formatter-preferred file extension
        self.fileDescriptor.location.updateExtension(self.extension)

        self._writeFile(inMemoryDataset)

    def toBytes(self, inMemoryDataset: Any) -> bytes:
        """Serialize the Dataset to bytes based on formatter.

        Parameters
        ----------
        inMemoryDataset : `object`
            Object to serialize.

        Returns
        -------
        serializedDataset : `bytes`
            Bytes representing the serialized dataset.

        Raises
        ------
        NotImplementedError
            Formatter does not support writing to bytes.
        """
        if not hasattr(self, '_toBytes'):
            raise NotImplementedError("Type does not support writing to bytes.")

        # mypy can not understand that the previous line protects this call
        return self._toBytes(inMemoryDataset)  # type: ignore