Coverage for python / lsst / daf / butler / formatters / typeless.py: 18%

41 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-14 23:37 +0000

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

27 

28"""Support for reading and writing files to a POSIX file system.""" 

29 

30from __future__ import annotations 

31 

32__all__ = ["TypelessFormatter"] 

33 

34import dataclasses 

35from typing import TYPE_CHECKING, Any 

36 

37from lsst.daf.butler import FormatterV2 

38 

39if TYPE_CHECKING: 

40 from lsst.daf.butler import StorageClass 

41 from lsst.daf.butler.datastore.cache_manager import AbstractDatastoreCacheManager 

42 

43 

class TypelessFormatter(FormatterV2):
    """Formatter V2 base class that attempts to coerce generic objects
    read in subclasses into the correct Python type.

    Notes
    -----
    This class provides a ``read()`` method that will run `FormatterV2.read`
    and coerce the return type using a variety of techniques. Use the
    standard `FormatterV2` methods for reading bytes/files and writing
    bytes/files.
    """

    def read(
        self,
        component: str | None = None,
        expected_size: int = -1,
        cache_manager: AbstractDatastoreCacheManager | None = None,
    ) -> Any:
        # Let the base class do the actual reading of bytes/files.
        raw = super().read(component, expected_size, cache_manager)

        # Assemble the requested dataset (possibly extracting a single
        # component) and coerce it to the appropriate python type.
        assembled = self._assemble_dataset(raw, component)

        # A component read may legitimately return None to indicate the
        # component was understood but is missing; a full-dataset read
        # returning None is an error.
        if component is None and assembled is None:
            raise ValueError(f"Unable to read data with URI {self.file_descriptor.location.uri}")

        return assembled

    def _assemble_dataset(self, data: Any, component: str | None = None) -> Any:
        """Assemble and coerce the dataset, or one of its components,
        into an appropriate python type and return it.

        Parameters
        ----------
        data : `dict` or `object`
            Composite (or a dict representing it) that, or whose component,
            needs to be coerced to the python type specified in
            ``fileDescriptor``.
        component : `str`, optional
            Component to read from the file. Only used if the `StorageClass`
            for reading differed from the `StorageClass` used to write the
            file.

        Returns
        -------
        inMemoryDataset : `object`
            The requested data as a Python object. The type of object
            is controlled by the specific formatter.
        """
        descriptor = self.file_descriptor

        # Storage classes as requested by the caller and as used on write.
        read_storage_class = descriptor.readStorageClass
        write_storage_class = descriptor.storageClass

        if component is not None:
            # A component request implies the composite must first be the
            # correct python type. Deliberately pass the write storage class
            # for both roles: the read storage class is irrelevant when
            # recovering the original composite.
            data = self._coerce_type(data, write_storage_class, write_storage_class)

            # The concrete composite was (we hope) written as a single file,
            # so ask the delegate for the component.
            try:
                data = descriptor.storageClass.delegate().getComponent(data, component)
            except AttributeError:
                # Defer the complaint to the caller.
                data = None

            # From here on, the relevant "written" storage class is the one
            # belonging to the component. Using the component storage class
            # directly should be safe since it should match what came back
            # from getComponent (otherwise a temporary storage class matching
            # the python type in hand could be created).
            write_storage_class = write_storage_class.allComponents()[component]

        # Finally coerce to the requested read type.
        return self._coerce_type(data, write_storage_class, read_storage_class)

    def _coerce_builtin_type(self, in_memory_dataset: Any, write_storage_class: StorageClass) -> Any:
        """Coerce the supplied in-memory dataset to the written python type if
        it is currently a built-in type.

        Parameters
        ----------
        in_memory_dataset : `object`
            Object to coerce to expected type.
        write_storage_class : `StorageClass`
            Storage class used to serialize this data.

        Returns
        -------
        in_memory_dataset : `object`
            Object of expected type ``write_storage_class.pytype``.

        Notes
        -----
        The supplied object is only modified if it is not `None`, not already
        of the required type, and looks like a built-in type. This helper
        exists for file formats that do not store the original Python type
        information and instead hand back built-ins such as `dict` and `list`.
        It runs before `StorageClass` converters trigger so that constructors
        able to rebuild the original type are tried first — important for
        Pydantic models whose internal structure may not match the `dict`
        form when the user has asked for a `dict`.
        """
        # Nothing to do for missing data.
        if in_memory_dataset is None:
            return in_memory_dataset

        pytype = write_storage_class.pytype

        # Only builtins that are not already the target type need coercion.
        if isinstance(in_memory_dataset, pytype) or type(in_memory_dataset).__module__ != "builtins":
            return in_memory_dataset

        # Try the class-method validators first: pydantic v2 uses
        # model_validate; pydantic v1 (and some non-pydantic classes) use
        # parse_obj. Prefer the newer name.
        for validator_name in ("model_validate", "parse_obj"):
            validator = getattr(pytype, validator_name, None)
            if validator:
                return validator(in_memory_dataset)

        if not isinstance(in_memory_dataset, dict):
            # Hope that the constructor accepts the value directly.
            return pytype(in_memory_dataset)

        if dataclasses.is_dataclass(pytype):
            # Dataclasses accept key/value parameters.
            return pytype(**in_memory_dataset)

        if write_storage_class.isComposite():
            # Assume the registered assembler can construct this type
            # from a dict.
            return write_storage_class.delegate().assemble(in_memory_dataset, pytype=pytype)

        # Unpack the dict and hope that works.
        return pytype(**in_memory_dataset)

    def _coerce_type(
        self, in_memory_dataset: Any, write_storage_class: StorageClass, read_storage_class: StorageClass
    ) -> Any:
        """Coerce the supplied in-memory dataset to the correct python type.

        Parameters
        ----------
        in_memory_dataset : `object`
            Object to coerce to expected type.
        write_storage_class : `StorageClass`
            Storage class used to serialize this data.
        read_storage_class : `StorageClass`
            Storage class requested as the outcome.

        Returns
        -------
        in_memory_dataset : `object`
            Object of expected type ``readStorageClass.pytype``.
        """
        # First rebuild the written type from any builtin representation,
        # then let the read storage class convert to the requested type.
        rebuilt = self._coerce_builtin_type(in_memory_dataset, write_storage_class)
        return read_storage_class.coerce_type(rebuilt)