Coverage for python / lsst / daf / butler / pydantic_utils.py: 39%

92 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-28 08:36 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

# Public names exported by this module.
__all__ = (
    "DeferredValidation",
    "SerializableBytesHex",
    "SerializableRegion",
    "SerializableTime",
    "get_universe_from_context",
)

37 

38from types import EllipsisType 

39from typing import TYPE_CHECKING, Annotated, Any, ClassVar, Generic, Self, TypeAlias, TypeVar, get_args 

40 

41import pydantic 

42from astropy.time import Time 

43from pydantic_core import core_schema 

44 

45from lsst.sphgeom import Region 

46 

47from .time_utils import TimeConverter 

48 

49if TYPE_CHECKING: 

50 from .dimensions import DimensionUniverse 

51 

# Type variable for the type wrapped by a `DeferredValidation` subclass.
_T = TypeVar("_T")

53 

54 

def get_universe_from_context(context: dict[str, Any] | None) -> DimensionUniverse:
    """Look up the dimension universe in a Pydantic validation context.

    Parameters
    ----------
    context : `dict` or `None`
        Dictionary obtained from `pydantic.ValidationInfo.context`, or `None`
        if no validation context was provided.

    Returns
    -------
    universe : `DimensionUniverse`
        The object stored under the ``"universe"`` key of ``context``.

    Raises
    ------
    ValueError
        Raised if ``context`` is `None` or has no ``"universe"`` key.

    Notes
    -----
    This is a thin wrapper around ``context["universe"]`` whose only purpose
    is to report a missing context or a missing key with a consistent error.
    """
    if context is not None:
        try:
            universe = context["universe"]
        except KeyError:
            # The KeyError carries no extra information, so it is suppressed
            # in favor of the more descriptive ValueError.
            raise ValueError(
                "This object requires the DimensionUniverse to be provided in the Pydantic validation "
                "context to be deserialized."
            ) from None
        return universe
    raise ValueError("This object requires Pydantic validation context to be deserialized.")

84 

85 

class DeferredValidation(Generic[_T]):
    """Base class for wrappers that postpone Pydantic validation of a wrapped
    type while declaring the same JSON schema as that type.

    Parameters
    ----------
    data : `object`
        Unvalidated data representing an instance of the wrapped type.  This
        may be the serialized form of the wrapped type, an instance of the
        wrapped type, or anything else - but in the latter case, calls to
        `validated` will fail with a Pydantic validation error.  If the object
        is already known to be an instance of the wrapped type,
        `from_validated` should be preferred.

    Notes
    -----
    This class must be subclassed to be used, but subclasses are always
    trivial::

        class SerializableThing(DeferredValidation[Thing]):
            pass

    The type parameter for `DeferredValidation` may be a special typing object
    such as `typing.Annotated` instead of an actual `type` object.  The only
    requirement is that it must be a type Pydantic recognizes, like a
    `pydantic.BaseModel` subclass, a dataclass, or a primitive built-in.

    A wrapper subclass (e.g. ``SerializableThing``) can be used with Pydantic
    via `pydantic.TypeAdapter` or as a field in `pydantic.BaseModel`.  The
    JSON schema of the wrapper will be consistent with the JSON schema of the
    wrapped type (though it may not use JSON pointer references the same
    way), and Pydantic serialization will work regardless of whether the
    wrapper instance was initialized with the raw type or the wrapped type.
    Pydantic validation of the wrapper will effectively do nothing, however;
    instead, the `validated` method must be called to return a
    fully-validated instance of the wrapped type, which is then cached within
    the wrapper for subsequent calls to `validated`.

    Indirect subclasses of `DeferredValidation` are not permitted.

    A major use case for `DeferredValidation` is types whose validation
    requires additional runtime context (via the Pydantic "validation
    context" dictionary that custom validator hooks can access).  These types
    are often first deserialized (e.g. by FastAPI) in a way that does not
    permit that context to be provided.
    """

    # The wrapped type; assigned on each direct subclass by
    # ``__init_subclass__`` and left as `None` on this base class.
    _WRAPPED_TYPE: ClassVar[Any | None] = None

    # Adapter for the wrapped type; built on first use by
    # ``_get_wrapped_type_adapter`` and then stored on the class.
    _WRAPPED_TYPE_ADAPTER: ClassVar[pydantic.TypeAdapter[Any] | None] = None

    def __init__(self, data: Any):
        # Hold the data as given; nothing is checked until `validated` runs.
        self._data = data
        self._is_validated = False

    def __init_subclass__(cls) -> None:
        # The wrapped type is the type argument given to the
        # DeferredValidation base class, which is only recoverable when that
        # base is the immediate parent.
        assert cls.__base__ is DeferredValidation, (
            "Indirect subclasses of DeferredValidation are not allowed."
        )
        try:
            # This relies on typing internals that are less stable than the
            # rest of Python, but a breakage should be easy to detect and
            # fix, and that is preferable to making subclasses declare the
            # wrapped type twice.
            generic_base = cls.__orig_bases__[0]  # type: ignore
            cls._WRAPPED_TYPE = get_args(generic_base)[0]
        except Exception as err:
            raise TypeError("DeferredValidation must be subclassed with a single type parameter.") from err
        super().__init_subclass__()

    @classmethod
    def from_validated(cls, wrapped: _T) -> Self:
        """Build a wrapper around an instance of the wrapped type.

        Unlike passing an instance of the wrapped type to the constructor,
        this factory marks the held instance as already validated (the caller
        is expected to guarantee that, possibly with the help of static
        analysis), so later calls to `validated` skip Pydantic validation.

        Parameters
        ----------
        wrapped : `object`
            Instance of the wrapped type.

        Returns
        -------
        wrapper : `DeferredValidation`
            Instance of the wrapper.
        """
        wrapper = cls(wrapped)
        wrapper._is_validated = True
        return wrapper

    def validated(self, **kwargs: Any) -> _T:
        """Return the validated object, validating it first if necessary.

        Parameters
        ----------
        **kwargs
            Additional keyword arguments are passed as the Pydantic
            "validation context" `dict`.

        Returns
        -------
        wrapped : `typing.Any`
            An instance of the wrapped type.  This is also cached for the
            next call to `validated`, *which will ignore ``**kwargs``*.
        """
        if self._is_validated:
            return self._data
        adapter = self._get_wrapped_type_adapter()
        # The flag is only flipped after validation succeeds, so a failed
        # validation leaves the raw data in place.
        self._data = adapter.validate_python(self._data, strict=False, context=kwargs)
        self._is_validated = True
        return self._data

    @classmethod
    def _get_wrapped_type_adapter(cls) -> pydantic.TypeAdapter[_T]:
        """Return the Pydantic adapter for the wrapped type, building and
        caching it on the class the first time it is requested.
        """
        adapter = cls._WRAPPED_TYPE_ADAPTER
        if adapter is None:
            wrapped_type = cls._WRAPPED_TYPE
            if wrapped_type is None:
                raise TypeError("DeferredValidation must be subclassed to be used.")
            adapter = pydantic.TypeAdapter(wrapped_type)
            cls._WRAPPED_TYPE_ADAPTER = adapter
        return adapter

    def _serialize(self) -> Any:
        """Serialize this object."""
        if not self._is_validated:
            # Unvalidated data is handed back exactly as it was given.
            return self._data
        return self._get_wrapped_type_adapter().dump_python(self._data)

    @classmethod
    def __get_pydantic_core_schema__(
        cls, _source_type: Any, _handler: pydantic.GetCoreSchemaHandler
    ) -> core_schema.CoreSchema:
        # Pydantic hook for overriding serialization and validation.  It is
        # normally also the hook that defines the JSON schema, but that JSON
        # schema is discarded in favor of the one built in
        # __get_pydantic_json_schema__.
        call_constructor = core_schema.no_info_plain_validator_function
        # From JSON: invoke the constructor on the Python primitives parsed
        # from the JSON.
        from_json = call_constructor(cls)
        # From Python: accept an existing wrapper instance as-is, otherwise
        # invoke the constructor on whatever was given.
        from_python = core_schema.union_schema(
            [
                core_schema.is_instance_schema(cls),
                call_constructor(cls),
            ]
        )
        # Serialization just calls the _serialize method.
        to_serialized = core_schema.plain_serializer_function_ser_schema(cls._serialize)
        return core_schema.json_or_python_schema(
            json_schema=from_json,
            python_schema=from_python,
            serialization=to_serialized,
        )

    @classmethod
    def __get_pydantic_json_schema__(
        cls, _core_schema: core_schema.CoreSchema, handler: pydantic.json_schema.GetJsonSchemaHandler
    ) -> pydantic.json_schema.JsonSchemaValue:
        # Pydantic hook for customizing the JSON schema.  The schema generated
        # for this class is ignored; the JSON schema of the wrapped type is
        # returned instead.
        wrapped_core_schema = cls._get_wrapped_type_adapter().core_schema
        return handler.resolve_ref_schema(handler(wrapped_core_schema))

260 

261 

def _deserialize_region(value: object, handler: pydantic.ValidatorFunctionWrapHandler) -> Region:
    """Wrap-validator that turns a hex string into a `Region`.

    Parameters
    ----------
    value : `object`
        Either a `Region` instance, which is returned unchanged, or a value
        to be passed to ``handler``.
    handler : `pydantic.ValidatorFunctionWrapHandler`
        Inner validator; its result is interpreted as a hex string.

    Returns
    -------
    region : `Region`
        The given region, or the result of decoding the hex-encoded bytes.
    """
    if not isinstance(value, Region):
        # Run the inner validator first, then undo the hex encoding and let
        # Region decode the resulting bytes.
        hex_text = handler(value)
        value = Region.decode(bytes.fromhex(hex_text))
    return value

268 

269 

270def _serialize_region(region: Region) -> str: 

271 return region.encode().hex() 

272 

273 

SerializableRegion: TypeAlias = Annotated[
    Region,
    # Generate the underlying schema for `str` instead of for `Region`.
    pydantic.GetPydanticSchema(lambda _, h: h(str)),
    # Wrap the string validation with the hex-decoding helper.
    pydantic.WrapValidator(_deserialize_region),
    # Serialize through the hex-encoding helper.
    pydantic.PlainSerializer(_serialize_region),
    # Declare the JSON schema explicitly as a base16-encoded string.
    pydantic.WithJsonSchema(
        {
            "type": "string",
            "description": "A region on the sphere from the lsst.sphgeom package.",
            "media": {"binaryEncoding": "base16", "type": "application/lsst.sphgeom"},
        }
    ),
]
"""A Pydantic-annotated version of `lsst.sphgeom.Region`.

An object annotated with this type is always an `lsst.sphgeom.Region` instance
in Python, but unlike `lsst.sphgeom.Region` itself it can be used as a type
in Pydantic models and type adapters, resulting in the field being saved as
a hex encoding of the sphgeom-encoded bytes.
"""

294 

295 

296def _deserialize_bytes_hex(value: object, handler: pydantic.ValidatorFunctionWrapHandler) -> Region: 

297 if isinstance(value, bytes): 

298 return value 

299 

300 string = handler(value) 

301 return bytes.fromhex(string) 

302 

303 

SerializableBytesHex: TypeAlias = Annotated[
    bytes,
    # Generate the underlying schema for `str` instead of for `bytes`.
    pydantic.GetPydanticSchema(lambda _, h: h(str)),
    # Wrap the string validation with the hex-decoding helper.
    pydantic.WrapValidator(_deserialize_bytes_hex),
    # Serialize with `bytes.hex`.
    pydantic.PlainSerializer(bytes.hex),
    # Declare the JSON schema explicitly as a base16-encoded string.
    pydantic.WithJsonSchema(
        {
            "type": "string",
            "description": "A hex-encoded byte string.",
            "media": {"binaryEncoding": "base16"},
        }
    ),
]
"""A Pydantic-annotated version `bytes` that serializes as hex.
"""

319 

320 

def _deserialize_time(value: object, handler: pydantic.ValidatorFunctionWrapHandler) -> Time:
    """Wrap-validator that turns an integer into an `astropy.time.Time`.

    Parameters
    ----------
    value : `object`
        Either a `~astropy.time.Time` instance, which is returned unchanged,
        or a value to be passed to ``handler``.
    handler : `pydantic.ValidatorFunctionWrapHandler`
        Inner validator; its result is passed to
        `TimeConverter.nsec_to_astropy`.

    Returns
    -------
    time : `astropy.time.Time`
        The given time, or the result of converting the validated integer.
    """
    if isinstance(value, Time):
        return value

    integer = handler(value)
    return TimeConverter().nsec_to_astropy(integer)

327 

328 

def _serialize_time(time: Time) -> int:
    """Serialize a time using `TimeConverter.astropy_to_nsec`.

    Parameters
    ----------
    time : `astropy.time.Time`
        Time to serialize.

    Returns
    -------
    nsec : `int`
        Whatever `TimeConverter.astropy_to_nsec` returns for ``time``.
    """
    converter = TimeConverter()
    return converter.astropy_to_nsec(time)

331 

332 

SerializableTime: TypeAlias = Annotated[
    Time,
    # Generate the underlying schema for `int` instead of for `Time`.
    pydantic.GetPydanticSchema(lambda _, h: h(int)),
    # Wrap the integer validation with the nanoseconds-to-Time helper.
    pydantic.WrapValidator(_deserialize_time),
    # Serialize through the Time-to-nanoseconds helper.
    pydantic.PlainSerializer(_serialize_time),
    # Declare the JSON schema explicitly as an integer.
    pydantic.WithJsonSchema(
        {
            "type": "integer",
            "description": "A TAI time represented as integer nanoseconds since 1970-01-01 00:00:00.",
        }
    ),
]
"""A Pydantic-annotated version of `astropy.time.Time`.

An object annotated with this type is always an `astropy.time.Time` instance
in Python, but unlike `astropy.time.Time` itself it can be used as a type
in Pydantic models and type adapters, resulting in the field being saved as
integer nanoseconds since 1970-01-01 00:00:00.
"""

352 

353 

354def _serialize_ellipsis(value: Any, handler: pydantic.SerializerFunctionWrapHandler) -> str: 

355 if value is ...: 

356 return "..." 

357 return handler(value) 

358 

359 

360def _deserialize_ellipsis(value: object, handler: pydantic.ValidatorFunctionWrapHandler) -> EllipsisType: 

361 s = handler(value) 

362 if s == "...": 

363 return ... 

364 raise ValueError(f"String {s!r} is not '...'.")