Coverage for python/lsst/daf/butler/column_spec.py: 80%

101 statements  

coverage.py v7.4.1, created at 2024-02-13 10:57 +0000

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = (
    "ColumnSpec",
    "IntColumnSpec",
    "StringColumnSpec",
    "HashColumnSpec",
    "FloatColumnSpec",
    "BoolColumnSpec",
    "UUIDColumnSpec",
    "RegionColumnSpec",
    "TimespanColumnSpec",
    "DateTimeColumnSpec",
    "ColumnType",
)

import textwrap
import uuid
from abc import ABC, abstractmethod
from typing import Annotated, Any, ClassVar, Literal, TypeAlias, Union, final

import astropy.time
import pyarrow as pa
import pydantic
from lsst.sphgeom import Region

from . import arrow_utils, ddl
from ._timespan import Timespan

ColumnType: TypeAlias = Literal[
    "int", "string", "hash", "float", "datetime", "bool", "uuid", "timespan", "region"
]
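# These literal values double as the ``type`` discriminator on the column spec
# classes defined below, and `_BaseColumnSpec.to_sql_spec` uses them as keys
# into `ddl.VALID_CONFIG_COLUMN_TYPES` when building SQL field specs.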

class _BaseColumnSpec(pydantic.BaseModel, ABC):
    """Base class for descriptions of table columns."""

    name: str = pydantic.Field(description="""Name of the column.""")

    doc: str = pydantic.Field(default="", description="Documentation for the column.")

    type: ColumnType

    nullable: bool = pydantic.Field(
        default=True,
        description="Whether the column may be ``NULL``.",
    )

    def to_sql_spec(self, **kwargs: Any) -> ddl.FieldSpec:
        """Convert this specification to a SQL-specific one.

        Parameters
        ----------
        **kwargs
            Forwarded to `ddl.FieldSpec`.

        Returns
        -------
        sql_spec : `ddl.FieldSpec`
            A SQL-specific version of this specification.
        """
        return ddl.FieldSpec(name=self.name, dtype=ddl.VALID_CONFIG_COLUMN_TYPES[self.type], **kwargs)

    @abstractmethod
    def to_arrow(self) -> arrow_utils.ToArrow:
        """Return an object that converts values of this column to a column in
        an Arrow table.

        Returns
        -------
        converter : `arrow_utils.ToArrow`
            A converter object with schema information in Arrow form.
        """
        raise NotImplementedError()

    def display(self, level: int = 0, tab: str = " ") -> list[str]:
        """Return a human-reader-focused string description of this column as
        a list of lines.

        Parameters
        ----------
        level : `int`
            Number of indentation tabs for the first line.
        tab : `str`
            Characters to duplicate ``level`` times to form the actual indent.

        Returns
        -------
        lines : `list` [ `str` ]
            Display lines.
        """
        lines = [f"{tab * level}{self.name}: {self.type}"]
        if self.doc:
            indent = tab * (level + 1)
            lines.extend(
                textwrap.wrap(
                    self.doc,
                    initial_indent=indent,
                    subsequent_indent=indent,
                )
            )
        return lines

    def __str__(self) -> str:
        return "\n".join(self.display())

@final
class IntColumnSpec(_BaseColumnSpec):
    """Description of an integer column."""

    pytype: ClassVar[type] = int

    type: Literal["int"] = "int"

    def to_arrow(self) -> arrow_utils.ToArrow:
        # Docstring inherited.
        return arrow_utils.ToArrow.for_primitive(self.name, pa.uint64(), nullable=self.nullable)


@final
class StringColumnSpec(_BaseColumnSpec):
    """Description of a string column."""

    pytype: ClassVar[type] = str

    type: Literal["string"] = "string"

    length: int
    """Maximum length of strings."""

    def to_sql_spec(self, **kwargs: Any) -> ddl.FieldSpec:
        # Docstring inherited.
        return super().to_sql_spec(length=self.length, **kwargs)

    def to_arrow(self) -> arrow_utils.ToArrow:
        # Docstring inherited.
        return arrow_utils.ToArrow.for_primitive(self.name, pa.string(), nullable=self.nullable)


@final
class HashColumnSpec(_BaseColumnSpec):
    """Description of a hash digest."""

    pytype: ClassVar[type] = bytes

    type: Literal["hash"] = "hash"

    nbytes: int
    """Number of bytes for the hash."""

    def to_sql_spec(self, **kwargs: Any) -> ddl.FieldSpec:
        # Docstring inherited.
        return super().to_sql_spec(nbytes=self.nbytes, **kwargs)

    def to_arrow(self) -> arrow_utils.ToArrow:
        # Docstring inherited.
        return arrow_utils.ToArrow.for_primitive(
            self.name,
            # The size for Arrow binary columns is a fixed size, not a maximum
            # as in SQL, so we use a variable-size column.
            pa.binary(),
            nullable=self.nullable,
        )


@final
class FloatColumnSpec(_BaseColumnSpec):
    """Description of a float column."""

    pytype: ClassVar[type] = float

    type: Literal["float"] = "float"

    def to_arrow(self) -> arrow_utils.ToArrow:
        # Docstring inherited.
        assert self.nullable is not None, "nullable=None should be resolved by validators"
        return arrow_utils.ToArrow.for_primitive(self.name, pa.float64(), nullable=self.nullable)


@final
class BoolColumnSpec(_BaseColumnSpec):
    """Description of a bool column."""

    pytype: ClassVar[type] = bool

    type: Literal["bool"] = "bool"

    def to_arrow(self) -> arrow_utils.ToArrow:
        # Docstring inherited.
        return arrow_utils.ToArrow.for_primitive(self.name, pa.bool_(), nullable=self.nullable)


@final
class UUIDColumnSpec(_BaseColumnSpec):
    """Description of a UUID column."""

    pytype: ClassVar[type] = uuid.UUID

    type: Literal["uuid"] = "uuid"

    def to_arrow(self) -> arrow_utils.ToArrow:
        # Docstring inherited.
        assert self.nullable is not None, "nullable=None should be resolved by validators"
        return arrow_utils.ToArrow.for_uuid(self.name, nullable=self.nullable)


@final
class RegionColumnSpec(_BaseColumnSpec):
    """Description of a region column."""

    name: str = "region"

    pytype: ClassVar[type] = Region

    type: Literal["region"] = "region"

    nbytes: int = 2048
    """Number of bytes for the encoded region."""

    def to_arrow(self) -> arrow_utils.ToArrow:
        # Docstring inherited.
        assert self.nullable is not None, "nullable=None should be resolved by validators"
        return arrow_utils.ToArrow.for_region(self.name, nullable=self.nullable)


@final
class TimespanColumnSpec(_BaseColumnSpec):
    """Description of a timespan column."""

    name: str = "timespan"

    pytype: ClassVar[type] = Timespan

    type: Literal["timespan"] = "timespan"

    def to_arrow(self) -> arrow_utils.ToArrow:
        # Docstring inherited.
        return arrow_utils.ToArrow.for_timespan(self.name, nullable=self.nullable)


@final
class DateTimeColumnSpec(_BaseColumnSpec):
    """Description of a time column, stored as integer TAI nanoseconds since
    1970-01-01 and represented in Python via `astropy.time.Time`.
    """

    pytype: ClassVar[type] = astropy.time.Time

    type: Literal["datetime"] = "datetime"

    def to_arrow(self) -> arrow_utils.ToArrow:
        # Docstring inherited.
        assert self.nullable is not None, "nullable=None should be resolved by validators"
        return arrow_utils.ToArrow.for_datetime(self.name, nullable=self.nullable)


ColumnSpec = Annotated[
    Union[
        IntColumnSpec,
        StringColumnSpec,
        HashColumnSpec,
        FloatColumnSpec,
        BoolColumnSpec,
        UUIDColumnSpec,
        RegionColumnSpec,
        TimespanColumnSpec,
        DateTimeColumnSpec,
    ],
    pydantic.Field(discriminator="type"),
]
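
# Illustrative usage sketch: a minimal example, not part of the module listed
# above. It assumes the module is importable as ``lsst.daf.butler.column_spec``
# and that pydantic v2 (``TypeAdapter``) is installed. ``ColumnSpec`` is a
# discriminated union keyed on the ``type`` field, so validating a plain
# mapping picks the matching spec class automatically.

import pydantic

from lsst.daf.butler.column_spec import ColumnSpec, StringColumnSpec

adapter = pydantic.TypeAdapter(ColumnSpec)
spec = adapter.validate_python(
    {"name": "instrument", "type": "string", "length": 32, "doc": "Short name of the instrument."}
)
assert isinstance(spec, StringColumnSpec)

# Human-readable summary: the column name and type, then the wrapped doc text.
print(spec)

# Conversions to the other schema representations provided by the base class.
sql_spec = spec.to_sql_spec()  # a ddl.FieldSpec carrying the SQL column type
converter = spec.to_arrow()    # an arrow_utils.ToArrow with Arrow schema info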