python/lsst/daf/butler/column_spec.py

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = (
    "ColumnSpec",
    "IntColumnSpec",
    "StringColumnSpec",
    "HashColumnSpec",
    "FloatColumnSpec",
    "BoolColumnSpec",
    "RegionColumnSpec",
    "TimespanColumnSpec",
)

import textwrap
from abc import ABC, abstractmethod
from typing import Annotated, Any, ClassVar, Literal, Union, final

import pyarrow as pa
import pydantic
from lsst.sphgeom import Region

from . import arrow_utils, ddl
from ._timespan import Timespan


class _BaseColumnSpec(pydantic.BaseModel, ABC):
    """Base class for descriptions of table columns."""

    name: str = pydantic.Field(description="Name of the column.")

    doc: str = pydantic.Field(default="", description="Documentation for the column.")

    type: str

    nullable: bool = pydantic.Field(
        default=True,
        description="Whether the column may be ``NULL``.",
    )

    def to_sql_spec(self, **kwargs: Any) -> ddl.FieldSpec:
        """Convert this specification to a SQL-specific one.

        Parameters
        ----------
        **kwargs
            Forwarded to `ddl.FieldSpec`.

        Returns
        -------
        sql_spec : `ddl.FieldSpec`
            A SQL-specific version of this specification.
        """
        return ddl.FieldSpec(name=self.name, dtype=ddl.VALID_CONFIG_COLUMN_TYPES[self.type], **kwargs)
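
    # Illustrative sketch (not part of the original module): a concrete
    # subclass converts itself to a SQL field specification, with extra
    # keyword arguments forwarded to `ddl.FieldSpec`; ``primaryKey`` below
    # is an assumed example of such a keyword.
    #
    #     spec = IntColumnSpec(name="visit", doc="Observation counter.")
    #     field = spec.to_sql_spec(primaryKey=True)
    #     # field.name == "visit"; field.dtype is looked up from
    #     # ddl.VALID_CONFIG_COLUMN_TYPES["int"].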

    @abstractmethod
    def to_arrow(self) -> arrow_utils.ToArrow:
        """Return an object that converts values of this column to a column in
        an Arrow table.

        Returns
        -------
        converter : `arrow_utils.ToArrow`
            A converter object with schema information in Arrow form.
        """
        raise NotImplementedError()

    def display(self, level: int = 0, tab: str = " ") -> list[str]:
        """Return a human-reader-focused string description of this column as
        a list of lines.

        Parameters
        ----------
        level : `int`
            Number of indentation tabs for the first line.
        tab : `str`
            Characters to duplicate ``level`` times to form the actual indent.

        Returns
        -------
        lines : `list` [ `str` ]
            Display lines.
        """
        lines = [f"{tab * level}{self.name}: {self.type}"]
        if self.doc:
            indent = tab * (level + 1)
            lines.extend(
                textwrap.wrap(
                    self.doc,
                    initial_indent=indent,
                    subsequent_indent=indent,
                )
            )
        return lines

    def __str__(self) -> str:
        return "\n".join(self.display())
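
# Illustrative sketch (not part of the original module): ``display`` and
# ``__str__`` render the column name and type on one line, with the wrapped
# documentation indented beneath it.
#
#     spec = StringColumnSpec(name="instrument", length=32, doc="Short name.")
#     print(spec)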


@final
class IntColumnSpec(_BaseColumnSpec):
    """Description of an integer column."""

    pytype: ClassVar[type] = int

    type: Literal["int"] = "int"

    def to_arrow(self) -> arrow_utils.ToArrow:
        # Docstring inherited.
        return arrow_utils.ToArrow.for_primitive(self.name, pa.int64(), nullable=self.nullable)


@final
class StringColumnSpec(_BaseColumnSpec):
    """Description of a string column."""

    pytype: ClassVar[type] = str

    type: Literal["string"] = "string"

    length: int
    """Maximum length of strings."""

    def to_sql_spec(self, **kwargs: Any) -> ddl.FieldSpec:
        # Docstring inherited.
        return super().to_sql_spec(length=self.length, **kwargs)

    def to_arrow(self) -> arrow_utils.ToArrow:
        # Docstring inherited.
        return arrow_utils.ToArrow.for_primitive(self.name, pa.string(), nullable=self.nullable)
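
# Illustrative sketch (not part of the original module): ``length`` is
# forwarded to the SQL field specification, where it bounds the column width;
# the Arrow side always uses a variable-length string type.
#
#     spec = StringColumnSpec(name="instrument", length=32)
#     field = spec.to_sql_spec()
#     # field.length == 32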


@final
class HashColumnSpec(_BaseColumnSpec):
    """Description of a hash digest column."""

    pytype: ClassVar[type] = bytes

    type: Literal["hash"] = "hash"

    nbytes: int
    """Number of bytes for the hash."""

    def to_sql_spec(self, **kwargs: Any) -> ddl.FieldSpec:
        # Docstring inherited.
        return super().to_sql_spec(nbytes=self.nbytes, **kwargs)

    def to_arrow(self) -> arrow_utils.ToArrow:
        # Docstring inherited.
        return arrow_utils.ToArrow.for_primitive(
            self.name,
            # The size of an Arrow binary column is exact, not a maximum as
            # in SQL, so we use a variable-size column instead.
            pa.binary(),
            nullable=self.nullable,
        )
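
# Illustrative note (not part of the original module): in pyarrow a sized
# binary type is fixed-width, so only the unsized form can model a SQL
# maximum length.
#
#     pa.binary(32)  # fixed_size_binary[32]: every value is exactly 32 bytes
#     pa.binary()    # binary: values may have any length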


@final
class FloatColumnSpec(_BaseColumnSpec):
    """Description of a float column."""

    pytype: ClassVar[type] = float

    type: Literal["float"] = "float"

    def to_arrow(self) -> arrow_utils.ToArrow:
        # Docstring inherited.
        assert self.nullable is not None, "nullable=None should be resolved by validators"
        return arrow_utils.ToArrow.for_primitive(self.name, pa.float64(), nullable=self.nullable)


@final
class BoolColumnSpec(_BaseColumnSpec):
    """Description of a bool column."""

    pytype: ClassVar[type] = bool

    type: Literal["bool"] = "bool"

    def to_arrow(self) -> arrow_utils.ToArrow:
        # Docstring inherited.
        return arrow_utils.ToArrow.for_primitive(self.name, pa.bool_(), nullable=self.nullable)


@final
class RegionColumnSpec(_BaseColumnSpec):
    """Description of a region column."""

    name: str = "region"

    pytype: ClassVar[type] = Region

    type: Literal["region"] = "region"

    nbytes: int = 2048
    """Number of bytes for the encoded region."""

    def to_arrow(self) -> arrow_utils.ToArrow:
        # Docstring inherited.
        assert self.nullable is not None, "nullable=None should be resolved by validators"
        return arrow_utils.ToArrow.for_region(self.name, nullable=self.nullable)


@final
class TimespanColumnSpec(_BaseColumnSpec):
    """Description of a timespan column."""

    name: str = "timespan"

    pytype: ClassVar[type] = Timespan

    type: Literal["timespan"] = "timespan"

    def to_arrow(self) -> arrow_utils.ToArrow:
        # Docstring inherited.
        return arrow_utils.ToArrow.for_timespan(self.name, nullable=self.nullable)


ColumnSpec = Annotated[
    Union[
        IntColumnSpec,
        StringColumnSpec,
        HashColumnSpec,
        FloatColumnSpec,
        BoolColumnSpec,
        RegionColumnSpec,
        TimespanColumnSpec,
    ],
    pydantic.Field(discriminator="type"),
]
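
# Illustrative sketch (not part of the original module): because the union is
# discriminated on the ``type`` field, pydantic (v2) can parse a serialized
# column specification directly to the matching concrete class.
#
#     from pydantic import TypeAdapter
#
#     adapter = TypeAdapter(ColumnSpec)
#     spec = adapter.validate_python({"name": "visit", "type": "int", "nullable": False})
#     assert isinstance(spec, IntColumnSpec)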