Coverage for python/lsst/daf/butler/column_spec.py: 80%
84 statements
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-25 10:50 +0000
1# This file is part of butler4.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30__all__ = (
31 "ColumnSpec",
32 "IntColumnSpec",
33 "StringColumnSpec",
34 "HashColumnSpec",
35 "FloatColumnSpec",
36 "BoolColumnSpec",
37 "RegionColumnSpec",
38 "TimespanColumnSpec",
39)
41import textwrap
42from abc import ABC, abstractmethod
43from typing import Annotated, Any, ClassVar, Literal, Union, final
45import pyarrow as pa
46import pydantic
47from lsst.sphgeom import Region
49from . import arrow_utils, ddl
50from ._timespan import Timespan
class _BaseColumnSpec(pydantic.BaseModel, ABC):
    """Base class for descriptions of table columns."""

    name: str = pydantic.Field(description="""Name of the column.""")

    doc: str = pydantic.Field(default="", description="Documentation for the column.")

    type: str

    nullable: bool = pydantic.Field(
        default=True,
        description="Whether the column may be ``NULL``.",
    )

    def to_sql_spec(self, **kwargs: Any) -> ddl.FieldSpec:
        """Convert this specification to a SQL-specific one.

        Parameters
        ----------
        **kwargs
            Forwarded to `ddl.FieldSpec`.

        Returns
        -------
        sql_spec : `ddl.FieldSpec`
            A SQL-specific version of this specification.
        """
        # Map the abstract column type string to its SQL dtype before
        # constructing the field specification.
        dtype = ddl.VALID_CONFIG_COLUMN_TYPES[self.type]
        return ddl.FieldSpec(name=self.name, dtype=dtype, **kwargs)

    @abstractmethod
    def to_arrow(self) -> arrow_utils.ToArrow:
        """Return an object that converts values of this column to a column in
        an Arrow table.

        Returns
        -------
        converter : `arrow_utils.ToArrow`
            A converter object with schema information in Arrow form.
        """
        raise NotImplementedError()

    def display(self, level: int = 0, tab: str = " ") -> list[str]:
        """Return a human-reader-focused string description of this column as
        a list of lines.

        Parameters
        ----------
        level : `int`
            Number of indentation tabs for the first line.
        tab : `str`
            Characters to duplicate ``level`` times to form the actual indent.

        Returns
        -------
        lines : `list` [ `str` ]
            Display lines.
        """
        result = [f"{tab * level}{self.name}: {self.type}"]
        if not self.doc:
            return result
        # Wrap the documentation text one indent level deeper than the header.
        doc_indent = tab * (level + 1)
        result += textwrap.wrap(
            self.doc,
            initial_indent=doc_indent,
            subsequent_indent=doc_indent,
        )
        return result

    def __str__(self) -> str:
        lines = self.display()
        return "\n".join(lines)
@final
class IntColumnSpec(_BaseColumnSpec):
    """Description of an integer column."""

    pytype: ClassVar[type] = int

    type: Literal["int"] = "int"

    def to_arrow(self) -> arrow_utils.ToArrow:
        # Docstring inherited.
        # NOTE(review): the Arrow type here is unsigned (uint64) even though
        # ``pytype`` is plain ``int`` — presumably intentional; confirm
        # callers never store negative values.
        arrow_type = pa.uint64()
        return arrow_utils.ToArrow.for_primitive(self.name, arrow_type, nullable=self.nullable)
@final
class StringColumnSpec(_BaseColumnSpec):
    """Description of a string column."""

    pytype: ClassVar[type] = str

    type: Literal["string"] = "string"

    length: int
    """Maximum length of strings."""

    def to_sql_spec(self, **kwargs: Any) -> ddl.FieldSpec:
        # Docstring inherited.
        # SQL string columns need an explicit maximum length; forward it
        # along with any caller-provided options.
        return super().to_sql_spec(**kwargs, length=self.length)

    def to_arrow(self) -> arrow_utils.ToArrow:
        # Docstring inherited.
        # Arrow strings are variable-length, so ``length`` is not needed here.
        arrow_type = pa.string()
        return arrow_utils.ToArrow.for_primitive(self.name, arrow_type, nullable=self.nullable)
@final
class HashColumnSpec(_BaseColumnSpec):
    """Description of a hash digest."""

    pytype: ClassVar[type] = bytes

    type: Literal["hash"] = "hash"

    nbytes: int
    """Number of bytes for the hash."""

    def to_sql_spec(self, **kwargs: Any) -> ddl.FieldSpec:
        # Docstring inherited.
        # The SQL layer needs the digest size to size the binary column.
        return super().to_sql_spec(**kwargs, nbytes=self.nbytes)

    def to_arrow(self) -> arrow_utils.ToArrow:
        # Docstring inherited.
        # The size for Arrow binary columns is a fixed size, not a maximum
        # as in SQL, so we use a variable-size column.
        arrow_type = pa.binary()
        return arrow_utils.ToArrow.for_primitive(self.name, arrow_type, nullable=self.nullable)
@final
class FloatColumnSpec(_BaseColumnSpec):
    """Description of a float column."""

    pytype: ClassVar[type] = float

    type: Literal["float"] = "float"

    def to_arrow(self) -> arrow_utils.ToArrow:
        # Docstring inherited.
        # Note: a previous version asserted ``self.nullable is not None``,
        # but ``nullable`` is declared as ``bool`` (default True) on the base
        # model and so can never be None here; the assertion was dead code
        # and would be stripped under ``python -O`` regardless.
        return arrow_utils.ToArrow.for_primitive(self.name, pa.float64(), nullable=self.nullable)
@final
class BoolColumnSpec(_BaseColumnSpec):
    """Description of a bool column."""

    pytype: ClassVar[type] = bool

    type: Literal["bool"] = "bool"

    def to_arrow(self) -> arrow_utils.ToArrow:
        # Docstring inherited.
        arrow_type = pa.bool_()
        return arrow_utils.ToArrow.for_primitive(self.name, arrow_type, nullable=self.nullable)
@final
class RegionColumnSpec(_BaseColumnSpec):
    """Description of a region column."""

    name: str = "region"

    pytype: ClassVar[type] = Region

    type: Literal["region"] = "region"

    nbytes: int = 2048
    """Number of bytes for the encoded region."""

    def to_arrow(self) -> arrow_utils.ToArrow:
        # Docstring inherited.
        # Note: a previous version asserted ``self.nullable is not None``,
        # but ``nullable`` is declared as ``bool`` (default True) on the base
        # model and so can never be None here; the assertion was dead code
        # and would be stripped under ``python -O`` regardless.
        return arrow_utils.ToArrow.for_region(self.name, nullable=self.nullable)
@final
class TimespanColumnSpec(_BaseColumnSpec):
    """Description of a timespan column."""

    name: str = "timespan"

    pytype: ClassVar[type] = Timespan

    type: Literal["timespan"] = "timespan"

    def to_arrow(self) -> arrow_utils.ToArrow:
        # Docstring inherited.
        # Timespans get a dedicated converter rather than a primitive one.
        return arrow_utils.ToArrow.for_timespan(self.name, nullable=self.nullable)
# Discriminated (tagged) union of all concrete column-spec types.  Pydantic
# uses the ``type`` field as the discriminator during validation, so each
# member's ``type`` Literal value selects the matching class directly instead
# of trying every union member in turn.
ColumnSpec = Annotated[
    Union[
        IntColumnSpec,
        StringColumnSpec,
        HashColumnSpec,
        FloatColumnSpec,
        BoolColumnSpec,
        RegionColumnSpec,
        TimespanColumnSpec,
    ],
    pydantic.Field(discriminator="type"),
]