Coverage for python/lsst/daf/butler/column_spec.py: 80%
101 statements
« prev ^ index » next coverage.py v7.4.1, created at 2024-02-13 10:57 +0000
1# This file is part of butler4.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
# Public API of this module.  ``DateTimeColumnSpec`` is defined below and is a
# member of the ``ColumnSpec`` discriminated union, so it must be exported
# here as well (it was previously missing).
__all__ = (
    "ColumnSpec",
    "IntColumnSpec",
    "StringColumnSpec",
    "HashColumnSpec",
    "FloatColumnSpec",
    "BoolColumnSpec",
    "UUIDColumnSpec",
    "RegionColumnSpec",
    "TimespanColumnSpec",
    "DateTimeColumnSpec",
    "ColumnType",
)
43import textwrap
44import uuid
45from abc import ABC, abstractmethod
46from typing import Annotated, Any, ClassVar, Literal, TypeAlias, Union, final
48import astropy.time
49import pyarrow as pa
50import pydantic
51from lsst.sphgeom import Region
53from . import arrow_utils, ddl
54from ._timespan import Timespan
56ColumnType: TypeAlias = Literal[
57 "int", "string", "hash", "float", "datetime", "bool", "uuid", "timespan", "region"
58]
class _BaseColumnSpec(pydantic.BaseModel, ABC):
    """Base class for descriptions of table columns."""

    name: str = pydantic.Field(description="""Name of the column.""")

    doc: str = pydantic.Field(default="", description="Documentation for the column.")

    type: ColumnType

    nullable: bool = pydantic.Field(
        default=True,
        description="Whether the column may be ``NULL``.",
    )

    def to_sql_spec(self, **kwargs: Any) -> ddl.FieldSpec:
        """Convert this specification to a SQL-specific one.

        Parameters
        ----------
        **kwargs
            Forwarded to `ddl.FieldSpec`.

        Returns
        -------
        sql_spec : `ddl.FieldSpec`
            A SQL-specific version of this specification.
        """
        dtype = ddl.VALID_CONFIG_COLUMN_TYPES[self.type]
        return ddl.FieldSpec(name=self.name, dtype=dtype, **kwargs)

    @abstractmethod
    def to_arrow(self) -> arrow_utils.ToArrow:
        """Return an object that converts values of this column to a column in
        an Arrow table.

        Returns
        -------
        converter : `arrow_utils.ToArrow`
            A converter object with schema information in Arrow form.
        """
        raise NotImplementedError()

    def display(self, level: int = 0, tab: str = " ") -> list[str]:
        """Return a human-reader-focused string description of this column as
        a list of lines.

        Parameters
        ----------
        level : `int`
            Number of indentation tabs for the first line.
        tab : `str`
            Characters to duplicate ``level`` times to form the actual indent.

        Returns
        -------
        lines : `list` [ `str` ]
            Display lines.
        """
        result = [f"{tab * level}{self.name}: {self.type}"]
        if not self.doc:
            # No documentation: just the "name: type" header line.
            return result
        doc_indent = tab * (level + 1)
        # Wrap the doc text one level deeper than the header line.
        result += textwrap.wrap(
            self.doc,
            initial_indent=doc_indent,
            subsequent_indent=doc_indent,
        )
        return result

    def __str__(self) -> str:
        lines = self.display()
        return "\n".join(lines)
@final
class IntColumnSpec(_BaseColumnSpec):
    """Description of an integer column."""

    pytype: ClassVar[type] = int

    type: Literal["int"] = "int"

    def to_arrow(self) -> arrow_utils.ToArrow:
        # Docstring inherited.
        #
        # Use a *signed* 64-bit Arrow type: the SQL side of this spec maps
        # "int" to a signed integer column, and Python ints here may be
        # negative.  The previous pa.uint64() could not represent negative
        # values and would make round-trips lossy or fail.
        # NOTE(review): if some consumer relies on the unsigned schema,
        # confirm before merging.
        return arrow_utils.ToArrow.for_primitive(self.name, pa.int64(), nullable=self.nullable)
@final
class StringColumnSpec(_BaseColumnSpec):
    """Description of a string column."""

    pytype: ClassVar[type] = str

    type: Literal["string"] = "string"

    length: int
    """Maximum length of strings."""

    def to_sql_spec(self, **kwargs: Any) -> ddl.FieldSpec:
        # Docstring inherited.
        # SQL string columns need an explicit maximum length.
        return super().to_sql_spec(length=self.length, **kwargs)

    def to_arrow(self) -> arrow_utils.ToArrow:
        # Docstring inherited.
        # Arrow strings are variable-length; ``length`` is not needed here.
        arrow_type = pa.string()
        return arrow_utils.ToArrow.for_primitive(self.name, arrow_type, nullable=self.nullable)
@final
class HashColumnSpec(_BaseColumnSpec):
    """Description of a hash digest."""

    pytype: ClassVar[type] = bytes

    type: Literal["hash"] = "hash"

    nbytes: int
    """Number of bytes for the hash."""

    def to_sql_spec(self, **kwargs: Any) -> ddl.FieldSpec:
        # Docstring inherited.
        # The SQL column needs to know the digest size.
        return super().to_sql_spec(nbytes=self.nbytes, **kwargs)

    def to_arrow(self) -> arrow_utils.ToArrow:
        # Docstring inherited.
        # The size for Arrow binary columns is a fixed size, not a maximum
        # as in SQL, so we use a variable-size column.
        arrow_type = pa.binary()
        return arrow_utils.ToArrow.for_primitive(self.name, arrow_type, nullable=self.nullable)
@final
class FloatColumnSpec(_BaseColumnSpec):
    """Description of a float column."""

    pytype: ClassVar[type] = float

    type: Literal["float"] = "float"

    def to_arrow(self) -> arrow_utils.ToArrow:
        # Docstring inherited.
        assert self.nullable is not None, "nullable=None should be resolved by validators"
        arrow_type = pa.float64()
        return arrow_utils.ToArrow.for_primitive(self.name, arrow_type, nullable=self.nullable)
@final
class BoolColumnSpec(_BaseColumnSpec):
    """Description of a bool column."""

    pytype: ClassVar[type] = bool

    type: Literal["bool"] = "bool"

    def to_arrow(self) -> arrow_utils.ToArrow:
        # Docstring inherited.
        arrow_type = pa.bool_()
        return arrow_utils.ToArrow.for_primitive(self.name, arrow_type, nullable=self.nullable)
@final
class UUIDColumnSpec(_BaseColumnSpec):
    """Description of a UUID column."""

    pytype: ClassVar[type] = uuid.UUID

    type: Literal["uuid"] = "uuid"

    def to_arrow(self) -> arrow_utils.ToArrow:
        # Docstring inherited.
        assert self.nullable is not None, "nullable=None should be resolved by validators"
        # UUIDs get a dedicated converter rather than a primitive Arrow type.
        return arrow_utils.ToArrow.for_uuid(self.name, nullable=self.nullable)
@final
class RegionColumnSpec(_BaseColumnSpec):
    """Description of a region column."""

    name: str = "region"

    pytype: ClassVar[type] = Region

    type: Literal["region"] = "region"

    nbytes: int = 2048
    """Number of bytes for the encoded region."""

    def to_arrow(self) -> arrow_utils.ToArrow:
        # Docstring inherited.
        assert self.nullable is not None, "nullable=None should be resolved by validators"
        # Regions get a dedicated converter; ``nbytes`` only matters on the
        # SQL side.
        return arrow_utils.ToArrow.for_region(self.name, nullable=self.nullable)
@final
class TimespanColumnSpec(_BaseColumnSpec):
    """Description of a timespan column."""

    name: str = "timespan"

    pytype: ClassVar[type] = Timespan

    type: Literal["timespan"] = "timespan"

    def to_arrow(self) -> arrow_utils.ToArrow:
        # Docstring inherited.
        # Timespans get a dedicated converter with their own Arrow layout.
        return arrow_utils.ToArrow.for_timespan(self.name, nullable=self.nullable)
@final
class DateTimeColumnSpec(_BaseColumnSpec):
    """Description of a time column, stored as integer TAI nanoseconds since
    1970-01-01 and represented in Python via `astropy.time.Time`.
    """

    pytype: ClassVar[type] = astropy.time.Time

    type: Literal["datetime"] = "datetime"

    def to_arrow(self) -> arrow_utils.ToArrow:
        # Docstring inherited.
        assert self.nullable is not None, "nullable=None should be resolved by validators"
        # Times get a dedicated converter that handles the TAI-nanosecond
        # encoding.
        return arrow_utils.ToArrow.for_datetime(self.name, nullable=self.nullable)
# Discriminated union of all concrete column-spec types.  Pydantic uses the
# ``type`` field (fixed to a distinct Literal in each subclass) to select the
# right class when validating, e.g. from serialized configuration.
ColumnSpec = Annotated[
    Union[
        IntColumnSpec,
        StringColumnSpec,
        HashColumnSpec,
        FloatColumnSpec,
        BoolColumnSpec,
        UUIDColumnSpec,
        RegionColumnSpec,
        TimespanColumnSpec,
        DateTimeColumnSpec,
    ],
    pydantic.Field(discriminator="type"),
]