Coverage for python/lsst/daf/butler/column_spec.py: 81%
102 statements
coverage.py v7.4.4, created at 2024-03-26 02:48 -0700
# This file is part of butler4.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = (
    "ColumnSpec",
    "IntColumnSpec",
    "StringColumnSpec",
    "HashColumnSpec",
    "FloatColumnSpec",
    "BoolColumnSpec",
    "UUIDColumnSpec",
    "RegionColumnSpec",
    "TimespanColumnSpec",
    "DateTimeColumnSpec",
    "ColumnType",
    "COLLECTION_NAME_MAX_LENGTH",
)

import textwrap
import uuid
from abc import ABC, abstractmethod
from typing import Annotated, Any, ClassVar, Literal, TypeAlias, Union, final

import astropy.time
import pyarrow as pa
import pydantic
from lsst.sphgeom import Region

from . import arrow_utils, ddl
from ._timespan import Timespan

ColumnType: TypeAlias = Literal[
    "int", "string", "hash", "float", "datetime", "bool", "uuid", "timespan", "region"
]


COLLECTION_NAME_MAX_LENGTH = 64
# TODO: DM-42541 would be a good opportunity to move this constant to a
# better home; this file is the least-bad home I can think of for now. Note
# that actually changing the value is a (minor) schema change.
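#
# Illustrative use of the constant above (the column name here is hypothetical,
# not taken from this module): a string column holding collection names would
# be declared with this as its maximum length, e.g.
#
#     StringColumnSpec(name="collection_name", length=COLLECTION_NAME_MAX_LENGTH)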


class _BaseColumnSpec(pydantic.BaseModel, ABC):
    """Base class for descriptions of table columns."""

    name: str = pydantic.Field(description="""Name of the column.""")

    doc: str = pydantic.Field(default="", description="Documentation for the column.")

    type: ColumnType

    nullable: bool = pydantic.Field(
        default=True,
        description="Whether the column may be ``NULL``.",
    )

    def to_sql_spec(self, **kwargs: Any) -> ddl.FieldSpec:
        """Convert this specification to a SQL-specific one.

        Parameters
        ----------
        **kwargs
            Forwarded to `ddl.FieldSpec`.

        Returns
        -------
        sql_spec : `ddl.FieldSpec`
            A SQL-specific version of this specification.
        """
        return ddl.FieldSpec(name=self.name, dtype=ddl.VALID_CONFIG_COLUMN_TYPES[self.type], **kwargs)
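
    # A minimal usage sketch (illustrative only; the column name is
    # hypothetical): with no extra keyword arguments the resulting
    # `ddl.FieldSpec` carries just the column name and the mapped column type,
    # e.g.
    #
    #     IntColumnSpec(name="visit").to_sql_spec()
    #
    # Subclasses with extra attributes (e.g. string length) forward them as
    # keyword arguments via their overrides below.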

    @abstractmethod
    def to_arrow(self) -> arrow_utils.ToArrow:
        """Return an object that converts values of this column to a column in
        an Arrow table.

        Returns
        -------
        converter : `arrow_utils.ToArrow`
            A converter object with schema information in Arrow form.
        """
        raise NotImplementedError()

    def display(self, level: int = 0, tab: str = " ") -> list[str]:
        """Return a human-reader-focused string description of this column as
        a list of lines.

        Parameters
        ----------
        level : `int`
            Number of indentation tabs for the first line.
        tab : `str`
            Characters to duplicate ``level`` times to form the actual indent.

        Returns
        -------
        lines : `list` [ `str` ]
            Display lines.
        """
        lines = [f"{tab * level}{self.name}: {self.type}"]
        if self.doc:
            indent = tab * (level + 1)
            lines.extend(
                textwrap.wrap(
                    self.doc,
                    initial_indent=indent,
                    subsequent_indent=indent,
                )
            )
        return lines

    def __str__(self) -> str:
        return "\n".join(self.display())
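
    # Illustrative example (hypothetical column, not taken from this module):
    # ``display`` yields a ``name: type`` header line followed by the wrapped,
    # indented documentation text, so
    #
    #     IntColumnSpec(name="visit", doc="Identifier for the observation.").display()
    #
    # returns ``["visit: int", ...]`` with the doc text on subsequent indented
    # lines, and ``str(spec)`` joins those lines with newlines.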


@final
class IntColumnSpec(_BaseColumnSpec):
    """Description of an integer column."""

    pytype: ClassVar[type] = int

    type: Literal["int"] = "int"

    def to_arrow(self) -> arrow_utils.ToArrow:
        # Docstring inherited.
        return arrow_utils.ToArrow.for_primitive(self.name, pa.uint64(), nullable=self.nullable)


@final
class StringColumnSpec(_BaseColumnSpec):
    """Description of a string column."""

    pytype: ClassVar[type] = str

    type: Literal["string"] = "string"

    length: int
    """Maximum length of strings."""

    def to_sql_spec(self, **kwargs: Any) -> ddl.FieldSpec:
        # Docstring inherited.
        return super().to_sql_spec(length=self.length, **kwargs)

    def to_arrow(self) -> arrow_utils.ToArrow:
        # Docstring inherited.
        return arrow_utils.ToArrow.for_primitive(self.name, pa.string(), nullable=self.nullable)


@final
class HashColumnSpec(_BaseColumnSpec):
    """Description of a hash digest."""

    pytype: ClassVar[type] = bytes

    type: Literal["hash"] = "hash"

    nbytes: int
    """Number of bytes for the hash."""

    def to_sql_spec(self, **kwargs: Any) -> ddl.FieldSpec:
        # Docstring inherited.
        return super().to_sql_spec(nbytes=self.nbytes, **kwargs)

    def to_arrow(self) -> arrow_utils.ToArrow:
        # Docstring inherited.
        return arrow_utils.ToArrow.for_primitive(
            self.name,
            # The size for Arrow binary columns is a fixed size, not a maximum
            # as in SQL, so we use a variable-size column.
            pa.binary(),
            nullable=self.nullable,
        )


@final
class FloatColumnSpec(_BaseColumnSpec):
    """Description of a float column."""

    pytype: ClassVar[type] = float

    type: Literal["float"] = "float"

    def to_arrow(self) -> arrow_utils.ToArrow:
        # Docstring inherited.
        assert self.nullable is not None, "nullable=None should be resolved by validators"
        return arrow_utils.ToArrow.for_primitive(self.name, pa.float64(), nullable=self.nullable)


@final
class BoolColumnSpec(_BaseColumnSpec):
    """Description of a bool column."""

    pytype: ClassVar[type] = bool

    type: Literal["bool"] = "bool"

    def to_arrow(self) -> arrow_utils.ToArrow:
        # Docstring inherited.
        return arrow_utils.ToArrow.for_primitive(self.name, pa.bool_(), nullable=self.nullable)


@final
class UUIDColumnSpec(_BaseColumnSpec):
    """Description of a UUID column."""

    pytype: ClassVar[type] = uuid.UUID

    type: Literal["uuid"] = "uuid"

    def to_arrow(self) -> arrow_utils.ToArrow:
        # Docstring inherited.
        assert self.nullable is not None, "nullable=None should be resolved by validators"
        return arrow_utils.ToArrow.for_uuid(self.name, nullable=self.nullable)


@final
class RegionColumnSpec(_BaseColumnSpec):
    """Description of a region column."""

    name: str = "region"

    pytype: ClassVar[type] = Region

    type: Literal["region"] = "region"

    nbytes: int = 2048
    """Number of bytes for the encoded region."""

    def to_arrow(self) -> arrow_utils.ToArrow:
        # Docstring inherited.
        assert self.nullable is not None, "nullable=None should be resolved by validators"
        return arrow_utils.ToArrow.for_region(self.name, nullable=self.nullable)


@final
class TimespanColumnSpec(_BaseColumnSpec):
    """Description of a timespan column."""

    name: str = "timespan"

    pytype: ClassVar[type] = Timespan

    type: Literal["timespan"] = "timespan"

    def to_arrow(self) -> arrow_utils.ToArrow:
        # Docstring inherited.
        return arrow_utils.ToArrow.for_timespan(self.name, nullable=self.nullable)


@final
class DateTimeColumnSpec(_BaseColumnSpec):
    """Description of a time column, stored as integer TAI nanoseconds since
    1970-01-01 and represented in Python via `astropy.time.Time`.
    """

    pytype: ClassVar[type] = astropy.time.Time

    type: Literal["datetime"] = "datetime"

    def to_arrow(self) -> arrow_utils.ToArrow:
        # Docstring inherited.
        assert self.nullable is not None, "nullable=None should be resolved by validators"
        return arrow_utils.ToArrow.for_datetime(self.name, nullable=self.nullable)
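
# Illustrative sketch (hypothetical column name, not part of this module): a
# non-nullable observation timestamp column could be declared as
#
#     DateTimeColumnSpec(name="timestamp", nullable=False)
#
# with values represented in Python as `astropy.time.Time` instances.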


ColumnSpec = Annotated[
    Union[
        IntColumnSpec,
        StringColumnSpec,
        HashColumnSpec,
        FloatColumnSpec,
        BoolColumnSpec,
        UUIDColumnSpec,
        RegionColumnSpec,
        TimespanColumnSpec,
        DateTimeColumnSpec,
    ],
    pydantic.Field(discriminator="type"),
]
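
# A minimal usage sketch (illustrative; ``pydantic.TypeAdapter`` is standard
# pydantic v2, and the column name below is hypothetical): because ``type`` is
# the discriminator, a plain mapping can be validated directly into the
# matching concrete spec class:
#
#     adapter = pydantic.TypeAdapter(ColumnSpec)
#     spec = adapter.validate_python({"type": "string", "name": "instrument", "length": 32})
#     assert isinstance(spec, StringColumnSpec)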