Coverage for python/lsst/daf/butler/core/ddl.py : 49%

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
21"""Classes for representing SQL data-definition language (DDL; "CREATE TABLE",
22etc.) in Python.
24This provides an extra layer on top of SQLAlchemy's classes for these concepts,
25because we need a level of indirection between logical tables and the actual
26SQL, and SQLAlchemy's DDL classes always map 1-1 to SQL.
28We've opted for the rather more obscure "ddl" as the name of this module
29instead of "schema" because the latter is too overloaded; in most SQL
30databases, a "schema" is also another term for a namespace.
31"""
from __future__ import annotations

__all__ = ("TableSpec", "FieldSpec", "ForeignKeySpec", "Base64Bytes", "Base64Region",
           "AstropyTimeNsecTai")

from base64 import b64encode, b64decode
import logging
from math import ceil
from dataclasses import dataclass
from typing import Optional, Tuple, Sequence, Set

import sqlalchemy
import astropy.time

from lsst.sphgeom import ConvexPolygon
from .config import Config
from .exceptions import ValidationError
from . import time_utils
from .utils import iterable, stripIfNotNone, NamedValueSet

_LOG = logging.getLogger(__name__)


class SchemaValidationError(ValidationError):
    """Exceptions used to indicate problems in Registry schema configuration.
    """

    @classmethod
    def translate(cls, caught, message):
62 """A decorator that re-raises exceptions as `SchemaValidationError`.
64 Decorated functions must be class or instance methods, with a
65 ``config`` parameter as their first argument. This will be passed
66 to ``message.format()`` as a keyword argument, along with ``err``,
67 the original exception.
69 Parameters
70 ----------
71 caught : `type` (`Exception` subclass)
72 The type of exception to catch.
73 message : `str`
74 A `str.format` string that may contain named placeholders for
75 ``config``, ``err``, or any keyword-only argument accepted by
76 the decorated function.
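
        Examples
        --------
        A minimal usage sketch; ``ExampleSpec`` and its ``name`` key are
        hypothetical and not part of this module::

            class ExampleSpec:
                @classmethod
                @SchemaValidationError.translate(KeyError,
                                                 "Missing key {err} in config '{config}'.")
                def fromConfig(cls, config):
                    return config["name"]

            ExampleSpec.fromConfig({})  # raises SchemaValidationError, not KeyError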
77 """
        def decorate(func):
            def decorated(self, config, *args, **kwds):
                try:
                    return func(self, config, *args, **kwds)
                except caught as err:
                    raise cls(message.format(config=str(config), err=err))
            return decorated
        return decorate


class Base64Bytes(sqlalchemy.TypeDecorator):
    """A SQLAlchemy custom type that maps Python `bytes` to a base64-encoded
    `sqlalchemy.String`.
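
    Examples
    --------
    A rough sketch of the round trip performed by the bind/result hooks; in
    practice SQLAlchemy invokes these hooks itself when writing and reading
    column values::

        t = Base64Bytes(nbytes=32)
        stored = t.process_bind_param(b"raw bytes", dialect=None)   # base64 `str`
        assert t.process_result_value(stored, dialect=None) == b"raw bytes"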
91 """
93 impl = sqlalchemy.String
95 def __init__(self, nbytes, *args, **kwds):
96 length = 4*ceil(nbytes/3)
97 super().__init__(*args, length=length, **kwds)
98 self.nbytes = nbytes

    def process_bind_param(self, value, dialect):
        # 'value' is native `bytes`.  We want to encode that to base64 `bytes`
        # and then ASCII `str`, because `str` is what SQLAlchemy expects for
        # String fields.
        if value is None:
            return None
        if not isinstance(value, bytes):
            raise TypeError(
                f"Base64Bytes fields require 'bytes' values; got '{value}' with type {type(value)}."
            )
        return b64encode(value).decode("ascii")

    def process_result_value(self, value, dialect):
        # 'value' is a `str` that must be ASCII because it's base64-encoded.
        # We want to transform that to base64-encoded `bytes` and then
        # native `bytes`.
        return b64decode(value.encode("ascii")) if value is not None else None


class Base64Region(Base64Bytes):
    """A SQLAlchemy custom type that maps Python `sphgeom.ConvexPolygon` to a
    base64-encoded `sqlalchemy.String`.
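
    Examples
    --------
    A sketch of the round trip for a region; the polygon vertices and the
    ``nbytes`` value here are arbitrary, and SQLAlchemy normally calls these
    hooks itself::

        from lsst.sphgeom import ConvexPolygon, UnitVector3d

        t = Base64Region(nbytes=2048)
        region = ConvexPolygon([UnitVector3d(1, 0, 0),
                                UnitVector3d(0, 1, 0),
                                UnitVector3d(0, 0, 1)])
        stored = t.process_bind_param(region, dialect=None)
        restored = t.process_result_value(stored, dialect=None)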
122 """
124 def process_bind_param(self, value, dialect):
125 if value is None:
126 return None
127 return super().process_bind_param(value.encode(), dialect)
129 def process_result_value(self, value, dialect):
130 if value is None:
131 return None
132 return ConvexPolygon.decode(super().process_result_value(value, dialect))


class AstropyTimeNsecTai(sqlalchemy.TypeDecorator):
    """A SQLAlchemy custom type that maps Python `astropy.time.Time` to a
    number of nanoseconds since the Unix epoch in TAI scale.
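
    Examples
    --------
    An illustrative sketch of the conversion; SQLAlchemy normally calls these
    hooks itself, and the date used here is arbitrary::

        t = AstropyTimeNsecTai()
        nsec = t.process_bind_param(astropy.time.Time("2020-01-01", scale="tai"),
                                    dialect=None)
        time = t.process_result_value(nsec, dialect=None)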
138 """
140 impl = sqlalchemy.BigInteger
142 def process_bind_param(self, value, dialect):
143 # value is astropy.time.Time or None
144 if value is None:
145 return None
146 if not isinstance(value, astropy.time.Time):
147 raise TypeError(f"Unsupported type: {type(value)}, expected astropy.time.Time")
148 value = time_utils.astropy_to_nsec(value)
149 return value
151 def process_result_value(self, value, dialect):
152 # value is nanoseconds since epoch, or None
153 if value is None:
154 return None
155 value = time_utils.nsec_to_astropy(value)
156 return value


VALID_CONFIG_COLUMN_TYPES = {
    "string": sqlalchemy.String,
    "int": sqlalchemy.Integer,
    "float": sqlalchemy.Float,
    "region": Base64Region,
    "bool": sqlalchemy.Boolean,
    "blob": sqlalchemy.LargeBinary,
    "datetime": AstropyTimeNsecTai,
    "hash": Base64Bytes
}


@dataclass
class FieldSpec:
    """A struct-like class used to define a column in a logical `Registry`
    table.
    """

    name: str
    """Name of the column."""

    dtype: type
    """Type of the column; usually a `type` subclass provided by SQLAlchemy
    that defines both a Python type and a corresponding precise SQL type.
    """

    length: Optional[int] = None
    """Length of the type in the database, for variable-length types."""

    nbytes: Optional[int] = None
    """Natural length used for hash and encoded-region columns, to be
    converted into the post-encoding length.
    """

    primaryKey: bool = False
    """Whether this field is (part of) its table's primary key."""

    autoincrement: bool = False
    """Whether the database should insert automatically incremented values
    when no value is provided in an INSERT.
    """

    nullable: bool = True
    """Whether this field is allowed to be NULL."""

    doc: Optional[str] = None
    """Documentation for this field."""

    def __eq__(self, other):
        return self.name == other.name

    def __hash__(self):
        return hash(self.name)

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in column config '{config}'.")
    def fromConfig(cls, config: Config, **kwds) -> FieldSpec:
        """Create a `FieldSpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config : `Config`
            Configuration describing the column.  Nested configuration keys
            correspond to `FieldSpec` attributes.
        kwds
            Additional keyword arguments that provide defaults for values
            not present in config.

        Returns
        -------
        spec : `FieldSpec`
            Specification structure for the column.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
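
        Examples
        --------
        A sketch of the kind of column description this expects; the key
        names and values here are illustrative, and the real layout is
        defined by `SchemaConfig`::

            config = Config({"name": "visit", "type": "int", "primaryKey": True})
            spec = FieldSpec.fromConfig(config)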
236 """
237 dtype = VALID_CONFIG_COLUMN_TYPES.get(config["type"])
238 if dtype is None:
239 raise SchemaValidationError(f"Invalid field type string: '{config['type']}'.")
240 if not config["name"].islower():
241 raise SchemaValidationError(f"Column name '{config['name']}' is not all lowercase.")
242 self = cls(name=config["name"], dtype=dtype, **kwds)
243 self.length = config.get("length", self.length)
244 self.nbytes = config.get("nbytes", self.nbytes)
245 if self.length is not None and self.nbytes is not None:
246 raise SchemaValidationError(f"Both length and nbytes provided for field '{self.name}'.")
247 self.primaryKey = config.get("primaryKey", self.primaryKey)
248 self.autoincrement = config.get("autoincrement", self.autoincrement)
249 self.nullable = config.get("nullable", False if self.primaryKey else self.nullable)
250 self.doc = stripIfNotNone(config.get("doc", None))
251 return self

    def getSizedColumnType(self) -> sqlalchemy.types.TypeEngine:
        """Return a sized version of the column type, utilizing either (or
        neither) of ``self.length`` and ``self.nbytes``.

        Returns
        -------
        dtype : `sqlalchemy.types.TypeEngine`
            A SQLAlchemy column type object.
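
        Examples
        --------
        For instance, a variable-length string column (the name and length
        used here are illustrative)::

            spec = FieldSpec(name="instrument", dtype=sqlalchemy.String, length=16)
            column_type = spec.getSizedColumnType()   # sqlalchemy.String(length=16)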
261 """
262 if self.length is not None:
263 return self.dtype(length=self.length)
264 if self.nbytes is not None:
265 return self.dtype(nbytes=self.nbytes)
266 return self.dtype


@dataclass
class ForeignKeySpec:
    """A struct-like class used to define a foreign key constraint in a
    logical `Registry` table.
    """

    table: str
    """Name of the target table."""

    source: Tuple[str, ...]
    """Tuple of source table column names."""

    target: Tuple[str, ...]
    """Tuple of target table column names."""

    onDelete: Optional[str] = None
    """SQL clause indicating how to handle deletes to the target table.

    If not `None` (which indicates that a constraint violation exception
    should be raised), should be either "SET NULL" or "CASCADE".
    """

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in foreignKey config '{config}'.")
    def fromConfig(cls, config: Config) -> ForeignKeySpec:
        """Create a `ForeignKeySpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config : `Config`
            Configuration describing the constraint.  Nested configuration
            keys correspond to `ForeignKeySpec` attributes.

        Returns
        -------
        spec : `ForeignKeySpec`
            Specification structure for the constraint.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
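
        Examples
        --------
        A sketch with illustrative table and column names; scalar ``source``
        and ``target`` values are wrapped into one-element tuples::

            config = Config({"table": "instrument",
                             "source": "instrument_name",
                             "target": "name"})
            spec = ForeignKeySpec.fromConfig(config)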
311 """
312 return cls(table=config["table"],
313 source=tuple(iterable(config["source"])),
314 target=tuple(iterable(config["target"])),
315 onDelete=config.get("onDelete", None))


@dataclass
class TableSpec:
    """A struct-like class used to define a table or table-like
    query interface.
    """

    fields: NamedValueSet[FieldSpec]
    """Specifications for the columns in this table."""

    unique: Set[Tuple[str, ...]] = frozenset()
    """Non-primary-key unique constraints for the table."""

    indexes: Set[Tuple[str, ...]] = frozenset()
    """Indexes for the table."""

    foreignKeys: Sequence[ForeignKeySpec] = tuple()
    """Foreign key constraints for the table."""

    doc: Optional[str] = None
    """Documentation for the table."""

    def __post_init__(self):
        self.fields = NamedValueSet(self.fields)
        self.unique = set(self.unique)
        self.indexes = set(self.indexes)
        self.foreignKeys = list(self.foreignKeys)

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in table config '{config}'.")
    def fromConfig(cls, config: Config) -> TableSpec:
        """Create a `TableSpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config : `Config`
            Configuration describing the table.  Nested configuration keys
            correspond to `TableSpec` attributes.

        Returns
        -------
        spec : `TableSpec`
            Specification structure for the table.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
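
        Examples
        --------
        A sketch of a small table description; the names and keys here are
        illustrative, and the real layout is defined by `SchemaConfig`::

            config = Config({
                "columns": [
                    {"name": "id", "type": "int", "primaryKey": True},
                    {"name": "name", "type": "string", "length": 64},
                ],
                "unique": [["name"]],
            })
            spec = TableSpec.fromConfig(config)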
365 """
366 return cls(
367 fields=NamedValueSet(FieldSpec.fromConfig(c) for c in config["columns"]),
368 unique={tuple(u) for u in config.get("unique", ())},
369 foreignKeys=[ForeignKeySpec.fromConfig(c) for c in config.get("foreignKeys", ())],
370 sql=config.get("sql"),
371 doc=stripIfNotNone(config.get("doc")),
372 )