Coverage for python/lsst/daf/butler/core/ddl.py : 49%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21"""Classes for representing SQL data-definition language (DDL; "CREATE TABLE",
22etc.) in Python.
24This provides an extra layer on top of SQLAlchemy's classes for these concepts,
25because we need a level of indirection between logical tables and the actual
26SQL, and SQLAlchemy's DDL classes always map 1-1 to SQL.
28We've opted for the rather more obscure "ddl" as the name of this module
29instead of "schema" because the latter is too overloaded; in most SQL
30databases, a "schema" is also another term for a namespace.
31"""
32from __future__ import annotations
34__all__ = ("TableSpec", "FieldSpec", "ForeignKeySpec", "Base64Bytes", "Base64Region",
35 "AstropyTimeNsecTai")
from base64 import b64encode, b64decode
from dataclasses import dataclass
import functools
import logging
from math import ceil
from typing import Any, Callable, Iterable, List, Optional, Set, Tuple, Type, Union

import sqlalchemy
import astropy.time

from lsst.sphgeom import ConvexPolygon

from .config import Config
from .exceptions import ValidationError
from . import time_utils
from .utils import iterable, stripIfNotNone
from .named import NamedValueSet
54_LOG = logging.getLogger(__name__)
class SchemaValidationError(ValidationError):
    """Exception used to indicate problems in Registry schema configuration.
    """

    @classmethod
    def translate(cls, caught: Type[Exception], message: str) -> Callable:
        """Return a decorator that re-raises exceptions as
        `SchemaValidationError`.

        Decorated functions must be class or instance methods, with a
        ``config`` parameter as their first argument.  This will be passed
        to ``message.format()`` as a keyword argument, along with ``err``,
        the original exception.

        Parameters
        ----------
        caught : `type` (`Exception` subclass)
            The type of exception to catch.
        message : `str`
            A `str.format` string that may contain named placeholders for
            ``config``, ``err``, or any keyword-only argument accepted by
            the decorated function.
        """
        def decorate(func: Callable) -> Callable:
            # Preserve the wrapped method's name/docstring so introspection
            # and generated documentation keep working.
            @functools.wraps(func)
            def decorated(self: Any, config: Config, *args: Any, **kwds: Any) -> Any:
                try:
                    return func(self, config, *args, **kwds)
                except caught as err:
                    # Chain with ``from err`` so the original traceback and
                    # exception context are preserved for debugging.
                    raise cls(message.format(config=str(config), err=err)) from err
            return decorated
        return decorate
class Base64Bytes(sqlalchemy.TypeDecorator):
    """A SQLAlchemy custom type that maps Python `bytes` to a base64-encoded
    `sqlalchemy.String`.
    """

    impl = sqlalchemy.String

    def __init__(self, nbytes: int, *args: Any, **kwargs: Any):
        # Base64 expands every 3 input bytes into 4 output characters, so
        # size the underlying String column accordingly.
        super().__init__(*args, length=4*ceil(nbytes/3), **kwargs)
        self.nbytes = nbytes

    def process_bind_param(self, value: Optional[bytes], dialect: sqlalchemy.engine.Dialect
                           ) -> Optional[str]:
        # Transform native `bytes` into base64 `bytes` and then into an
        # ASCII `str`, because `str` is what SQLAlchemy expects for String
        # columns.
        if value is None:
            return None
        if not isinstance(value, bytes):
            raise TypeError(
                f"Base64Bytes fields require 'bytes' values; got '{value}' with type {type(value)}."
            )
        encoded = b64encode(value)
        return encoded.decode("ascii")

    def process_result_value(self, value: Optional[str], dialect: sqlalchemy.engine.Dialect
                             ) -> Optional[bytes]:
        # The stored `str` is guaranteed ASCII because it is base64-encoded;
        # reverse the bind transformation back to native `bytes`.
        if value is None:
            return None
        return b64decode(value.encode("ascii"))
class Base64Region(Base64Bytes):
    """A SQLAlchemy custom type that maps Python `sphgeom.ConvexPolygon` to a
    base64-encoded `sqlalchemy.String`.
    """

    def process_bind_param(self, value: Optional[ConvexPolygon], dialect: sqlalchemy.engine.Dialect
                           ) -> Optional[str]:
        # Serialize the region to raw bytes, then delegate the base64
        # encoding to the parent class.
        return None if value is None else super().process_bind_param(value.encode(), dialect)

    def process_result_value(self, value: Optional[str], dialect: sqlalchemy.engine.Dialect
                             ) -> Optional[ConvexPolygon]:
        # Reverse the bind transformation: base64 `str` -> raw bytes ->
        # region object.
        if value is None:
            return None
        raw = super().process_result_value(value, dialect)
        return ConvexPolygon.decode(raw)
class AstropyTimeNsecTai(sqlalchemy.TypeDecorator):
    """A SQLAlchemy custom type that maps Python `astropy.time.Time` to a
    number of nanoseconds since Unix epoch in TAI scale.
    """

    impl = sqlalchemy.BigInteger

    def process_bind_param(self, value: Optional[astropy.time.Time], dialect: sqlalchemy.engine.Dialect
                           ) -> Optional[int]:
        # Reject anything that is not an astropy Time up front; the
        # conversion helper assumes a Time instance.
        if value is None:
            return None
        if not isinstance(value, astropy.time.Time):
            raise TypeError(f"Unsupported type: {type(value)}, expected astropy.time.Time")
        return time_utils.astropy_to_nsec(value)

    def process_result_value(self, value: Optional[int], dialect: sqlalchemy.engine.Dialect
                             ) -> Optional[astropy.time.Time]:
        # `value` is nanoseconds since epoch (TAI), or None for NULL.
        return None if value is None else time_utils.nsec_to_astropy(value)
# Mapping from the "type" strings accepted in schema configuration files to
# the SQLAlchemy (or custom, defined above) column type used to implement
# each of them.  Used by `FieldSpec.fromConfig`.
VALID_CONFIG_COLUMN_TYPES = {
    "string": sqlalchemy.String,
    "int": sqlalchemy.Integer,
    "float": sqlalchemy.Float,
    "region": Base64Region,
    "bool": sqlalchemy.Boolean,
    "blob": sqlalchemy.LargeBinary,
    "datetime": AstropyTimeNsecTai,
    "hash": Base64Bytes
}
@dataclass
class FieldSpec:
    """A struct-like class used to define a column in a logical `Registry`
    table.
    """

    name: str
    """Name of the column."""

    dtype: type
    """Type of the column; usually a `type` subclass provided by SQLAlchemy
    that defines both a Python type and a corresponding precise SQL type.
    """

    length: Optional[int] = None
    """Length of the type in the database, for variable-length types."""

    nbytes: Optional[int] = None
    """Natural length used for hash and encoded-region columns, to be
    converted into the post-encoding length.
    """

    primaryKey: bool = False
    """Whether this field is (part of) its table's primary key."""

    autoincrement: bool = False
    """Whether the database should insert automatically incremented values
    when no value is provided in an INSERT.
    """

    nullable: bool = True
    """Whether this field is allowed to be NULL."""

    doc: Optional[str] = None
    """Documentation for this field."""

    def __eq__(self, other: Any) -> bool:
        # Equality (and hashing, below) is by column name only, so a set of
        # fields cannot contain two columns with the same name.  Returning
        # NotImplemented for non-FieldSpec operands lets Python fall back to
        # the other operand's __eq__.  Note: annotated ``-> bool`` per
        # convention; ``Union[bool, NotImplemented]`` is invalid typing
        # because NotImplemented is a value, not a type.
        if isinstance(other, FieldSpec):
            return self.name == other.name
        else:
            return NotImplemented

    def __hash__(self) -> int:
        return hash(self.name)

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in column config '{config}'.")
    def fromConfig(cls, config: Config, **kwds: Any) -> FieldSpec:
        """Create a `FieldSpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config : `Config`
            Configuration describing the column.  Nested configuration keys
            correspond to `FieldSpec` attributes.
        **kwds
            Additional keyword arguments that provide defaults for values
            not present in config.

        Returns
        -------
        spec : `FieldSpec`
            Specification structure for the column.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        dtype = VALID_CONFIG_COLUMN_TYPES.get(config["type"])
        if dtype is None:
            raise SchemaValidationError(f"Invalid field type string: '{config['type']}'.")
        if not config["name"].islower():
            raise SchemaValidationError(f"Column name '{config['name']}' is not all lowercase.")
        self = cls(name=config["name"], dtype=dtype, **kwds)
        self.length = config.get("length", self.length)
        self.nbytes = config.get("nbytes", self.nbytes)
        if self.length is not None and self.nbytes is not None:
            raise SchemaValidationError(f"Both length and nbytes provided for field '{self.name}'.")
        self.primaryKey = config.get("primaryKey", self.primaryKey)
        self.autoincrement = config.get("autoincrement", self.autoincrement)
        # Primary-key columns default to NOT NULL unless the configuration
        # explicitly says otherwise.
        self.nullable = config.get("nullable", False if self.primaryKey else self.nullable)
        self.doc = stripIfNotNone(config.get("doc", None))
        return self

    def getSizedColumnType(self) -> sqlalchemy.types.TypeEngine:
        """Return a sized version of the column type, utilizing either (or
        neither) of ``self.length`` and ``self.nbytes``.

        Returns
        -------
        dtype : `sqlalchemy.types.TypeEngine`
            A SQLAlchemy column type object.
        """
        if self.length is not None:
            return self.dtype(length=self.length)
        if self.nbytes is not None:
            return self.dtype(nbytes=self.nbytes)
        # With no sizing information, the type class itself is acceptable
        # to SQLAlchemy as a column type.
        return self.dtype
@dataclass
class ForeignKeySpec:
    """A struct-like class used to define a foreign key constraint in a
    logical `Registry` table.
    """

    table: str
    """Name of the target table."""

    source: Tuple[str, ...]
    """Tuple of source table column names."""

    target: Tuple[str, ...]
    """Tuple of target table column names."""

    onDelete: Optional[str] = None
    """SQL clause indicating how to handle deletes to the target table.

    If not `None` (which indicates that a constraint violation exception
    should be raised), should be either "SET NULL" or "CASCADE".
    """

    addIndex: bool = True
    """If `True`, create an index on the columns of this foreign key in the
    source table.
    """

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in foreignKey config '{config}'.")
    def fromConfig(cls, config: Config) -> ForeignKeySpec:
        """Create a `ForeignKeySpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config : `Config`
            Configuration describing the constraint.  Nested configuration
            keys correspond to `ForeignKeySpec` attributes.

        Returns
        -------
        spec : `ForeignKeySpec`
            Specification structure for the constraint.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        # The config may hold a scalar or a sequence for source/target;
        # normalize both to tuples of column names.
        sourceColumns = tuple(iterable(config["source"]))
        targetColumns = tuple(iterable(config["target"]))
        return cls(table=config["table"],
                   source=sourceColumns,
                   target=targetColumns,
                   onDelete=config.get("onDelete", None))
@dataclass
class TableSpec:
    """A struct-like class used to define a table or table-like
    query interface.

    Parameters
    ----------
    fields : `Iterable` [ `FieldSpec` ]
        Specifications for the columns in this table.
    unique : `Iterable` [ `tuple` [ `str` ] ], optional
        Non-primary-key unique constraints for the table.
    indexes : `Iterable` [ `tuple` [ `str` ] ], optional
        Indexes for the table.
    foreignKeys : `Iterable` [ `ForeignKeySpec` ], optional
        Foreign key constraints for the table.
    recycleIds : `bool`, optional
        If `True`, allow databases that might normally recycle autoincrement
        IDs to do so (usually better for performance) on any autoincrement
        field in this table.
    doc : `str`, optional
        Documentation for the table.
    """
    # A hand-written __init__ is used (instead of the dataclass-generated
    # one) to normalize the container types of the arguments.
    def __init__(
        self, fields: Iterable[FieldSpec], *,
        unique: Iterable[Tuple[str, ...]] = (),
        indexes: Iterable[Tuple[str, ...]] = (),
        foreignKeys: Iterable[ForeignKeySpec] = (),
        recycleIds: bool = True,
        doc: Optional[str] = None,
    ):
        self.fields = NamedValueSet(fields)
        self.unique = set(unique)
        self.indexes = set(indexes)
        self.foreignKeys = list(foreignKeys)
        self.recycleIds = recycleIds
        self.doc = doc

    fields: NamedValueSet[FieldSpec]
    """Specifications for the columns in this table."""

    unique: Set[Tuple[str, ...]]
    """Non-primary-key unique constraints for the table."""

    indexes: Set[Tuple[str, ...]]
    """Indexes for the table."""

    foreignKeys: List[ForeignKeySpec]
    """Foreign key constraints for the table."""

    recycleIds: bool = True
    """If `True`, allow databases that might normally recycle autoincrement
    IDs to do so (usually better for performance) on any autoincrement field
    in this table.
    """

    doc: Optional[str] = None
    """Documentation for the table."""

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in table config '{config}'.")
    def fromConfig(cls, config: Config) -> TableSpec:
        """Create a `TableSpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config : `Config`
            Configuration describing the table.  Nested configuration keys
            correspond to `TableSpec` attributes.

        Returns
        -------
        spec : `TableSpec`
            Specification structure for the table.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        return cls(
            fields=NamedValueSet(FieldSpec.fromConfig(c) for c in config["columns"]),
            unique={tuple(u) for u in config.get("unique", ())},
            foreignKeys=[ForeignKeySpec.fromConfig(c) for c in config.get("foreignKeys", ())],
            doc=stripIfNotNone(config.get("doc")),
        )