Coverage for python/lsst/daf/butler/core/ddl.py : 49%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21"""Classes for representing SQL data-definition language (DDL) in Python.
This includes "CREATE TABLE" and similar statements.
25This provides an extra layer on top of SQLAlchemy's classes for these concepts,
26because we need a level of indirection between logical tables and the actual
27SQL, and SQLAlchemy's DDL classes always map 1-1 to SQL.
29We've opted for the rather more obscure "ddl" as the name of this module
30instead of "schema" because the latter is too overloaded; in most SQL
31databases, a "schema" is also another term for a namespace.
32"""
33from __future__ import annotations
35__all__ = ("TableSpec", "FieldSpec", "ForeignKeySpec", "Base64Bytes", "Base64Region",
36 "AstropyTimeNsecTai")
38from base64 import b64encode, b64decode
39import logging
40from math import ceil
41from dataclasses import dataclass
42from typing import Any, Callable, Iterable, List, Optional, Set, Tuple, Type, TYPE_CHECKING, Union
44import sqlalchemy
45import astropy.time
47from lsst.sphgeom import Region
48from .config import Config
49from .exceptions import ValidationError
50from . import time_utils
51from .utils import iterable, stripIfNotNone
52from .named import NamedValueSet
54if TYPE_CHECKING: 54 ↛ 55line 54 didn't jump to line 55, because the condition on line 54 was never true
55 from .timespan import TimespanDatabaseRepresentation
58_LOG = logging.getLogger(__name__)
class SchemaValidationError(ValidationError):
    """Exceptions that indicate problems in Registry schema configuration."""

    @classmethod
    def translate(cls, caught: Type[Exception], message: str) -> Callable:
        """Return decorator to re-raise exceptions as `SchemaValidationError`.

        Decorated functions must be class or instance methods, with a
        ``config`` parameter as their first argument.  This will be passed
        to ``message.format()`` as a keyword argument, along with ``err``,
        the original exception.

        Parameters
        ----------
        caught : `type` (`Exception` subclass)
            The type of exception to catch.
        message : `str`
            A `str.format` string that may contain named placeholders for
            ``config``, ``err``, or any keyword-only argument accepted by
            the decorated function.
        """
        def decorate(func: Callable) -> Callable:
            def decorated(self: Any, config: Config, *args: Any, **kwds: Any) -> Any:
                try:
                    return func(self, config, *args, **kwds)
                except caught as err:
                    # Chain with ``from err`` so the original exception is
                    # recorded as the explicit cause and its traceback is
                    # preserved, rather than only the implicit __context__.
                    raise cls(message.format(config=str(config), err=err)) from err
            return decorated
        return decorate
class Base64Bytes(sqlalchemy.TypeDecorator):
    """A SQLAlchemy custom type for Python `bytes`.

    Maps Python `bytes` to a base64-encoded `sqlalchemy.Text` field.
    """

    impl = sqlalchemy.Text

    def __init__(self, nbytes: int, *args: Any, **kwargs: Any):
        # Base64 turns every 3 input bytes into 4 output characters; an
        # explicit length only matters when a subclass overrides ``impl``
        # with a true String type (Text is unbounded, so None there).
        if self.impl == sqlalchemy.String:
            length = 4 * ceil(nbytes / 3)
        else:
            length = None
        super().__init__(*args, length=length, **kwargs)
        self.nbytes = nbytes

    def process_bind_param(self, value: Optional[bytes], dialect: sqlalchemy.engine.Dialect
                           ) -> Optional[str]:
        # Convert native `bytes` to base64 `bytes`, then to an ASCII `str`:
        # SQLAlchemy expects `str` for String-typed columns.
        if value is None:
            return None
        if not isinstance(value, bytes):
            raise TypeError(
                f"Base64Bytes fields require 'bytes' values; got '{value}' with type {type(value)}."
            )
        encoded = b64encode(value)
        return encoded.decode("ascii")

    def process_result_value(self, value: Optional[str], dialect: sqlalchemy.engine.Dialect
                             ) -> Optional[bytes]:
        # Reverse of process_bind_param: the stored `str` is pure ASCII
        # (it is base64), so re-encode it and base64-decode back to `bytes`.
        if value is None:
            return None
        return b64decode(value.encode("ascii"))
class Base64Region(Base64Bytes):
    """A SQLAlchemy custom type for Python `sphgeom.Region`.

    Maps Python `sphgeom.Region` to a base64-encoded `sqlalchemy.String`.
    """

    def process_bind_param(self, value: Optional[Region], dialect: sqlalchemy.engine.Dialect
                           ) -> Optional[str]:
        # Serialize the region to raw bytes, then reuse the base-class
        # base64 encoding.
        return None if value is None else super().process_bind_param(value.encode(), dialect)

    def process_result_value(self, value: Optional[str], dialect: sqlalchemy.engine.Dialect
                             ) -> Optional[Region]:
        if value is None:
            return None
        # Base class returns the raw serialized bytes; rebuild the Region.
        raw = super().process_result_value(value, dialect)
        return Region.decode(raw)
class AstropyTimeNsecTai(sqlalchemy.TypeDecorator):
    """A SQLAlchemy custom type for Python `astropy.time.Time`.

    Maps Python `astropy.time.Time` to a number of nanoseconds since Unix
    epoch in TAI scale.
    """

    impl = sqlalchemy.BigInteger

    def process_bind_param(self, value: Optional[astropy.time.Time], dialect: sqlalchemy.engine.Dialect
                           ) -> Optional[int]:
        # Store times as integer nanoseconds since the Unix epoch (TAI).
        if value is None:
            return None
        if not isinstance(value, astropy.time.Time):
            raise TypeError(f"Unsupported type: {type(value)}, expected astropy.time.Time")
        return time_utils.TimeConverter().astropy_to_nsec(value)

    def process_result_value(self, value: Optional[int], dialect: sqlalchemy.engine.Dialect
                             ) -> Optional[astropy.time.Time]:
        # ``value`` is nanoseconds since epoch (or None); convert back to
        # an astropy Time.
        if value is None:
            return None
        return time_utils.TimeConverter().nsec_to_astropy(value)
# Mapping from the "type" strings accepted in schema configuration files to
# the SQLAlchemy (or custom, defined above) column types they translate to.
# Used by FieldSpec.fromConfig to resolve the "type" key of a column config.
VALID_CONFIG_COLUMN_TYPES = {
    "string": sqlalchemy.String,
    "int": sqlalchemy.BigInteger,
    "float": sqlalchemy.Float,
    "region": Base64Region,
    "bool": sqlalchemy.Boolean,
    "blob": sqlalchemy.LargeBinary,
    "datetime": AstropyTimeNsecTai,
    "hash": Base64Bytes
}
@dataclass
class FieldSpec:
    """A data class for defining a column in a logical `Registry` table."""

    name: str
    """Name of the column."""

    dtype: type
    """Type of the column; usually a `type` subclass provided by SQLAlchemy
    that defines both a Python type and a corresponding precise SQL type.
    """

    length: Optional[int] = None
    """Length of the type in the database, for variable-length types."""

    nbytes: Optional[int] = None
    """Natural length used for hash and encoded-region columns, to be converted
    into the post-encoding length.
    """

    primaryKey: bool = False
    """Whether this field is (part of) its table's primary key."""

    autoincrement: bool = False
    """Whether the database should insert automatically incremented values when
    no value is provided in an INSERT.
    """

    nullable: bool = True
    """Whether this field is allowed to be NULL."""

    default: Any = None
    """A server-side default value for this field.

    This is passed directly as the ``server_default`` argument to
    `sqlalchemy.schema.Column`.  It does _not_ go through SQLAlchemy's usual
    type conversion or quoting for Python literals, and should hence be used
    with care.  See the SQLAlchemy documentation for more information.
    """

    doc: Optional[str] = None
    """Documentation for this field."""

    def __eq__(self, other: Any) -> bool:
        # Equality (and hashing, below) are by column name only, so specs
        # can live in name-keyed containers such as NamedValueSet.
        if isinstance(other, FieldSpec):
            return self.name == other.name
        else:
            return NotImplemented

    def __hash__(self) -> int:
        return hash(self.name)

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in column config '{config}'.")
    def fromConfig(cls, config: Config, **kwds: Any) -> FieldSpec:
        """Create a `FieldSpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config: `Config`
            Configuration describing the column.  Nested configuration keys
            correspond to `FieldSpec` attributes.
        kwds
            Additional keyword arguments that provide defaults for values
            not present in config.

        Returns
        -------
        spec: `FieldSpec`
            Specification structure for the column.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        dtype = VALID_CONFIG_COLUMN_TYPES.get(config["type"])
        if dtype is None:
            raise SchemaValidationError(f"Invalid field type string: '{config['type']}'.")
        if not config["name"].islower():
            raise SchemaValidationError(f"Column name '{config['name']}' is not all lowercase.")
        self = cls(name=config["name"], dtype=dtype, **kwds)
        self.length = config.get("length", self.length)
        self.nbytes = config.get("nbytes", self.nbytes)
        if self.length is not None and self.nbytes is not None:
            raise SchemaValidationError(f"Both length and nbytes provided for field '{self.name}'.")
        self.primaryKey = config.get("primaryKey", self.primaryKey)
        self.autoincrement = config.get("autoincrement", self.autoincrement)
        # Primary-key columns default to NOT NULL unless the config says
        # otherwise explicitly.
        self.nullable = config.get("nullable", False if self.primaryKey else self.nullable)
        self.doc = stripIfNotNone(config.get("doc", None))
        return self

    def isStringType(self) -> bool:
        """Indicate that this is a sqlalchemy.String field spec.

        Returns
        -------
        isString : `bool`
            The field refers to a `sqlalchemy.String` and not any other type.
            This can return `False` even if the object was created with a
            string type if it has been decided that it should be implemented
            as a `sqlalchemy.Text` type.
        """
        # Fix: the original fell through and implicitly returned None (not
        # False) for non-String dtypes, violating the ``-> bool`` contract;
        # wrap in bool() so callers always get a real bool.  Only short
        # strings (length <= 32) are kept as sqlalchemy.String; longer ones
        # are implemented as sqlalchemy.Text (see getSizedColumnType).
        return bool(self.dtype == sqlalchemy.String and self.length and self.length <= 32)

    def getSizedColumnType(self) -> sqlalchemy.types.TypeEngine:
        """Return a sized version of the column type.

        Utilizes either (or neither) of ``self.length`` and ``self.nbytes``.

        Returns
        -------
        dtype : `sqlalchemy.types.TypeEngine`
            A SQLAlchemy column type object.
        """
        if self.length is not None:
            # A nominally-String column that isStringType() rejects (too
            # long) is implemented as unbounded Text instead.
            if self.dtype == sqlalchemy.String and not self.isStringType():
                return sqlalchemy.Text
            return self.dtype(length=self.length)
        if self.nbytes is not None:
            return self.dtype(nbytes=self.nbytes)
        return self.dtype

    def getPythonType(self) -> type:
        """Return the Python type associated with this field's (SQL) dtype.

        Returns
        -------
        type : `type`
            Python type associated with this field's (SQL) `dtype`.
        """
        return self.dtype().python_type
@dataclass
class ForeignKeySpec:
    """Definition of a foreign key constraint in a logical `Registry` table."""

    table: str
    """Name of the target table."""

    source: Tuple[str, ...]
    """Tuple of source table column names."""

    target: Tuple[str, ...]
    """Tuple of target table column names."""

    onDelete: Optional[str] = None
    """SQL clause indicating how to handle deletes to the target table.

    If not `None` (which indicates that a constraint violation exception should
    be raised), should be either "SET NULL" or "CASCADE".
    """

    addIndex: bool = True
    """If `True`, create an index on the columns of this foreign key in the
    source table.
    """

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in foreignKey config '{config}'.")
    def fromConfig(cls, config: Config) -> ForeignKeySpec:
        """Create a `ForeignKeySpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config: `Config`
            Configuration describing the constraint.  Nested configuration
            keys correspond to `ForeignKeySpec` attributes.

        Returns
        -------
        spec: `ForeignKeySpec`
            Specification structure for the constraint.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        # Normalize the source/target column lists (which may be scalars or
        # sequences in the config) into tuples before construction.
        sourceColumns = tuple(iterable(config["source"]))
        targetColumns = tuple(iterable(config["target"]))
        return cls(table=config["table"], source=sourceColumns,
                   target=targetColumns, onDelete=config.get("onDelete", None))
@dataclass
class TableSpec:
    """A data class used to define a table or table-like query interface.

    Parameters
    ----------
    fields : `Iterable` [ `FieldSpec` ]
        Specifications for the columns in this table.
    unique : `Iterable` [ `tuple` [ `str` ] ], optional
        Non-primary-key unique constraints for the table.
    indexes: `Iterable` [ `tuple` [ `str` ] ], optional
        Indexes for the table.
    foreignKeys : `Iterable` [ `ForeignKeySpec` ], optional
        Foreign key constraints for the table.
    exclusion : `Iterable` [ `tuple` [ `str` or `type` ] ]
        Special constraints that prohibit overlaps between timespans over rows
        where other columns are equal.  These take the same form as unique
        constraints, but each tuple may contain a single
        `TimespanDatabaseRepresentation` subclass representing a timespan
        column.
    recycleIds : `bool`, optional
        If `True`, allow databases that might normally recycle autoincrement
        IDs to do so (usually better for performance) on any autoincrement
        field in this table.
    doc : `str`, optional
        Documentation for the table.
    """

    def __init__(
        self, fields: Iterable[FieldSpec], *,
        unique: Iterable[Tuple[str, ...]] = (),
        indexes: Iterable[Tuple[str, ...]] = (),
        foreignKeys: Iterable[ForeignKeySpec] = (),
        exclusion: Iterable[Tuple[Union[str, Type[TimespanDatabaseRepresentation]], ...]] = (),
        recycleIds: bool = True,
        doc: Optional[str] = None,
    ):
        self.fields = NamedValueSet(fields)
        self.unique = set(unique)
        self.indexes = set(indexes)
        self.foreignKeys = list(foreignKeys)
        self.exclusion = set(exclusion)
        self.recycleIds = recycleIds
        self.doc = doc

    fields: NamedValueSet[FieldSpec]
    """Specifications for the columns in this table."""

    unique: Set[Tuple[str, ...]]
    """Non-primary-key unique constraints for the table."""

    indexes: Set[Tuple[str, ...]]
    """Indexes for the table."""

    foreignKeys: List[ForeignKeySpec]
    """Foreign key constraints for the table."""

    exclusion: Set[Tuple[Union[str, Type[TimespanDatabaseRepresentation]], ...]]
    """Exclusion constraints for the table.

    Exclusion constraints behave mostly like unique constraints, but may
    contain a database-native Timespan column that is restricted to not overlap
    across rows (for identical combinations of any non-Timespan columns in the
    constraint).
    """

    recycleIds: bool = True
    """If `True`, allow databases that might normally recycle autoincrement IDs
    to do so (usually better for performance) on any autoincrement field in
    this table.
    """

    doc: Optional[str] = None
    """Documentation for the table."""

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in table config '{config}'.")
    def fromConfig(cls, config: Config) -> TableSpec:
        """Create a `TableSpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config: `Config`
            Configuration describing the table.  Nested configuration keys
            correspond to `TableSpec` attributes.

        Returns
        -------
        spec: `TableSpec`
            Specification structure for the table.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        # NOTE(review): only "columns", "unique", "foreignKeys", and "doc"
        # are read here; "indexes" and "exclusion" cannot be set from config.
        # Presumably intentional -- confirm against the schema config format.
        return cls(
            fields=NamedValueSet(FieldSpec.fromConfig(c) for c in config["columns"]),
            unique={tuple(u) for u in config.get("unique", ())},
            foreignKeys=[ForeignKeySpec.fromConfig(c) for c in config.get("foreignKeys", ())],
            doc=stripIfNotNone(config.get("doc")),
        )