# Coverage-report artifact (47% line coverage reported for
# python/lsst/daf/butler/core/ddl.py); the interactive hot-key help from the
# HTML report was captured here and is not part of the module source.
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21"""Classes for representing SQL data-definition language (DDL; "CREATE TABLE",
22etc.) in Python.
24This provides an extra layer on top of SQLAlchemy's classes for these concepts,
25because we need a level of indirection between logical tables and the actual
26SQL, and SQLAlchemy's DDL classes always map 1-1 to SQL.
28We've opted for the rather more obscure "ddl" as the name of this module
29instead of "schema" because the latter is too overloaded; in most SQL
30databases, a "schema" is also another term for a namespace.
31"""
32from __future__ import annotations
34__all__ = ("TableSpec", "FieldSpec", "ForeignKeySpec", "Base64Bytes", "Base64Region",
35 "AstropyTimeNsecTai")
37from base64 import b64encode, b64decode
38import logging
39from math import ceil
40from dataclasses import dataclass
41from typing import Any, Callable, Iterable, List, Optional, Set, Tuple, Type, TYPE_CHECKING, Union
43import sqlalchemy
44import astropy.time
46from lsst.sphgeom import Region
47from .config import Config
48from .exceptions import ValidationError
49from . import time_utils
50from .utils import iterable, stripIfNotNone
51from .named import NamedValueSet
if TYPE_CHECKING:
    from .timespan import TimespanDatabaseRepresentation
57_LOG = logging.getLogger(__name__)
class SchemaValidationError(ValidationError):
    """Exception used to indicate problems in Registry schema configuration.
    """

    @classmethod
    def translate(cls, caught: Type[Exception], message: str) -> Callable:
        """A decorator that re-raises exceptions as `SchemaValidationError`.

        Decorated functions must be class or instance methods, with a
        ``config`` parameter as their first argument.  This will be passed
        to ``message.format()`` as a keyword argument, along with ``err``,
        the original exception.

        Parameters
        ----------
        caught : `type` (`Exception` subclass)
            The type of exception to catch.
        message : `str`
            A `str.format` string that may contain named placeholders for
            ``config``, ``err``, or any keyword-only argument accepted by
            the decorated function.

        Returns
        -------
        decorator : `Callable`
            A decorator for a method taking ``config`` as its first
            positional argument after ``self``.
        """
        def decorate(func: Callable) -> Callable:
            def decorated(self: Any, config: Config, *args: Any, **kwds: Any) -> Any:
                try:
                    return func(self, config, *args, **kwds)
                except caught as err:
                    # Chain the original exception (``from err``) so the
                    # underlying cause is preserved in tracebacks instead of
                    # being reported as "during handling of the above
                    # exception, another exception occurred".
                    raise cls(message.format(config=str(config), err=err)) from err
            return decorated
        return decorate
class Base64Bytes(sqlalchemy.TypeDecorator):
    """A SQLAlchemy custom type that maps Python `bytes` to a base64-encoded
    `sqlalchemy.Text` field.
    """

    impl = sqlalchemy.Text

    def __init__(self, nbytes: int, *args: Any, **kwargs: Any):
        # A base64 encoding of n bytes occupies 4*ceil(n/3) characters.  An
        # explicit length is only meaningful when a subclass overrides
        # ``impl`` with `sqlalchemy.String`; for `sqlalchemy.Text` it is
        # left unset.
        if self.impl == sqlalchemy.String:
            length = 4 * ceil(nbytes / 3)
        else:
            length = None
        super().__init__(*args, length=length, **kwargs)
        self.nbytes = nbytes

    def process_bind_param(self, value: Optional[bytes], dialect: sqlalchemy.engine.Dialect
                           ) -> Optional[str]:
        # Incoming 'value' is native `bytes`; base64-encode it and decode the
        # result to an ASCII `str`, since SQLAlchemy expects `str` for
        # String/Text columns.
        if value is None:
            return None
        if isinstance(value, bytes):
            return b64encode(value).decode("ascii")
        raise TypeError(
            f"Base64Bytes fields require 'bytes' values; got '{value}' with type {type(value)}."
        )

    def process_result_value(self, value: Optional[str], dialect: sqlalchemy.engine.Dialect
                             ) -> Optional[bytes]:
        # Incoming 'value' is an ASCII `str` holding base64 text; turn it
        # back into native `bytes`.
        if value is None:
            return None
        return b64decode(value.encode("ascii"))
class Base64Region(Base64Bytes):
    """A SQLAlchemy custom type that maps Python `sphgeom.Region` to a
    base64-encoded `sqlalchemy.String`.
    """

    def process_bind_param(self, value: Optional[Region], dialect: sqlalchemy.engine.Dialect
                           ) -> Optional[str]:
        # Serialize the region to bytes, then delegate base64 encoding to
        # the base class.
        return None if value is None else super().process_bind_param(value.encode(), dialect)

    def process_result_value(self, value: Optional[str], dialect: sqlalchemy.engine.Dialect
                             ) -> Optional[Region]:
        # Base class returns the raw serialized bytes (or None); rebuild the
        # Region from them.
        decoded = super().process_result_value(value, dialect)
        return None if decoded is None else Region.decode(decoded)
class AstropyTimeNsecTai(sqlalchemy.TypeDecorator):
    """A SQLAlchemy custom type that maps Python `astropy.time.Time` to a
    number of nanoseconds since Unix epoch in TAI scale.
    """

    impl = sqlalchemy.BigInteger

    def process_bind_param(self, value: Optional[astropy.time.Time], dialect: sqlalchemy.engine.Dialect
                           ) -> Optional[int]:
        # Reject anything that is not an astropy Time (None passes through).
        if value is None:
            return None
        if not isinstance(value, astropy.time.Time):
            raise TypeError(f"Unsupported type: {type(value)}, expected astropy.time.Time")
        # Convert to integer nanoseconds since the Unix epoch, TAI scale.
        return time_utils.TimeConverter().astropy_to_nsec(value)

    def process_result_value(self, value: Optional[int], dialect: sqlalchemy.engine.Dialect
                             ) -> Optional[astropy.time.Time]:
        # 'value' is nanoseconds since epoch, or None.
        if value is None:
            return None
        return time_utils.TimeConverter().nsec_to_astropy(value)
# Mapping from the type-name strings accepted in schema configuration files
# to the SQLAlchemy (or custom, defined above) column types they produce.
# Used by `FieldSpec.fromConfig` to translate the "type" config key.
VALID_CONFIG_COLUMN_TYPES = {
    "string": sqlalchemy.String,
    "int": sqlalchemy.BigInteger,
    "float": sqlalchemy.Float,
    "region": Base64Region,
    "bool": sqlalchemy.Boolean,
    "blob": sqlalchemy.LargeBinary,
    "datetime": AstropyTimeNsecTai,
    "hash": Base64Bytes
}
@dataclass
class FieldSpec:
    """A struct-like class used to define a column in a logical `Registry`
    table.
    """

    name: str
    """Name of the column."""

    dtype: type
    """Type of the column; usually a `type` subclass provided by SQLAlchemy
    that defines both a Python type and a corresponding precise SQL type.
    """

    length: Optional[int] = None
    """Length of the type in the database, for variable-length types."""

    nbytes: Optional[int] = None
    """Natural length used for hash and encoded-region columns, to be converted
    into the post-encoding length.
    """

    primaryKey: bool = False
    """Whether this field is (part of) its table's primary key."""

    autoincrement: bool = False
    """Whether the database should insert automatically incremented values when
    no value is provided in an INSERT.
    """

    nullable: bool = True
    """Whether this field is allowed to be NULL."""

    default: Any = None
    """A server-side default value for this field.

    This is passed directly as the ``server_default`` argument to
    `sqlalchemy.schema.Column`.  It does _not_ go through SQLAlchemy's usual
    type conversion or quoting for Python literals, and should hence be used
    with care.  See the SQLAlchemy documentation for more information.
    """

    doc: Optional[str] = None
    """Documentation for this field."""

    def __eq__(self, other: Any) -> bool:
        # Equality (and hashing, below) is by column name only, so FieldSpec
        # works as a member of NamedValueSet.
        if isinstance(other, FieldSpec):
            return self.name == other.name
        else:
            return NotImplemented

    def __hash__(self) -> int:
        return hash(self.name)

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in column config '{config}'.")
    def fromConfig(cls, config: Config, **kwds: Any) -> FieldSpec:
        """Create a `FieldSpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config: `Config`
            Configuration describing the column.  Nested configuration keys
            correspond to `FieldSpec` attributes.
        kwds
            Additional keyword arguments that provide defaults for values
            not present in config.

        Returns
        -------
        spec: `FieldSpec`
            Specification structure for the column.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        dtype = VALID_CONFIG_COLUMN_TYPES.get(config["type"])
        if dtype is None:
            raise SchemaValidationError(f"Invalid field type string: '{config['type']}'.")
        if not config["name"].islower():
            raise SchemaValidationError(f"Column name '{config['name']}' is not all lowercase.")
        self = cls(name=config["name"], dtype=dtype, **kwds)
        self.length = config.get("length", self.length)
        self.nbytes = config.get("nbytes", self.nbytes)
        # length and nbytes are alternative ways to size a column; at most
        # one may be given.
        if self.length is not None and self.nbytes is not None:
            raise SchemaValidationError(f"Both length and nbytes provided for field '{self.name}'.")
        self.primaryKey = config.get("primaryKey", self.primaryKey)
        self.autoincrement = config.get("autoincrement", self.autoincrement)
        # Primary-key columns default to NOT NULL unless config overrides.
        self.nullable = config.get("nullable", False if self.primaryKey else self.nullable)
        self.doc = stripIfNotNone(config.get("doc", None))
        return self

    def isStringType(self) -> bool:
        """Indicate that this is a sqlalchemy.String field spec.

        Returns
        -------
        isString : `bool`
            The field refers to a `sqlalchemy.String` and not any other type.
            This can return `False` even if the object was created with a
            string type if it has been decided that it should be implemented
            as a `sqlalchemy.Text` type.
        """
        # Simplified from the original, which repeated the dtype comparison
        # redundantly inside a nested conditional.  Only short strings
        # (length <= 32) are retained as String; longer or unsized string
        # fields are implemented as Text (see getSizedColumnType).
        return bool(self.dtype == sqlalchemy.String and self.length and self.length <= 32)

    def getSizedColumnType(self) -> sqlalchemy.types.TypeEngine:
        """Return a sized version of the column type, utilizing either (or
        neither) of ``self.length`` and ``self.nbytes``.

        Returns
        -------
        dtype : `sqlalchemy.types.TypeEngine`
            A SQLAlchemy column type object.
        """
        if self.length is not None:
            # Last chance check that we are only looking at possible String
            if self.dtype == sqlalchemy.String and not self.isStringType():
                return sqlalchemy.Text
            return self.dtype(length=self.length)
        if self.nbytes is not None:
            return self.dtype(nbytes=self.nbytes)
        return self.dtype

    def getPythonType(self) -> type:
        """Return the Python type associated with this field's (SQL) dtype.

        Returns
        -------
        type : `type`
            Python type associated with this field's (SQL) `dtype`.
        """
        return self.dtype().python_type
@dataclass
class ForeignKeySpec:
    """A struct-like class used to define a foreign key constraint in a logical
    `Registry` table.
    """

    table: str
    """Name of the target table."""

    source: Tuple[str, ...]
    """Tuple of source table column names."""

    target: Tuple[str, ...]
    """Tuple of target table column names."""

    onDelete: Optional[str] = None
    """SQL clause indicating how to handle deletes to the target table.

    If not `None` (which indicates that a constraint violation exception should
    be raised), should be either "SET NULL" or "CASCADE".
    """

    addIndex: bool = True
    """If `True`, create an index on the columns of this foreign key in the
    source table.
    """

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in foreignKey config '{config}'.")
    def fromConfig(cls, config: Config) -> ForeignKeySpec:
        """Create a `ForeignKeySpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config: `Config`
            Configuration describing the constraint.  Nested configuration keys
            correspond to `ForeignKeySpec` attributes.

        Returns
        -------
        spec: `ForeignKeySpec`
            Specification structure for the constraint.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        # Normalize the source/target column lists (which may be scalars in
        # config) to tuples before constructing the spec.
        sourceColumns = tuple(iterable(config["source"]))
        targetColumns = tuple(iterable(config["target"]))
        return cls(table=config["table"],
                   source=sourceColumns,
                   target=targetColumns,
                   onDelete=config.get("onDelete", None))
@dataclass
class TableSpec:
    """A struct-like class used to define a table or table-like
    query interface.

    Parameters
    ----------
    fields : `Iterable` [ `FieldSpec` ]
        Specifications for the columns in this table.
    unique : `Iterable` [ `tuple` [ `str` ] ], optional
        Non-primary-key unique constraints for the table.
    indexes: `Iterable` [ `tuple` [ `str` ] ], optional
        Indexes for the table.
    foreignKeys : `Iterable` [ `ForeignKeySpec` ], optional
        Foreign key constraints for the table.
    exclusion : `Iterable` [ `tuple` [ `str` or `type` ] ]
        Special constraints that prohibit overlaps between timespans over rows
        where other columns are equal.  These take the same form as unique
        constraints, but each tuple may contain a single
        `TimespanDatabaseRepresentation` subclass representing a timespan
        column.
    recycleIds : bool, optional
        If `True`, allow databases that might normally recycle autoincrement
        IDs to do so (usually better for performance) on any autoincrement
        field in this table.
    doc : str, optional
        Documentation for the table.
    """
    # NOTE: a hand-written __init__ is used here (dataclass will not override
    # an __init__ defined in the class body) so that arbitrary iterables can
    # be normalized into the concrete container types declared below.
    def __init__(
        self, fields: Iterable[FieldSpec], *,
        unique: Iterable[Tuple[str, ...]] = (),
        indexes: Iterable[Tuple[str, ...]] = (),
        foreignKeys: Iterable[ForeignKeySpec] = (),
        exclusion: Iterable[Tuple[Union[str, Type[TimespanDatabaseRepresentation]], ...]] = (),
        recycleIds: bool = True,
        doc: Optional[str] = None,
    ):
        self.fields = NamedValueSet(fields)
        self.unique = set(unique)
        self.indexes = set(indexes)
        self.foreignKeys = list(foreignKeys)
        self.exclusion = set(exclusion)
        self.recycleIds = recycleIds
        self.doc = doc

    fields: NamedValueSet[FieldSpec]
    """Specifications for the columns in this table."""

    unique: Set[Tuple[str, ...]]
    """Non-primary-key unique constraints for the table."""

    indexes: Set[Tuple[str, ...]]
    """Indexes for the table."""

    foreignKeys: List[ForeignKeySpec]
    """Foreign key constraints for the table."""

    exclusion: Set[Tuple[Union[str, Type[TimespanDatabaseRepresentation]], ...]]
    """Exclusion constraints for the table.

    Exclusion constraints behave mostly like unique constraints, but may
    contain a database-native Timespan column that is restricted to not overlap
    across rows (for identical combinations of any non-Timespan columns in the
    constraint).
    """

    recycleIds: bool = True
    """If `True`, allow databases that might normally recycle autoincrement IDs
    to do so (usually better for performance) on any autoincrement field in
    this table.
    """

    doc: Optional[str] = None
    """Documentation for the table."""

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in table config '{config}'.")
    def fromConfig(cls, config: Config) -> TableSpec:
        """Create a `TableSpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config: `Config`
            Configuration describing the table.  Nested configuration keys
            correspond to `TableSpec` attributes.

        Returns
        -------
        spec: `TableSpec`
            Specification structure for the table.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        # Only "columns", "unique", "foreignKeys", and "doc" are read here;
        # "indexes" and "exclusion" are presumably not expressible in config
        # and fall back to their (empty) defaults — TODO confirm with the
        # schema config format.
        return cls(
            fields=NamedValueSet(FieldSpec.fromConfig(c) for c in config["columns"]),
            unique={tuple(u) for u in config.get("unique", ())},
            foreignKeys=[ForeignKeySpec.fromConfig(c) for c in config.get("foreignKeys", ())],
            doc=stripIfNotNone(config.get("doc")),
        )
475 )