Coverage for python/lsst/daf/butler/core/ddl.py: 46%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21"""Classes for representing SQL data-definition language (DDL) in Python.
23This includes "CREATE TABLE" etc.
25This provides an extra layer on top of SQLAlchemy's classes for these concepts,
26because we need a level of indirection between logical tables and the actual
27SQL, and SQLAlchemy's DDL classes always map 1-1 to SQL.
29We've opted for the rather more obscure "ddl" as the name of this module
30instead of "schema" because the latter is too overloaded; in most SQL
31databases, a "schema" is also another term for a namespace.
32"""
33from __future__ import annotations
35from lsst import sphgeom
37__all__ = ("TableSpec", "FieldSpec", "ForeignKeySpec", "Base64Bytes", "Base64Region",
38 "AstropyTimeNsecTai", "GUID")
40from base64 import b64encode, b64decode
41import logging
42from math import ceil
43from dataclasses import dataclass
44from typing import Any, Callable, Iterable, List, Optional, Set, Tuple, Type, TYPE_CHECKING, Union
45import uuid
47import sqlalchemy
48from sqlalchemy.dialects.postgresql import UUID
49import astropy.time
51from lsst.utils.iteration import ensure_iterable
52from lsst.sphgeom import Region
53from .config import Config
54from .exceptions import ValidationError
55from . import time_utils
56from .utils import stripIfNotNone
57from .named import NamedValueSet
if TYPE_CHECKING:
    # Imported only for type annotations to avoid an import cycle at runtime.
    from .timespan import TimespanDatabaseRepresentation

# Module-level logger for this module.
_LOG = logging.getLogger(__name__)
class SchemaValidationError(ValidationError):
    """Exceptions that indicate problems in Registry schema configuration."""

    @classmethod
    def translate(cls, caught: Type[Exception], message: str) -> Callable:
        """Return decorator to re-raise exceptions as `SchemaValidationError`.

        Decorated functions must be class or instance methods, with a
        ``config`` parameter as their first argument.  This will be passed
        to ``message.format()`` as a keyword argument, along with ``err``,
        the original exception.

        Parameters
        ----------
        caught : `type` (`Exception` subclass)
            The type of exception to catch.
        message : `str`
            A `str.format` string that may contain named placeholders for
            ``config``, ``err``, or any keyword-only argument accepted by
            the decorated function.
        """
        def decorate(func: Callable) -> Callable:
            def decorated(self: Any, config: Config, *args: Any, **kwargs: Any) -> Any:
                try:
                    return func(self, config, *args, **kwargs)
                except caught as err:
                    # Chain the original exception (``from err``) so its
                    # traceback and message are preserved for debugging
                    # instead of being silently discarded.
                    raise cls(message.format(config=str(config), err=err)) from err
            return decorated
        return decorate
class Base64Bytes(sqlalchemy.TypeDecorator):
    """A SQLAlchemy custom type for Python `bytes`.

    Maps Python `bytes` to a base64-encoded `sqlalchemy.Text` field.
    """

    impl = sqlalchemy.Text

    cache_ok = True

    def __init__(self, nbytes: int, *args: Any, **kwargs: Any):
        # Base64 inflates the payload: every 3 input bytes become 4 output
        # characters.  Only String-backed implementations need an explicit
        # length; Text columns are unbounded.
        if self.impl == sqlalchemy.String:
            length = 4 * ceil(nbytes / 3)
        else:
            length = None
        super().__init__(*args, length=length, **kwargs)
        self.nbytes = nbytes

    def process_bind_param(self, value: Optional[bytes], dialect: sqlalchemy.engine.Dialect
                           ) -> Optional[str]:
        # Encode native `bytes` to base64, then to the ASCII `str` that
        # SQLAlchemy expects for String/Text fields.
        if value is None:
            return None
        if not isinstance(value, bytes):
            raise TypeError(
                f"Base64Bytes fields require 'bytes' values; got '{value}' with type {type(value)}."
            )
        return b64encode(value).decode("ascii")

    def process_result_value(self, value: Optional[str], dialect: sqlalchemy.engine.Dialect
                             ) -> Optional[bytes]:
        # Reverse of process_bind_param: ASCII base64 `str` back to native
        # `bytes`; NULLs pass through unchanged.
        if value is None:
            return None
        return b64decode(value.encode("ascii"))

    @property
    def python_type(self) -> Type[bytes]:
        return bytes
# Create an alias for Base64Bytes so later code (see FieldSpec.getPythonType)
# can refer to it unambiguously, distinct from the built-in sqlalchemy types.
LocalBase64Bytes = Base64Bytes
class Base64Region(Base64Bytes):
    """A SQLAlchemy custom type for Python `sphgeom.Region`.

    Maps Python `sphgeom.Region` to a base64-encoded `sqlalchemy.String`.
    """

    def process_bind_param(self, value: Optional[Region], dialect: sqlalchemy.engine.Dialect
                           ) -> Optional[str]:
        # Serialize the region to bytes, then let the base class handle the
        # base64/ASCII conversion; NULLs pass through.
        return None if value is None else super().process_bind_param(value.encode(), dialect)

    def process_result_value(self, value: Optional[str], dialect: sqlalchemy.engine.Dialect
                             ) -> Optional[Region]:
        # Base class yields raw bytes (or None for NULL); decode those back
        # into a Region object.
        raw = super().process_result_value(value, dialect)
        return None if raw is None else Region.decode(raw)

    @property
    def python_type(self) -> Type[sphgeom.Region]:
        return sphgeom.Region
class AstropyTimeNsecTai(sqlalchemy.TypeDecorator):
    """A SQLAlchemy custom type for Python `astropy.time.Time`.

    Maps Python `astropy.time.Time` to a number of nanoseconds since Unix
    epoch in TAI scale.
    """

    impl = sqlalchemy.BigInteger

    cache_ok = True

    def process_bind_param(self, value: Optional[astropy.time.Time], dialect: sqlalchemy.engine.Dialect
                           ) -> Optional[int]:
        # Store times as integer TAI nanoseconds since the Unix epoch.
        if value is None:
            return None
        if not isinstance(value, astropy.time.Time):
            raise TypeError(f"Unsupported type: {type(value)}, expected astropy.time.Time")
        return time_utils.TimeConverter().astropy_to_nsec(value)

    def process_result_value(self, value: Optional[int], dialect: sqlalchemy.engine.Dialect
                             ) -> Optional[astropy.time.Time]:
        # Reverse of process_bind_param: nanoseconds since epoch (or NULL)
        # back to an astropy Time.
        return None if value is None else time_utils.TimeConverter().nsec_to_astropy(value)
class GUID(sqlalchemy.TypeDecorator):
    """Platform-independent GUID type.

    Uses PostgreSQL's UUID type, otherwise uses CHAR(32), storing as
    stringified hex values.
    """

    impl = sqlalchemy.CHAR

    cache_ok = True

    def load_dialect_impl(self, dialect: sqlalchemy.Dialect) -> sqlalchemy.TypeEngine:
        # Only PostgreSQL has a native UUID column type; everywhere else a
        # fixed-width hex string is used.
        if dialect.name == 'postgresql':
            return dialect.type_descriptor(UUID())
        return dialect.type_descriptor(sqlalchemy.CHAR(32))

    def process_bind_param(self, value: Any, dialect: sqlalchemy.Dialect) -> Optional[str]:
        if value is None:
            return None

        # Coerce input to UUID type, in general having UUID on input is the
        # only thing that we want but there is code right now that uses ints.
        if isinstance(value, uuid.UUID):
            coerced = value
        elif isinstance(value, int):
            coerced = uuid.UUID(int=value)
        elif isinstance(value, bytes):
            coerced = uuid.UUID(bytes=value)
        elif isinstance(value, str):
            # hexstring
            coerced = uuid.UUID(hex=value)
        else:
            raise TypeError(f"Unexpected type of a bind value: {type(value)}")

        if dialect.name == 'postgresql':
            return str(coerced)
        # 32 lowercase hex digits, zero-padded, matching CHAR(32).
        return "%.32x" % coerced.int

    def process_result_value(self, value: Optional[str], dialect: sqlalchemy.Dialect) -> Optional[uuid.UUID]:
        # Both backends store a hex representation, so one decode path works.
        return None if value is None else uuid.UUID(hex=value)
# Mapping from the "type" strings accepted in schema configuration to the
# SQLAlchemy (or custom, defined above) column type classes they resolve to.
# Consumed by FieldSpec.fromConfig when validating column definitions.
VALID_CONFIG_COLUMN_TYPES = {
    "string": sqlalchemy.String,
    "int": sqlalchemy.BigInteger,
    "float": sqlalchemy.Float,
    "region": Base64Region,
    "bool": sqlalchemy.Boolean,
    "blob": sqlalchemy.LargeBinary,
    "datetime": AstropyTimeNsecTai,
    "hash": Base64Bytes,
    "uuid": GUID,
}
@dataclass
class FieldSpec:
    """A data class for defining a column in a logical `Registry` table."""

    name: str
    """Name of the column."""

    dtype: type
    """Type of the column; usually a `type` subclass provided by SQLAlchemy
    that defines both a Python type and a corresponding precise SQL type.
    """

    length: Optional[int] = None
    """Length of the type in the database, for variable-length types."""

    nbytes: Optional[int] = None
    """Natural length used for hash and encoded-region columns, to be converted
    into the post-encoding length.
    """

    primaryKey: bool = False
    """Whether this field is (part of) its table's primary key."""

    autoincrement: bool = False
    """Whether the database should insert automatically incremented values when
    no value is provided in an INSERT.
    """

    nullable: bool = True
    """Whether this field is allowed to be NULL."""

    default: Any = None
    """A server-side default value for this field.

    This is passed directly as the ``server_default`` argument to
    `sqlalchemy.schema.Column`.  It does _not_ go through SQLAlchemy's usual
    type conversion or quoting for Python literals, and should hence be used
    with care.  See the SQLAlchemy documentation for more information.
    """

    doc: Optional[str] = None
    """Documentation for this field."""

    def __eq__(self, other: Any) -> bool:
        # Equality (and hashing, below) considers only the column name, so
        # containers of FieldSpecs are effectively keyed by name.
        if isinstance(other, FieldSpec):
            return self.name == other.name
        else:
            return NotImplemented

    def __hash__(self) -> int:
        return hash(self.name)

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in column config '{config}'.")
    def fromConfig(cls, config: Config, **kwargs: Any) -> FieldSpec:
        """Create a `FieldSpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config: `Config`
            Configuration describing the column.  Nested configuration keys
            correspond to `FieldSpec` attributes.
        **kwargs
            Additional keyword arguments that provide defaults for values
            not present in config.

        Returns
        -------
        spec: `FieldSpec`
            Specification structure for the column.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        dtype = VALID_CONFIG_COLUMN_TYPES.get(config["type"])
        if dtype is None:
            raise SchemaValidationError(f"Invalid field type string: '{config['type']}'.")
        if not config["name"].islower():
            raise SchemaValidationError(f"Column name '{config['name']}' is not all lowercase.")
        self = cls(name=config["name"], dtype=dtype, **kwargs)
        self.length = config.get("length", self.length)
        self.nbytes = config.get("nbytes", self.nbytes)
        if self.length is not None and self.nbytes is not None:
            raise SchemaValidationError(f"Both length and nbytes provided for field '{self.name}'.")
        self.primaryKey = config.get("primaryKey", self.primaryKey)
        self.autoincrement = config.get("autoincrement", self.autoincrement)
        # Primary-key columns default to NOT NULL unless config says otherwise.
        self.nullable = config.get("nullable", False if self.primaryKey else self.nullable)
        self.doc = stripIfNotNone(config.get("doc", None))
        return self

    def isStringType(self) -> bool:
        """Indicate that this is a sqlalchemy.String field spec.

        Returns
        -------
        isString : `bool`
            The field refers to a `sqlalchemy.String` and not any other type.
            This can return `False` even if the object was created with a
            string type if it has been decided that it should be implemented
            as a `sqlalchemy.Text` type.
        """
        # Only short, bounded strings are kept as String; longer or unbounded
        # string fields become Text (see getSizedColumnType).  The original
        # nested `if` repeated the String comparison redundantly; collapsed
        # into one expression with identical semantics.
        return bool(self.dtype == sqlalchemy.String and self.length and self.length <= 32)

    def getSizedColumnType(self) -> sqlalchemy.types.TypeEngine:
        """Return a sized version of the column type.

        Utilizes either (or neither) of ``self.length`` and ``self.nbytes``.

        Returns
        -------
        dtype : `sqlalchemy.types.TypeEngine`
            A SQLAlchemy column type object.
        """
        if self.length is not None:
            # Last chance check that we are only looking at possible String;
            # long strings are implemented as unbounded Text instead.
            if self.dtype == sqlalchemy.String and not self.isStringType():
                return sqlalchemy.Text
            return self.dtype(length=self.length)
        if self.nbytes is not None:
            return self.dtype(nbytes=self.nbytes)
        return self.dtype

    def getPythonType(self) -> type:
        """Return the Python type associated with this field's (SQL) dtype.

        Returns
        -------
        type : `type`
            Python type associated with this field's (SQL) `dtype`.
        """
        # to construct these objects, nbytes keyword is needed
        if issubclass(self.dtype, LocalBase64Bytes):
            # satisfy mypy for something that must be true
            assert self.nbytes is not None
            return self.dtype(nbytes=self.nbytes).python_type
        else:
            return self.dtype().python_type  # type: ignore
@dataclass
class ForeignKeySpec:
    """Definition of a foreign key constraint in a logical `Registry` table."""

    table: str
    """Name of the target table."""

    source: Tuple[str, ...]
    """Tuple of source table column names."""

    target: Tuple[str, ...]
    """Tuple of target table column names."""

    onDelete: Optional[str] = None
    """SQL clause indicating how to handle deletes to the target table.

    If not `None` (which indicates that a constraint violation exception should
    be raised), should be either "SET NULL" or "CASCADE".
    """

    addIndex: bool = True
    """If `True`, create an index on the columns of this foreign key in the
    source table.
    """

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in foreignKey config '{config}'.")
    def fromConfig(cls, config: Config) -> ForeignKeySpec:
        """Create a `ForeignKeySpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config: `Config`
            Configuration describing the constraint.  Nested configuration keys
            correspond to `ForeignKeySpec` attributes.

        Returns
        -------
        spec: `ForeignKeySpec`
            Specification structure for the constraint.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        # Scalar "source"/"target" config entries are normalized to tuples.
        source_columns = tuple(ensure_iterable(config["source"]))
        target_columns = tuple(ensure_iterable(config["target"]))
        return cls(
            table=config["table"],
            source=source_columns,
            target=target_columns,
            onDelete=config.get("onDelete", None),
        )
@dataclass
class TableSpec:
    """A data class used to define a table or table-like query interface.

    Parameters
    ----------
    fields : `Iterable` [ `FieldSpec` ]
        Specifications for the columns in this table.
    unique : `Iterable` [ `tuple` [ `str` ] ], optional
        Non-primary-key unique constraints for the table.
    indexes: `Iterable` [ `tuple` [ `str` ] ], optional
        Indexes for the table.
    foreignKeys : `Iterable` [ `ForeignKeySpec` ], optional
        Foreign key constraints for the table.
    exclusion : `Iterable` [ `tuple` [ `str` or `type` ] ]
        Special constraints that prohibit overlaps between timespans over rows
        where other columns are equal.  These take the same form as unique
        constraints, but each tuple may contain a single
        `TimespanDatabaseRepresentation` subclass representing a timespan
        column.
    recycleIds : `bool`, optional
        If `True`, allow databases that might normally recycle autoincrement
        IDs to do so (usually better for performance) on any autoincrement
        field in this table.
    doc : `str`, optional
        Documentation for the table.
    """

    # NOTE(review): this class pairs @dataclass with a hand-written __init__
    # and post-__init__ annotations; kept exactly as-is.
    def __init__(
        self, fields: Iterable[FieldSpec], *,
        unique: Iterable[Tuple[str, ...]] = (),
        indexes: Iterable[Tuple[str, ...]] = (),
        foreignKeys: Iterable[ForeignKeySpec] = (),
        exclusion: Iterable[Tuple[Union[str, Type[TimespanDatabaseRepresentation]], ...]] = (),
        recycleIds: bool = True,
        doc: Optional[str] = None,
    ):
        # Copy all iterables into concrete containers so later mutation of
        # the caller's arguments cannot affect this spec.
        self.fields = NamedValueSet(fields)
        self.unique = set(unique)
        self.indexes = set(indexes)
        self.foreignKeys = list(foreignKeys)
        self.exclusion = set(exclusion)
        self.recycleIds = recycleIds
        self.doc = doc

    fields: NamedValueSet[FieldSpec]
    """Specifications for the columns in this table."""

    unique: Set[Tuple[str, ...]]
    """Non-primary-key unique constraints for the table."""

    indexes: Set[Tuple[str, ...]]
    """Indexes for the table."""

    foreignKeys: List[ForeignKeySpec]
    """Foreign key constraints for the table."""

    exclusion: Set[Tuple[Union[str, Type[TimespanDatabaseRepresentation]], ...]]
    """Exclusion constraints for the table.

    Exclusion constraints behave mostly like unique constraints, but may
    contain a database-native Timespan column that is restricted to not overlap
    across rows (for identical combinations of any non-Timespan columns in the
    constraint).
    """

    recycleIds: bool = True
    """If `True`, allow databases that might normally recycle autoincrement IDs
    to do so (usually better for performance) on any autoincrement field in
    this table.
    """

    doc: Optional[str] = None
    """Documentation for the table."""

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in table config '{config}'.")
    def fromConfig(cls, config: Config) -> TableSpec:
        """Create a `TableSpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config: `Config`
            Configuration describing the constraint.  Nested configuration keys
            correspond to `TableSpec` attributes.

        Returns
        -------
        spec: `TableSpec`
            Specification structure for the table.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        # NOTE(review): the "indexes" and "exclusion" config keys (supported
        # by __init__) are not read here — confirm that is intentional.
        return cls(
            fields=NamedValueSet(FieldSpec.fromConfig(c) for c in config["columns"]),
            unique={tuple(u) for u in config.get("unique", ())},
            foreignKeys=[ForeignKeySpec.fromConfig(c) for c in config.get("foreignKeys", ())],
            doc=stripIfNotNone(config.get("doc")),
        )