# Coverage for python/lsst/daf/butler/core/ddl.py: 46%
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21"""Classes for representing SQL data-definition language (DDL) in Python.
This includes "CREATE TABLE", etc.
25This provides an extra layer on top of SQLAlchemy's classes for these concepts,
26because we need a level of indirection between logical tables and the actual
27SQL, and SQLAlchemy's DDL classes always map 1-1 to SQL.
29We've opted for the rather more obscure "ddl" as the name of this module
30instead of "schema" because the latter is too overloaded; in most SQL
31databases, a "schema" is also another term for a namespace.
32"""
33from __future__ import annotations
35from lsst import sphgeom
37__all__ = (
38 "TableSpec",
39 "FieldSpec",
40 "ForeignKeySpec",
41 "Base64Bytes",
42 "Base64Region",
43 "AstropyTimeNsecTai",
44 "GUID",
45)
47import logging
48import uuid
49from base64 import b64decode, b64encode
50from dataclasses import dataclass
51from math import ceil
52from typing import TYPE_CHECKING, Any, Callable, Iterable, List, Optional, Set, Tuple, Type, Union
54import astropy.time
55import sqlalchemy
56from lsst.sphgeom import Region
57from lsst.utils.iteration import ensure_iterable
58from sqlalchemy.dialects.postgresql import UUID
60from . import time_utils
61from .config import Config
62from .exceptions import ValidationError
63from .named import NamedValueSet
64from .utils import stripIfNotNone
if TYPE_CHECKING:
67 from .timespan import TimespanDatabaseRepresentation
70_LOG = logging.getLogger(__name__)
class SchemaValidationError(ValidationError):
    """Exceptions that indicate problems in Registry schema configuration."""

    @classmethod
    def translate(cls, caught: Type[Exception], message: str) -> Callable:
        """Return decorator to re-raise exceptions as `SchemaValidationError`.

        Decorated functions must be class or instance methods, with a
        ``config`` parameter as their first argument.  This will be passed
        to ``message.format()`` as a keyword argument, along with ``err``,
        the original exception.

        Parameters
        ----------
        caught : `type` (`Exception` subclass)
            The type of exception to catch.
        message : `str`
            A `str.format` string that may contain named placeholders for
            ``config``, ``err``, or any keyword-only argument accepted by
            the decorated function.
        """

        def decorate(func: Callable) -> Callable:
            def decorated(self: Any, config: Config, *args: Any, **kwargs: Any) -> Any:
                try:
                    return func(self, config, *args, **kwargs)
                except caught as err:
                    # Chain the caught exception so tracebacks show the
                    # root cause, not just the translated error.
                    raise cls(message.format(config=str(config), err=err)) from err

            return decorated

        return decorate
class Base64Bytes(sqlalchemy.TypeDecorator):
    """A SQLAlchemy custom type for Python `bytes`.

    Maps Python `bytes` to a base64-encoded `sqlalchemy.Text` field.
    """

    impl = sqlalchemy.Text

    cache_ok = True

    def __init__(self, nbytes: int, *args: Any, **kwargs: Any):
        # Base64 turns every 3 input bytes into 4 output characters, so a
        # sized (String) implementation needs 4*ceil(nbytes/3) characters.
        # An unsized implementation (Text) takes no length.
        if self.impl == sqlalchemy.String:
            length = 4 * ceil(nbytes / 3)
        else:
            length = None
        super().__init__(*args, length=length, **kwargs)
        self.nbytes = nbytes

    def process_bind_param(self, value: Optional[bytes], dialect: sqlalchemy.engine.Dialect) -> Optional[str]:
        # Encode native `bytes` to base64 and then to an ASCII `str`,
        # because `str` is what SQLAlchemy expects for String fields.
        if value is None:
            return None
        if not isinstance(value, bytes):
            raise TypeError(
                f"Base64Bytes fields require 'bytes' values; got '{value}' with type {type(value)}."
            )
        return b64encode(value).decode("ascii")

    def process_result_value(
        self, value: Optional[str], dialect: sqlalchemy.engine.Dialect
    ) -> Optional[bytes]:
        # Inverse of process_bind_param: the stored `str` is ASCII because
        # it is base64-encoded; decode it back to native `bytes`.
        if value is None:
            return None
        return b64decode(value.encode("ascii"))

    @property
    def python_type(self) -> Type[bytes]:
        return bytes


# Create an alias, used below to disambiguate from the built-in
# sqlalchemy type.
LocalBase64Bytes = Base64Bytes
class Base64Region(Base64Bytes):
    """A SQLAlchemy custom type for Python `sphgeom.Region`.

    Maps Python `sphgeom.Region` to a base64-encoded `sqlalchemy.String`.
    """

    def process_bind_param(
        self, value: Optional[Region], dialect: sqlalchemy.engine.Dialect
    ) -> Optional[str]:
        # Serialize the region to bytes, then delegate the base64 encoding
        # to the base class.
        if value is None:
            return None
        return super().process_bind_param(value.encode(), dialect)

    def process_result_value(
        self, value: Optional[str], dialect: sqlalchemy.engine.Dialect
    ) -> Optional[Region]:
        # Delegate base64 decoding to the base class, then deserialize.
        decoded = super().process_result_value(value, dialect)
        if decoded is None:
            return None
        return Region.decode(decoded)

    @property
    def python_type(self) -> Type[sphgeom.Region]:
        return sphgeom.Region
class AstropyTimeNsecTai(sqlalchemy.TypeDecorator):
    """A SQLAlchemy custom type for Python `astropy.time.Time`.

    Maps Python `astropy.time.Time` to a number of nanoseconds since Unix
    epoch in TAI scale.
    """

    impl = sqlalchemy.BigInteger

    cache_ok = True

    def process_bind_param(
        self, value: Optional[astropy.time.Time], dialect: sqlalchemy.engine.Dialect
    ) -> Optional[int]:
        if value is None:
            return None
        if not isinstance(value, astropy.time.Time):
            raise TypeError(f"Unsupported type: {type(value)}, expected astropy.time.Time")
        # Store as integer nanoseconds since Unix epoch, TAI scale.
        return time_utils.TimeConverter().astropy_to_nsec(value)

    def process_result_value(
        self, value: Optional[int], dialect: sqlalchemy.engine.Dialect
    ) -> Optional[astropy.time.Time]:
        # `value` is nanoseconds since epoch, or None.
        if value is None:
            return None
        return time_utils.TimeConverter().nsec_to_astropy(value)
class GUID(sqlalchemy.TypeDecorator):
    """Platform-independent GUID type.

    Uses PostgreSQL's UUID type, otherwise uses CHAR(32), storing as
    stringified hex values.
    """

    impl = sqlalchemy.CHAR

    cache_ok = True

    def load_dialect_impl(self, dialect: sqlalchemy.Dialect) -> sqlalchemy.TypeEngine:
        # PostgreSQL has a native UUID column type; everywhere else fall
        # back to a fixed-width hex string.
        if dialect.name == "postgresql":
            return dialect.type_descriptor(UUID())
        return dialect.type_descriptor(sqlalchemy.CHAR(32))

    def process_bind_param(self, value: Any, dialect: sqlalchemy.Dialect) -> Optional[str]:
        if value is None:
            return value

        # Coerce input to UUID type; in general a UUID is the only thing we
        # want on input, but there is code right now that uses ints, bytes,
        # and hex strings as well.
        if not isinstance(value, uuid.UUID):
            if isinstance(value, int):
                value = uuid.UUID(int=value)
            elif isinstance(value, bytes):
                value = uuid.UUID(bytes=value)
            elif isinstance(value, str):
                # hexstring
                value = uuid.UUID(hex=value)
            else:
                raise TypeError(f"Unexpected type of a bind value: {type(value)}")

        if dialect.name == "postgresql":
            return str(value)
        return "%.32x" % value.int

    def process_result_value(self, value: Optional[str], dialect: sqlalchemy.Dialect) -> Optional[uuid.UUID]:
        if value is None:
            return value
        return uuid.UUID(hex=value)
# Mapping from the "type" strings accepted in schema configuration files to
# the SQLAlchemy (or custom, defined above) column types that implement them.
# Used by FieldSpec.fromConfig to validate and resolve the configured type.
VALID_CONFIG_COLUMN_TYPES = {
    "string": sqlalchemy.String,
    "int": sqlalchemy.BigInteger,
    "float": sqlalchemy.Float,
    "region": Base64Region,
    "bool": sqlalchemy.Boolean,
    "blob": sqlalchemy.LargeBinary,
    "datetime": AstropyTimeNsecTai,
    "hash": Base64Bytes,
    "uuid": GUID,
}
@dataclass
class FieldSpec:
    """A data class for defining a column in a logical `Registry` table."""

    name: str
    """Name of the column."""

    dtype: type
    """Type of the column; usually a `type` subclass provided by SQLAlchemy
    that defines both a Python type and a corresponding precise SQL type.
    """

    length: Optional[int] = None
    """Length of the type in the database, for variable-length types."""

    nbytes: Optional[int] = None
    """Natural length used for hash and encoded-region columns, to be converted
    into the post-encoding length.
    """

    primaryKey: bool = False
    """Whether this field is (part of) its table's primary key."""

    autoincrement: bool = False
    """Whether the database should insert automatically incremented values when
    no value is provided in an INSERT.
    """

    nullable: bool = True
    """Whether this field is allowed to be NULL."""

    default: Any = None
    """A server-side default value for this field.

    This is passed directly as the ``server_default`` argument to
    `sqlalchemy.schema.Column`.  It does _not_ go through SQLAlchemy's usual
    type conversion or quoting for Python literals, and should hence be used
    with care.  See the SQLAlchemy documentation for more information.
    """

    doc: Optional[str] = None
    """Documentation for this field."""

    def __eq__(self, other: Any) -> bool:
        # Equality (and hashing, below) is by column name only, so a
        # NamedValueSet of FieldSpecs behaves as a set of column names.
        if isinstance(other, FieldSpec):
            return self.name == other.name
        else:
            return NotImplemented

    def __hash__(self) -> int:
        return hash(self.name)

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in column config '{config}'.")
    def fromConfig(cls, config: Config, **kwargs: Any) -> FieldSpec:
        """Create a `FieldSpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config: `Config`
            Configuration describing the column.  Nested configuration keys
            correspond to `FieldSpec` attributes.
        **kwargs
            Additional keyword arguments that provide defaults for values
            not present in config.

        Returns
        -------
        spec: `FieldSpec`
            Specification structure for the column.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        dtype = VALID_CONFIG_COLUMN_TYPES.get(config["type"])
        if dtype is None:
            raise SchemaValidationError(f"Invalid field type string: '{config['type']}'.")
        if not config["name"].islower():
            raise SchemaValidationError(f"Column name '{config['name']}' is not all lowercase.")
        self = cls(name=config["name"], dtype=dtype, **kwargs)
        self.length = config.get("length", self.length)
        self.nbytes = config.get("nbytes", self.nbytes)
        if self.length is not None and self.nbytes is not None:
            raise SchemaValidationError(f"Both length and nbytes provided for field '{self.name}'.")
        self.primaryKey = config.get("primaryKey", self.primaryKey)
        self.autoincrement = config.get("autoincrement", self.autoincrement)
        # Primary-key columns default to NOT NULL unless the config says
        # otherwise explicitly.
        self.nullable = config.get("nullable", False if self.primaryKey else self.nullable)
        self.doc = stripIfNotNone(config.get("doc", None))
        return self

    def isStringType(self) -> bool:
        """Indicate that this is a sqlalchemy.String field spec.

        Returns
        -------
        isString : `bool`
            The field refers to a `sqlalchemy.String` and not any other type.
            This can return `False` even if the object was created with a
            string type if it has been decided that it should be implemented
            as a `sqlalchemy.Text` type.
        """
        # Fixed: previously the method fell through and returned None (not
        # False) for non-String dtypes despite the `-> bool` annotation, and
        # repeated the dtype check redundantly.  Only short strings with an
        # explicit length <= 32 are retained as String.
        return bool(self.dtype == sqlalchemy.String and self.length and self.length <= 32)

    def getSizedColumnType(self) -> sqlalchemy.types.TypeEngine:
        """Return a sized version of the column type.

        Utilizes either (or neither) of ``self.length`` and ``self.nbytes``.

        Returns
        -------
        dtype : `sqlalchemy.types.TypeEngine`
            A SQLAlchemy column type object.
        """
        if self.length is not None:
            # Last chance check that we are only looking at possible String.
            if self.dtype == sqlalchemy.String and not self.isStringType():
                # NOTE(review): this returns the Text type class rather than
                # an instance; SQLAlchemy accepts both for column definitions.
                return sqlalchemy.Text
            return self.dtype(length=self.length)
        if self.nbytes is not None:
            return self.dtype(nbytes=self.nbytes)
        return self.dtype

    def getPythonType(self) -> type:
        """Return the Python type associated with this field's (SQL) dtype.

        Returns
        -------
        type : `type`
            Python type associated with this field's (SQL) `dtype`.
        """
        # To construct these objects, the nbytes keyword is needed.
        if issubclass(self.dtype, LocalBase64Bytes):
            # satisfy mypy for something that must be true
            assert self.nbytes is not None
            return self.dtype(nbytes=self.nbytes).python_type
        else:
            return self.dtype().python_type  # type: ignore
@dataclass
class ForeignKeySpec:
    """Definition of a foreign key constraint in a logical `Registry` table."""

    table: str
    """Name of the target table."""

    source: Tuple[str, ...]
    """Tuple of source table column names."""

    target: Tuple[str, ...]
    """Tuple of target table column names."""

    onDelete: Optional[str] = None
    """SQL clause indicating how to handle deletes to the target table.

    If not `None` (which indicates that a constraint violation exception should
    be raised), should be either "SET NULL" or "CASCADE".
    """

    addIndex: bool = True
    """If `True`, create an index on the columns of this foreign key in the
    source table.
    """

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in foreignKey config '{config}'.")
    def fromConfig(cls, config: Config) -> ForeignKeySpec:
        """Create a `ForeignKeySpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config: `Config`
            Configuration describing the constraint.  Nested configuration
            keys correspond to `ForeignKeySpec` attributes.

        Returns
        -------
        spec: `ForeignKeySpec`
            Specification structure for the constraint.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        # Column lists may be given as a single name or a sequence of names;
        # normalize both forms to tuples.
        source_columns = tuple(ensure_iterable(config["source"]))
        target_columns = tuple(ensure_iterable(config["target"]))
        return cls(
            table=config["table"],
            source=source_columns,
            target=target_columns,
            onDelete=config.get("onDelete", None),
        )
@dataclass
class TableSpec:
    """A data class used to define a table or table-like query interface.

    Parameters
    ----------
    fields : `Iterable` [ `FieldSpec` ]
        Specifications for the columns in this table.
    unique : `Iterable` [ `tuple` [ `str` ] ], optional
        Non-primary-key unique constraints for the table.
    indexes: `Iterable` [ `tuple` [ `str` ] ], optional
        Indexes for the table.
    foreignKeys : `Iterable` [ `ForeignKeySpec` ], optional
        Foreign key constraints for the table.
    exclusion : `Iterable` [ `tuple` [ `str` or `type` ] ]
        Special constraints that prohibit overlaps between timespans over rows
        where other columns are equal.  These take the same form as unique
        constraints, but each tuple may contain a single
        `TimespanDatabaseRepresentation` subclass representing a timespan
        column.
    recycleIds : `bool`, optional
        If `True`, allow databases that might normally recycle autoincrement
        IDs to do so (usually better for performance) on any autoincrement
        field in this table.
    doc : `str`, optional
        Documentation for the table.
    """

    # NOTE: this class defines its own __init__, which @dataclass leaves in
    # place; the annotated class attributes below serve as documentation and
    # dataclass field declarations but the manual __init__ controls
    # construction (normalizing the iterables into concrete collections).
    def __init__(
        self,
        fields: Iterable[FieldSpec],
        *,
        unique: Iterable[Tuple[str, ...]] = (),
        indexes: Iterable[Tuple[str, ...]] = (),
        foreignKeys: Iterable[ForeignKeySpec] = (),
        exclusion: Iterable[Tuple[Union[str, Type[TimespanDatabaseRepresentation]], ...]] = (),
        recycleIds: bool = True,
        doc: Optional[str] = None,
    ):
        self.fields = NamedValueSet(fields)
        self.unique = set(unique)
        self.indexes = set(indexes)
        self.foreignKeys = list(foreignKeys)
        self.exclusion = set(exclusion)
        self.recycleIds = recycleIds
        self.doc = doc

    fields: NamedValueSet[FieldSpec]
    """Specifications for the columns in this table."""

    unique: Set[Tuple[str, ...]]
    """Non-primary-key unique constraints for the table."""

    indexes: Set[Tuple[str, ...]]
    """Indexes for the table."""

    foreignKeys: List[ForeignKeySpec]
    """Foreign key constraints for the table."""

    exclusion: Set[Tuple[Union[str, Type[TimespanDatabaseRepresentation]], ...]]
    """Exclusion constraints for the table.

    Exclusion constraints behave mostly like unique constraints, but may
    contain a database-native Timespan column that is restricted to not overlap
    across rows (for identical combinations of any non-Timespan columns in the
    constraint).
    """

    recycleIds: bool = True
    """If `True`, allow databases that might normally recycle autoincrement IDs
    to do so (usually better for performance) on any autoincrement field in
    this table.
    """

    doc: Optional[str] = None
    """Documentation for the table."""

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in table config '{config}'.")
    def fromConfig(cls, config: Config) -> TableSpec:
        """Create a `TableSpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config: `Config`
            Configuration describing the table.  Nested configuration keys
            correspond to `TableSpec` attributes.

        Returns
        -------
        spec: `TableSpec`
            Specification structure for the table.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        # Only columns, unique constraints, foreign keys, and doc are read
        # from config; indexes and exclusion constraints are set in code.
        return cls(
            fields=NamedValueSet(FieldSpec.fromConfig(c) for c in config["columns"]),
            unique={tuple(u) for u in config.get("unique", ())},
            foreignKeys=[ForeignKeySpec.fromConfig(c) for c in config.get("foreignKeys", ())],
            doc=stripIfNotNone(config.get("doc")),
        )