Coverage for python/lsst/daf/butler/core/ddl.py: 47%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21"""Classes for representing SQL data-definition language (DDL) in Python.
23This include "CREATE TABLE" etc.
25This provides an extra layer on top of SQLAlchemy's classes for these concepts,
26because we need a level of indirection between logical tables and the actual
27SQL, and SQLAlchemy's DDL classes always map 1-1 to SQL.
29We've opted for the rather more obscure "ddl" as the name of this module
30instead of "schema" because the latter is too overloaded; in most SQL
31databases, a "schema" is also another term for a namespace.
32"""
33from __future__ import annotations
35from lsst import sphgeom
37__all__ = (
38 "TableSpec",
39 "FieldSpec",
40 "ForeignKeySpec",
41 "Base64Bytes",
42 "Base64Region",
43 "AstropyTimeNsecTai",
44 "GUID",
45)
47import logging
48import uuid
49from base64 import b64decode, b64encode
50from dataclasses import dataclass
51from math import ceil
52from typing import TYPE_CHECKING, Any, Callable, Iterable, List, Optional, Set, Tuple, Type, Union
54import astropy.time
55import sqlalchemy
56from lsst.sphgeom import Region
57from lsst.utils.iteration import ensure_iterable
58from sqlalchemy.dialects.postgresql import UUID
60from . import time_utils
61from .config import Config
62from .exceptions import ValidationError
63from .named import NamedValueSet
64from .utils import stripIfNotNone
if TYPE_CHECKING:
67 from .timespan import TimespanDatabaseRepresentation
# Module-level logger; currently unused in this chunk but available to helpers.
_LOG = logging.getLogger(__name__)
class SchemaValidationError(ValidationError):
    """Exceptions that indicate problems in Registry schema configuration."""

    @classmethod
    def translate(cls, caught: Type[Exception], message: str) -> Callable:
        """Return decorator to re-raise exceptions as `SchemaValidationError`.

        Decorated functions must be class or instance methods, with a
        ``config`` parameter as their first argument.  This will be passed
        to ``message.format()`` as a keyword argument, along with ``err``,
        the original exception.

        Parameters
        ----------
        caught : `type` (`Exception` subclass)
            The type of exception to catch.
        message : `str`
            A `str.format` string that may contain named placeholders for
            ``config``, ``err``, or any keyword-only argument accepted by
            the decorated function.
        """

        def decorate(func: Callable) -> Callable:
            def decorated(self: Any, config: Config, *args: Any, **kwargs: Any) -> Any:
                try:
                    return func(self, config, *args, **kwargs)
                except caught as err:
                    # Chain the original exception so tracebacks show the
                    # root cause instead of an implicit "during handling of
                    # the above exception" context.
                    raise cls(message.format(config=str(config), err=err)) from err

            return decorated

        return decorate
class Base64Bytes(sqlalchemy.TypeDecorator):
    """A SQLAlchemy custom type for Python `bytes`.

    Maps Python `bytes` to a base64-encoded `sqlalchemy.Text` field.
    """

    impl = sqlalchemy.Text

    cache_ok = True

    def __init__(self, nbytes: int, *args: Any, **kwargs: Any):
        if self.impl == sqlalchemy.String:
            # Base64 encoding expands every 3 raw bytes into 4 characters.
            length = 4 * ceil(nbytes / 3)
        else:
            length = None
        super().__init__(*args, length=length, **kwargs)
        self.nbytes = nbytes

    def process_bind_param(self, value: Optional[bytes], dialect: sqlalchemy.engine.Dialect) -> Optional[str]:
        # Encode native `bytes` to base64 and hand SQLAlchemy an ASCII `str`,
        # since `str` is what SQLAlchemy expects for String/Text columns.
        if value is None:
            return None
        if not isinstance(value, bytes):
            raise TypeError(
                f"Base64Bytes fields require 'bytes' values; got '{value}' with type {type(value)}."
            )
        return b64encode(value).decode("ascii")

    def process_result_value(
        self, value: Optional[str], dialect: sqlalchemy.engine.Dialect
    ) -> Optional[bytes]:
        # Inverse of process_bind_param: the stored `str` is ASCII because it
        # is base64-encoded; decode it back to native `bytes`.
        if value is None:
            return None
        return b64decode(value.encode("ascii"))

    @property
    def python_type(self) -> Type[bytes]:
        return bytes
# Create an alias, for use below to disambiguate from the built-in
# sqlalchemy type of the same conceptual role.
LocalBase64Bytes = Base64Bytes
class Base64Region(Base64Bytes):
    """A SQLAlchemy custom type for Python `sphgeom.Region`.

    Maps Python `sphgeom.Region` to a base64-encoded `sqlalchemy.String`.
    """

    cache_ok = True  # have to be set explicitly in each class

    def process_bind_param(
        self, value: Optional[Region], dialect: sqlalchemy.engine.Dialect
    ) -> Optional[str]:
        # Serialize the region to raw bytes, then let the base class handle
        # the base64 encoding.
        return None if value is None else super().process_bind_param(value.encode(), dialect)

    def process_result_value(
        self, value: Optional[str], dialect: sqlalchemy.engine.Dialect
    ) -> Optional[Region]:
        # Let the base class undo the base64 encoding, then reconstruct the
        # region object from the raw bytes.
        return None if value is None else Region.decode(super().process_result_value(value, dialect))

    @property
    def python_type(self) -> Type[sphgeom.Region]:
        return sphgeom.Region
class AstropyTimeNsecTai(sqlalchemy.TypeDecorator):
    """A SQLAlchemy custom type for Python `astropy.time.Time`.

    Maps Python `astropy.time.Time` to a number of nanoseconds since Unix
    epoch in TAI scale.
    """

    impl = sqlalchemy.BigInteger

    cache_ok = True

    def process_bind_param(
        self, value: Optional[astropy.time.Time], dialect: sqlalchemy.engine.Dialect
    ) -> Optional[int]:
        # Convert an astropy Time to integer nanoseconds for storage.
        if value is None:
            return None
        if not isinstance(value, astropy.time.Time):
            raise TypeError(f"Unsupported type: {type(value)}, expected astropy.time.Time")
        return time_utils.TimeConverter().astropy_to_nsec(value)

    def process_result_value(
        self, value: Optional[int], dialect: sqlalchemy.engine.Dialect
    ) -> Optional[astropy.time.Time]:
        # 'value' is nanoseconds since epoch, or None.
        if value is None:
            return None
        return time_utils.TimeConverter().nsec_to_astropy(value)
class GUID(sqlalchemy.TypeDecorator):
    """Platform-independent GUID type.

    Uses PostgreSQL's UUID type, otherwise uses CHAR(32), storing as
    stringified hex values.
    """

    impl = sqlalchemy.CHAR

    cache_ok = True

    def load_dialect_impl(self, dialect: sqlalchemy.Dialect) -> sqlalchemy.TypeEngine:
        # PostgreSQL has a native UUID column type; every other backend gets
        # a fixed-width hex-string column instead.
        if dialect.name == "postgresql":
            return dialect.type_descriptor(UUID())
        return dialect.type_descriptor(sqlalchemy.CHAR(32))

    def process_bind_param(self, value: Any, dialect: sqlalchemy.Dialect) -> Optional[str]:
        if value is None:
            return None

        # Coerce input to UUID type, in general having UUID on input is the
        # only thing that we want but there is code right now that uses ints.
        if not isinstance(value, uuid.UUID):
            if isinstance(value, int):
                value = uuid.UUID(int=value)
            elif isinstance(value, bytes):
                value = uuid.UUID(bytes=value)
            elif isinstance(value, str):
                # hexstring
                value = uuid.UUID(hex=value)
            else:
                raise TypeError(f"Unexpected type of a bind value: {type(value)}")

        if dialect.name == "postgresql":
            return str(value)
        return "%.32x" % value.int

    def process_result_value(self, value: Optional[str], dialect: sqlalchemy.Dialect) -> Optional[uuid.UUID]:
        # Both storage forms round-trip through the hex representation.
        if value is None:
            return None
        return uuid.UUID(hex=value)
# Mapping from the "type" strings accepted in schema configuration files to
# the SQLAlchemy (or custom, defined above) column types they translate to.
VALID_CONFIG_COLUMN_TYPES = {
    "string": sqlalchemy.String,
    "int": sqlalchemy.BigInteger,
    "float": sqlalchemy.Float,
    "region": Base64Region,
    "bool": sqlalchemy.Boolean,
    "blob": sqlalchemy.LargeBinary,
    "datetime": AstropyTimeNsecTai,
    "hash": Base64Bytes,
    "uuid": GUID,
}
@dataclass
class FieldSpec:
    """A data class for defining a column in a logical `Registry` table."""

    name: str
    """Name of the column."""

    dtype: type
    """Type of the column; usually a `type` subclass provided by SQLAlchemy
    that defines both a Python type and a corresponding precise SQL type.
    """

    length: Optional[int] = None
    """Length of the type in the database, for variable-length types."""

    nbytes: Optional[int] = None
    """Natural length used for hash and encoded-region columns, to be converted
    into the post-encoding length.
    """

    primaryKey: bool = False
    """Whether this field is (part of) its table's primary key."""

    autoincrement: bool = False
    """Whether the database should insert automatically incremented values when
    no value is provided in an INSERT.
    """

    nullable: bool = True
    """Whether this field is allowed to be NULL."""

    default: Any = None
    """A server-side default value for this field.

    This is passed directly as the ``server_default`` argument to
    `sqlalchemy.schema.Column`.  It does _not_ go through SQLAlchemy's usual
    type conversion or quoting for Python literals, and should hence be used
    with care.  See the SQLAlchemy documentation for more information.
    """

    doc: Optional[str] = None
    """Documentation for this field."""

    def __eq__(self, other: Any) -> bool:
        # Equality (and hashing, below) is by name only, so a FieldSpec can
        # be looked up in a NamedValueSet regardless of its other attributes.
        if isinstance(other, FieldSpec):
            return self.name == other.name
        else:
            return NotImplemented

    def __hash__(self) -> int:
        return hash(self.name)

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in column config '{config}'.")
    def fromConfig(cls, config: Config, **kwargs: Any) -> FieldSpec:
        """Create a `FieldSpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config: `Config`
            Configuration describing the column.  Nested configuration keys
            correspond to `FieldSpec` attributes.
        **kwargs
            Additional keyword arguments that provide defaults for values
            not present in config.

        Returns
        -------
        spec: `FieldSpec`
            Specification structure for the column.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        dtype = VALID_CONFIG_COLUMN_TYPES.get(config["type"])
        if dtype is None:
            raise SchemaValidationError(f"Invalid field type string: '{config['type']}'.")
        if not config["name"].islower():
            raise SchemaValidationError(f"Column name '{config['name']}' is not all lowercase.")
        self = cls(name=config["name"], dtype=dtype, **kwargs)
        self.length = config.get("length", self.length)
        self.nbytes = config.get("nbytes", self.nbytes)
        if self.length is not None and self.nbytes is not None:
            raise SchemaValidationError(f"Both length and nbytes provided for field '{self.name}'.")
        self.primaryKey = config.get("primaryKey", self.primaryKey)
        self.autoincrement = config.get("autoincrement", self.autoincrement)
        # Primary-key columns default to NOT NULL unless config says otherwise.
        self.nullable = config.get("nullable", False if self.primaryKey else self.nullable)
        self.doc = stripIfNotNone(config.get("doc", None))
        return self

    def isStringType(self) -> bool:
        """Indicate that this is a sqlalchemy.String field spec.

        Returns
        -------
        isString : `bool`
            The field refers to a `sqlalchemy.String` and not any other type.
            This can return `False` even if the object was created with a
            string type if it has been decided that it should be implemented
            as a `sqlalchemy.Text` type.
        """
        # Only short bounded strings (<= 32 chars) stay String; anything else
        # is implemented as Text (see getSizedColumnType).  The original
        # nested duplicate check of the same condition has been collapsed.
        return bool(self.dtype == sqlalchemy.String and self.length and self.length <= 32)

    def getSizedColumnType(self) -> sqlalchemy.types.TypeEngine:
        """Return a sized version of the column type.

        Utilizes either (or neither) of ``self.length`` and ``self.nbytes``.

        Returns
        -------
        dtype : `sqlalchemy.types.TypeEngine`
            A SQLAlchemy column type object.
        """
        if self.length is not None:
            # Last chance check that we are only looking at possible String
            if self.dtype == sqlalchemy.String and not self.isStringType():
                return sqlalchemy.Text
            return self.dtype(length=self.length)
        if self.nbytes is not None:
            return self.dtype(nbytes=self.nbytes)
        return self.dtype

    def getPythonType(self) -> type:
        """Return the Python type associated with this field's (SQL) dtype.

        Returns
        -------
        type : `type`
            Python type associated with this field's (SQL) `dtype`.
        """
        # to construct these objects, nbytes keyword is needed
        if issubclass(self.dtype, LocalBase64Bytes):
            # satisfy mypy for something that must be true
            assert self.nbytes is not None
            return self.dtype(nbytes=self.nbytes).python_type
        else:
            return self.dtype().python_type  # type: ignore
@dataclass
class ForeignKeySpec:
    """Definition of a foreign key constraint in a logical `Registry` table."""

    table: str
    """Name of the target table."""

    source: Tuple[str, ...]
    """Tuple of source table column names."""

    target: Tuple[str, ...]
    """Tuple of target table column names."""

    onDelete: Optional[str] = None
    """SQL clause indicating how to handle deletes to the target table.

    If not `None` (which indicates that a constraint violation exception should
    be raised), should be either "SET NULL" or "CASCADE".
    """

    addIndex: bool = True
    """If `True`, create an index on the columns of this foreign key in the
    source table.
    """

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in foreignKey config '{config}'.")
    def fromConfig(cls, config: Config) -> ForeignKeySpec:
        """Create a `ForeignKeySpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config: `Config`
            Configuration describing the constraint.  Nested configuration keys
            correspond to `ForeignKeySpec` attributes.

        Returns
        -------
        spec: `ForeignKeySpec`
            Specification structure for the constraint.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        # Column lists may be given as scalars or sequences in config;
        # normalize both to tuples.
        sourceColumns = tuple(ensure_iterable(config["source"]))
        targetColumns = tuple(ensure_iterable(config["target"]))
        return cls(
            table=config["table"],
            source=sourceColumns,
            target=targetColumns,
            onDelete=config.get("onDelete", None),
        )
@dataclass
class TableSpec:
    """A data class used to define a table or table-like query interface.

    Parameters
    ----------
    fields : `Iterable` [ `FieldSpec` ]
        Specifications for the columns in this table.
    unique : `Iterable` [ `tuple` [ `str` ] ], optional
        Non-primary-key unique constraints for the table.
    indexes: `Iterable` [ `tuple` [ `str` ] ], optional
        Indexes for the table.
    foreignKeys : `Iterable` [ `ForeignKeySpec` ], optional
        Foreign key constraints for the table.
    exclusion : `Iterable` [ `tuple` [ `str` or `type` ] ]
        Special constraints that prohibit overlaps between timespans over rows
        where other columns are equal.  These take the same form as unique
        constraints, but each tuple may contain a single
        `TimespanDatabaseRepresentation` subclass representing a timespan
        column.
    recycleIds : `bool`, optional
        If `True`, allow databases that might normally recycle autoincrement
        IDs to do so (usually better for performance) on any autoincrement
        field in this table.
    doc : `str`, optional
        Documentation for the table.
    """

    # NOTE: this class defines its own __init__ (so @dataclass does not
    # generate one); the annotated attributes below the __init__ still feed
    # the dataclass machinery for __repr__/__eq__ generation.
    def __init__(
        self,
        fields: Iterable[FieldSpec],
        *,
        unique: Iterable[Tuple[str, ...]] = (),
        indexes: Iterable[Tuple[str, ...]] = (),
        foreignKeys: Iterable[ForeignKeySpec] = (),
        exclusion: Iterable[Tuple[Union[str, Type[TimespanDatabaseRepresentation]], ...]] = (),
        recycleIds: bool = True,
        doc: Optional[str] = None,
    ):
        self.fields = NamedValueSet(fields)
        self.unique = set(unique)
        self.indexes = set(indexes)
        self.foreignKeys = list(foreignKeys)
        self.exclusion = set(exclusion)
        self.recycleIds = recycleIds
        self.doc = doc

    fields: NamedValueSet[FieldSpec]
    """Specifications for the columns in this table."""

    unique: Set[Tuple[str, ...]]
    """Non-primary-key unique constraints for the table."""

    indexes: Set[Tuple[str, ...]]
    """Indexes for the table."""

    foreignKeys: List[ForeignKeySpec]
    """Foreign key constraints for the table."""

    exclusion: Set[Tuple[Union[str, Type[TimespanDatabaseRepresentation]], ...]]
    """Exclusion constraints for the table.

    Exclusion constraints behave mostly like unique constraints, but may
    contain a database-native Timespan column that is restricted to not overlap
    across rows (for identical combinations of any non-Timespan columns in the
    constraint).
    """

    recycleIds: bool = True
    """If `True`, allow databases that might normally recycle autoincrement IDs
    to do so (usually better for performance) on any autoincrement field in
    this table.
    """

    doc: Optional[str] = None
    """Documentation for the table."""

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in table config '{config}'.")
    def fromConfig(cls, config: Config) -> TableSpec:
        """Create a `TableSpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config: `Config`
            Configuration describing the table.  Nested configuration keys
            correspond to `TableSpec` attributes.

        Returns
        -------
        spec: `TableSpec`
            Specification structure for the table.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        # NOTE(review): "indexes" and "exclusion" are not read from config
        # here, so configured tables always get the defaults — confirm this
        # is intentional.
        return cls(
            fields=NamedValueSet(FieldSpec.fromConfig(c) for c in config["columns"]),
            unique={tuple(u) for u in config.get("unique", ())},
            foreignKeys=[ForeignKeySpec.fromConfig(c) for c in config.get("foreignKeys", ())],
            doc=stripIfNotNone(config.get("doc")),
        )