Coverage for python/lsst/daf/butler/core/ddl.py: 46%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21"""Classes for representing SQL data-definition language (DDL) in Python.
This includes "CREATE TABLE" etc.
25This provides an extra layer on top of SQLAlchemy's classes for these concepts,
26because we need a level of indirection between logical tables and the actual
27SQL, and SQLAlchemy's DDL classes always map 1-1 to SQL.
29We've opted for the rather more obscure "ddl" as the name of this module
30instead of "schema" because the latter is too overloaded; in most SQL
31databases, a "schema" is also another term for a namespace.
32"""
33from __future__ import annotations
# Public API of this module.
__all__ = ("TableSpec", "FieldSpec", "ForeignKeySpec", "Base64Bytes", "Base64Region",
           "AstropyTimeNsecTai", "GUID")
import logging
import uuid
from base64 import b64decode, b64encode
from dataclasses import dataclass
from functools import wraps
from math import ceil
from typing import TYPE_CHECKING, Any, Callable, Iterable, List, Optional, Set, Tuple, Type, Union

import astropy.time
import sqlalchemy
from sqlalchemy.dialects.postgresql import UUID

from lsst.sphgeom import Region
from lsst.utils.iteration import ensure_iterable

from . import time_utils
from .config import Config
from .exceptions import ValidationError
from .named import NamedValueSet
from .utils import stripIfNotNone

if TYPE_CHECKING:
    from .timespan import TimespanDatabaseRepresentation
# Module-level logger, named after this module.
_LOG = logging.getLogger(__name__)
class SchemaValidationError(ValidationError):
    """Exceptions that indicate problems in Registry schema configuration."""

    @classmethod
    def translate(cls, caught: Type[Exception], message: str) -> Callable:
        """Return decorator to re-raise exceptions as `SchemaValidationError`.

        Decorated functions must be class or instance methods, with a
        ``config`` parameter as their first argument.  This will be passed
        to ``message.format()`` as a keyword argument, along with ``err``,
        the original exception.

        Parameters
        ----------
        caught : `type` (`Exception` subclass)
            The type of exception to catch.
        message : `str`
            A `str.format` string that may contain named placeholders for
            ``config``, ``err``, or any keyword-only argument accepted by
            the decorated function.
        """
        def decorate(func: Callable) -> Callable:
            # Preserve the wrapped function's name, docstring, and other
            # metadata so decorated methods remain introspectable.
            @wraps(func)
            def decorated(self: Any, config: Config, *args: Any, **kwargs: Any) -> Any:
                try:
                    return func(self, config, *args, **kwargs)
                except caught as err:
                    # Chain explicitly so the original exception is preserved
                    # as __cause__ in the raised SchemaValidationError.
                    raise cls(message.format(config=str(config), err=err)) from err
            return decorated
        return decorate
class Base64Bytes(sqlalchemy.TypeDecorator):
    """A SQLAlchemy custom type for Python `bytes`.

    Maps Python `bytes` to a base64-encoded `sqlalchemy.Text` field.
    """

    impl = sqlalchemy.Text

    cache_ok = True

    def __init__(self, nbytes: int, *args: Any, **kwargs: Any):
        # Base64 turns every 3 input bytes into 4 output characters, so a
        # sized string column needs room for the encoded form.  For the
        # (default) Text implementation no length is passed.
        if self.impl == sqlalchemy.String:
            length = 4 * ceil(nbytes / 3)
        else:
            length = None
        super().__init__(*args, length=length, **kwargs)
        self.nbytes = nbytes

    def process_bind_param(self, value: Optional[bytes], dialect: sqlalchemy.engine.Dialect
                           ) -> Optional[str]:
        # Encode native `bytes` to base64 `bytes`, then decode to an ASCII
        # `str`, since `str` is what SQLAlchemy expects for string columns.
        if value is None:
            return None
        if isinstance(value, bytes):
            return b64encode(value).decode("ascii")
        raise TypeError(
            f"Base64Bytes fields require 'bytes' values; got '{value}' with type {type(value)}."
        )

    def process_result_value(self, value: Optional[str], dialect: sqlalchemy.engine.Dialect
                             ) -> Optional[bytes]:
        # The stored value is an ASCII `str` holding base64 text; turn it
        # back into native `bytes`.
        if value is None:
            return None
        return b64decode(value.encode("ascii"))
class Base64Region(Base64Bytes):
    """A SQLAlchemy custom type for Python `sphgeom.Region`.

    Maps Python `sphgeom.Region` to a base64-encoded `sqlalchemy.String`.
    """

    def process_bind_param(self, value: Optional[Region], dialect: sqlalchemy.engine.Dialect
                           ) -> Optional[str]:
        # Serialize the region to bytes, then let the base class handle the
        # base64 encoding.
        return None if value is None else super().process_bind_param(value.encode(), dialect)

    def process_result_value(self, value: Optional[str], dialect: sqlalchemy.engine.Dialect
                             ) -> Optional[Region]:
        # Base64-decode via the base class, then reconstruct the Region.
        if value is None:
            return None
        decoded = super().process_result_value(value, dialect)
        return Region.decode(decoded)
class AstropyTimeNsecTai(sqlalchemy.TypeDecorator):
    """A SQLAlchemy custom type for Python `astropy.time.Time`.

    Maps Python `astropy.time.Time` to a number of nanoseconds since Unix
    epoch in TAI scale.
    """

    impl = sqlalchemy.BigInteger

    cache_ok = True

    def process_bind_param(self, value: Optional[astropy.time.Time], dialect: sqlalchemy.engine.Dialect
                           ) -> Optional[int]:
        if value is None:
            return None
        if isinstance(value, astropy.time.Time):
            # Convert to integer nanoseconds since epoch, TAI scale.
            return time_utils.TimeConverter().astropy_to_nsec(value)
        raise TypeError(f"Unsupported type: {type(value)}, expected astropy.time.Time")

    def process_result_value(self, value: Optional[int], dialect: sqlalchemy.engine.Dialect
                             ) -> Optional[astropy.time.Time]:
        # The stored value is nanoseconds since epoch, or None.
        return None if value is None else time_utils.TimeConverter().nsec_to_astropy(value)
class GUID(sqlalchemy.TypeDecorator):
    """Platform-independent GUID type.

    Uses PostgreSQL's UUID type, otherwise uses CHAR(32), storing as
    stringified hex values.
    """

    impl = sqlalchemy.CHAR

    cache_ok = True

    def load_dialect_impl(self, dialect: sqlalchemy.Dialect) -> sqlalchemy.TypeEngine:
        # Native UUID on PostgreSQL; a 32-character hex string elsewhere.
        if dialect.name == 'postgresql':
            return dialect.type_descriptor(UUID())
        return dialect.type_descriptor(sqlalchemy.CHAR(32))

    def process_bind_param(self, value: Any, dialect: sqlalchemy.Dialect) -> Optional[str]:
        if value is None:
            return value

        # Coerce input to UUID type; in general having a UUID on input is
        # the only thing we want, but there is code right now that passes
        # ints, bytes, or hex strings.
        if not isinstance(value, uuid.UUID):
            if isinstance(value, int):
                value = uuid.UUID(int=value)
            elif isinstance(value, bytes):
                value = uuid.UUID(bytes=value)
            elif isinstance(value, str):
                value = uuid.UUID(hex=value)
            else:
                raise TypeError(f"Unexpected type of a bind value: {type(value)}")

        # PostgreSQL's native type accepts the canonical string form;
        # everywhere else we store the bare 32-digit hex representation.
        return str(value) if dialect.name == 'postgresql' else "%.32x" % value.int

    def process_result_value(self, value: Optional[str], dialect: sqlalchemy.Dialect) -> Optional[uuid.UUID]:
        # Both storage forms round-trip through the hex constructor.
        return None if value is None else uuid.UUID(hex=value)
# Mapping from the "type" strings accepted in schema configuration files to
# the SQLAlchemy (or custom, defined above) column types they correspond to.
# Used by FieldSpec.fromConfig to resolve the configured column type.
VALID_CONFIG_COLUMN_TYPES = {
    "string": sqlalchemy.String,
    "int": sqlalchemy.BigInteger,
    "float": sqlalchemy.Float,
    "region": Base64Region,
    "bool": sqlalchemy.Boolean,
    "blob": sqlalchemy.LargeBinary,
    "datetime": AstropyTimeNsecTai,
    "hash": Base64Bytes,
    "uuid": GUID,
}
@dataclass
class FieldSpec:
    """A data class for defining a column in a logical `Registry` table."""

    name: str
    """Name of the column."""

    dtype: type
    """Type of the column; usually a `type` subclass provided by SQLAlchemy
    that defines both a Python type and a corresponding precise SQL type.
    """

    length: Optional[int] = None
    """Length of the type in the database, for variable-length types."""

    nbytes: Optional[int] = None
    """Natural length used for hash and encoded-region columns, to be converted
    into the post-encoding length.
    """

    primaryKey: bool = False
    """Whether this field is (part of) its table's primary key."""

    autoincrement: bool = False
    """Whether the database should insert automatically incremented values when
    no value is provided in an INSERT.
    """

    nullable: bool = True
    """Whether this field is allowed to be NULL."""

    default: Any = None
    """A server-side default value for this field.

    This is passed directly as the ``server_default`` argument to
    `sqlalchemy.schema.Column`.  It does _not_ go through SQLAlchemy's usual
    type conversion or quoting for Python literals, and should hence be used
    with care.  See the SQLAlchemy documentation for more information.
    """

    doc: Optional[str] = None
    """Documentation for this field."""

    def __eq__(self, other: Any) -> bool:
        # Equality (and hashing, below) is by name only, so FieldSpec objects
        # behave as named, unique items in containers such as NamedValueSet.
        if isinstance(other, FieldSpec):
            return self.name == other.name
        else:
            return NotImplemented

    def __hash__(self) -> int:
        return hash(self.name)

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in column config '{config}'.")
    def fromConfig(cls, config: Config, **kwargs: Any) -> FieldSpec:
        """Create a `FieldSpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config: `Config`
            Configuration describing the column.  Nested configuration keys
            correspond to `FieldSpec` attributes.
        **kwargs
            Additional keyword arguments that provide defaults for values
            not present in config.

        Returns
        -------
        spec: `FieldSpec`
            Specification structure for the column.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        dtype = VALID_CONFIG_COLUMN_TYPES.get(config["type"])
        if dtype is None:
            raise SchemaValidationError(f"Invalid field type string: '{config['type']}'.")
        if not config["name"].islower():
            raise SchemaValidationError(f"Column name '{config['name']}' is not all lowercase.")
        self = cls(name=config["name"], dtype=dtype, **kwargs)
        self.length = config.get("length", self.length)
        self.nbytes = config.get("nbytes", self.nbytes)
        # These two are mutually exclusive ways of sizing the column; see
        # getSizedColumnType.
        if self.length is not None and self.nbytes is not None:
            raise SchemaValidationError(f"Both length and nbytes provided for field '{self.name}'.")
        self.primaryKey = config.get("primaryKey", self.primaryKey)
        self.autoincrement = config.get("autoincrement", self.autoincrement)
        # Primary key columns default to NOT NULL unless explicitly
        # configured otherwise.
        self.nullable = config.get("nullable", False if self.primaryKey else self.nullable)
        self.doc = stripIfNotNone(config.get("doc", None))
        return self

    def isStringType(self) -> bool:
        """Indicate that this is a sqlalchemy.String field spec.

        Returns
        -------
        isString : `bool`
            The field refers to a `sqlalchemy.String` and not any other type.
            This can return `False` even if the object was created with a
            string type if it has been decided that it should be implemented
            as a `sqlalchemy.Text` type.
        """
        # Only short strings are kept as true String columns; longer or
        # unsized ones are implemented as Text instead (see
        # getSizedColumnType).  The original nested double-check of dtype
        # collapses to this single boolean expression.
        return bool(self.dtype == sqlalchemy.String and self.length and self.length <= 32)

    def getSizedColumnType(self) -> sqlalchemy.types.TypeEngine:
        """Return a sized version of the column type.

        Utilizes either (or neither) of ``self.length`` and ``self.nbytes``.

        Returns
        -------
        dtype : `sqlalchemy.types.TypeEngine`
            A SQLAlchemy column type object.
        """
        if self.length is not None:
            # Last chance check that we are only looking at possible String:
            # a long "string" column is implemented as unsized Text.
            if self.dtype == sqlalchemy.String and not self.isStringType():
                return sqlalchemy.Text
            return self.dtype(length=self.length)
        if self.nbytes is not None:
            return self.dtype(nbytes=self.nbytes)
        return self.dtype

    def getPythonType(self) -> type:
        """Return the Python type associated with this field's (SQL) dtype.

        Returns
        -------
        type : `type`
            Python type associated with this field's (SQL) `dtype`.
        """
        # Instantiate the SQLAlchemy type to ask it for its Python type.
        return self.dtype().python_type
@dataclass
class ForeignKeySpec:
    """Definition of a foreign key constraint in a logical `Registry` table."""

    table: str
    """Name of the target table."""

    source: Tuple[str, ...]
    """Tuple of source table column names."""

    target: Tuple[str, ...]
    """Tuple of target table column names."""

    onDelete: Optional[str] = None
    """SQL clause indicating how to handle deletes to the target table.

    If not `None` (which indicates that a constraint violation exception should
    be raised), should be either "SET NULL" or "CASCADE".
    """

    addIndex: bool = True
    """If `True`, create an index on the columns of this foreign key in the
    source table.
    """

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in foreignKey config '{config}'.")
    def fromConfig(cls, config: Config) -> ForeignKeySpec:
        """Create a `ForeignKeySpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config: `Config`
            Configuration describing the constraint.  Nested configuration keys
            correspond to `ForeignKeySpec` attributes.

        Returns
        -------
        spec: `ForeignKeySpec`
            Specification structure for the constraint.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        # "source" and "target" may be single names or sequences of names;
        # normalize both to tuples before constructing the spec.
        sourceColumns = tuple(ensure_iterable(config["source"]))
        targetColumns = tuple(ensure_iterable(config["target"]))
        return cls(
            table=config["table"],
            source=sourceColumns,
            target=targetColumns,
            onDelete=config.get("onDelete", None),
        )
@dataclass
class TableSpec:
    """A data class used to define a table or table-like query interface.

    Parameters
    ----------
    fields : `Iterable` [ `FieldSpec` ]
        Specifications for the columns in this table.
    unique : `Iterable` [ `tuple` [ `str` ] ], optional
        Non-primary-key unique constraints for the table.
    indexes: `Iterable` [ `tuple` [ `str` ] ], optional
        Indexes for the table.
    foreignKeys : `Iterable` [ `ForeignKeySpec` ], optional
        Foreign key constraints for the table.
    exclusion : `Iterable` [ `tuple` [ `str` or `type` ] ]
        Special constraints that prohibit overlaps between timespans over rows
        where other columns are equal.  These take the same form as unique
        constraints, but each tuple may contain a single
        `TimespanDatabaseRepresentation` subclass representing a timespan
        column.
    recycleIds : `bool`, optional
        If `True`, allow databases that might normally recycle autoincrement
        IDs to do so (usually better for performance) on any autoincrement
        field in this table.
    doc : `str`, optional
        Documentation for the table.
    """

    # NOTE: dataclass() does not replace an explicitly defined __init__, so
    # this constructor (which normalizes each argument to a concrete
    # container type) is the one callers get.
    def __init__(
        self, fields: Iterable[FieldSpec], *,
        unique: Iterable[Tuple[str, ...]] = (),
        indexes: Iterable[Tuple[str, ...]] = (),
        foreignKeys: Iterable[ForeignKeySpec] = (),
        exclusion: Iterable[Tuple[Union[str, Type[TimespanDatabaseRepresentation]], ...]] = (),
        recycleIds: bool = True,
        doc: Optional[str] = None,
    ):
        self.fields = NamedValueSet(fields)
        self.unique = set(unique)
        self.indexes = set(indexes)
        self.foreignKeys = list(foreignKeys)
        self.exclusion = set(exclusion)
        self.recycleIds = recycleIds
        self.doc = doc

    fields: NamedValueSet[FieldSpec]
    """Specifications for the columns in this table."""

    unique: Set[Tuple[str, ...]]
    """Non-primary-key unique constraints for the table."""

    indexes: Set[Tuple[str, ...]]
    """Indexes for the table."""

    foreignKeys: List[ForeignKeySpec]
    """Foreign key constraints for the table."""

    exclusion: Set[Tuple[Union[str, Type[TimespanDatabaseRepresentation]], ...]]
    """Exclusion constraints for the table.

    Exclusion constraints behave mostly like unique constraints, but may
    contain a database-native Timespan column that is restricted to not overlap
    across rows (for identical combinations of any non-Timespan columns in the
    constraint).
    """

    recycleIds: bool = True
    """If `True`, allow databases that might normally recycle autoincrement IDs
    to do so (usually better for performance) on any autoincrement field in
    this table.
    """

    doc: Optional[str] = None
    """Documentation for the table."""

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in table config '{config}'.")
    def fromConfig(cls, config: Config) -> TableSpec:
        """Create a `TableSpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config: `Config`
            Configuration describing the constraint.  Nested configuration keys
            correspond to `TableSpec` attributes.

        Returns
        -------
        spec: `TableSpec`
            Specification structure for the table.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        # Only "columns", "unique", "foreignKeys", and "doc" are read here;
        # "indexes" and "exclusion" are left at their constructor defaults.
        return cls(
            fields=NamedValueSet(FieldSpec.fromConfig(c) for c in config["columns"]),
            unique={tuple(u) for u in config.get("unique", ())},
            foreignKeys=[ForeignKeySpec.fromConfig(c) for c in config.get("foreignKeys", ())],
            doc=stripIfNotNone(config.get("doc")),
        )
529 )