Coverage for python/lsst/daf/butler/core/ddl.py : 43%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21"""Classes for representing SQL data-definition language (DDL) in Python.
23This include "CREATE TABLE" etc.
25This provides an extra layer on top of SQLAlchemy's classes for these concepts,
26because we need a level of indirection between logical tables and the actual
27SQL, and SQLAlchemy's DDL classes always map 1-1 to SQL.
29We've opted for the rather more obscure "ddl" as the name of this module
30instead of "schema" because the latter is too overloaded; in most SQL
31databases, a "schema" is also another term for a namespace.
32"""
33from __future__ import annotations
35__all__ = ("TableSpec", "FieldSpec", "ForeignKeySpec", "Base64Bytes", "Base64Region",
36 "AstropyTimeNsecTai", "GUID")
38from base64 import b64encode, b64decode
39import logging
40from math import ceil
41from dataclasses import dataclass
42from typing import Any, Callable, Iterable, List, Optional, Set, Tuple, Type, TYPE_CHECKING, Union
43import uuid
45import sqlalchemy
46from sqlalchemy.dialects.postgresql import UUID
47import astropy.time
49from lsst.sphgeom import Region
50from .config import Config
51from .exceptions import ValidationError
52from . import time_utils
53from .utils import iterable, stripIfNotNone
54from .named import NamedValueSet
56if TYPE_CHECKING: 56 ↛ 57line 56 didn't jump to line 57, because the condition on line 56 was never true
57 from .timespan import TimespanDatabaseRepresentation
60_LOG = logging.getLogger(__name__)
class SchemaValidationError(ValidationError):
    """Exceptions that indicate problems in Registry schema configuration."""

    @classmethod
    def translate(cls, caught: Type[Exception], message: str) -> Callable:
        """Return decorator to re-raise exceptions as `SchemaValidationError`.

        Decorated functions must be class or instance methods, with a
        ``config`` parameter as their first argument.  This will be passed
        to ``message.format()`` as a keyword argument, along with ``err``,
        the original exception.

        Parameters
        ----------
        caught : `type` (`Exception` subclass)
            The type of exception to catch.
        message : `str`
            A `str.format` string that may contain named placeholders for
            ``config``, ``err``, or any keyword-only argument accepted by
            the decorated function.
        """
        def decorate(func: Callable) -> Callable:
            def decorated(self: Any, config: Config, *args: Any, **kwds: Any) -> Any:
                try:
                    return func(self, config, *args, **kwds)
                except caught as err:
                    # Chain the caught exception explicitly so tracebacks
                    # show the underlying cause, not just the translation.
                    raise cls(message.format(config=str(config), err=err)) from err
            return decorated
        return decorate
class Base64Bytes(sqlalchemy.TypeDecorator):
    """A SQLAlchemy custom type for Python `bytes`.

    Maps Python `bytes` to a base64-encoded `sqlalchemy.Text` field.
    """

    impl = sqlalchemy.Text

    def __init__(self, nbytes: int, *args: Any, **kwargs: Any):
        # Base64 expands every 3 input bytes into 4 output characters; an
        # explicit column length only matters for sized (String) impls.
        if self.impl == sqlalchemy.String:
            length = 4 * ceil(nbytes / 3)
        else:
            length = None
        super().__init__(*args, length=length, **kwargs)
        self.nbytes = nbytes

    def process_bind_param(self, value: Optional[bytes], dialect: sqlalchemy.engine.Dialect
                           ) -> Optional[str]:
        # Encode the native `bytes` payload as base64 and hand SQLAlchemy an
        # ASCII `str`, which is what it expects for string-typed columns.
        if value is None:
            return None
        if not isinstance(value, bytes):
            raise TypeError(
                f"Base64Bytes fields require 'bytes' values; got '{value}' with type {type(value)}."
            )
        return b64encode(value).decode("ascii")

    def process_result_value(self, value: Optional[str], dialect: sqlalchemy.engine.Dialect
                             ) -> Optional[bytes]:
        # Inverse of process_bind_param: the stored `str` is ASCII-safe
        # base64 text, so re-encode it and decode the base64 payload.
        if value is None:
            return None
        return b64decode(value.encode("ascii"))
class Base64Region(Base64Bytes):
    """A SQLAlchemy custom type for Python `sphgeom.Region`.

    Maps Python `sphgeom.Region` to a base64-encoded `sqlalchemy.String`.
    """

    def process_bind_param(self, value: Optional[Region], dialect: sqlalchemy.engine.Dialect
                           ) -> Optional[str]:
        # Serialize the region to raw bytes, then let the base class apply
        # the base64 encoding.
        return None if value is None else super().process_bind_param(value.encode(), dialect)

    def process_result_value(self, value: Optional[str], dialect: sqlalchemy.engine.Dialect
                             ) -> Optional[Region]:
        # Recover the raw bytes via the base class, then deserialize them
        # back into a Region.
        return None if value is None else Region.decode(super().process_result_value(value, dialect))
class AstropyTimeNsecTai(sqlalchemy.TypeDecorator):
    """A SQLAlchemy custom type for Python `astropy.time.Time`.

    Maps Python `astropy.time.Time` to a number of nanoseconds since Unix
    epoch in TAI scale.
    """

    impl = sqlalchemy.BigInteger

    def process_bind_param(self, value: Optional[astropy.time.Time], dialect: sqlalchemy.engine.Dialect
                           ) -> Optional[int]:
        if value is None:
            return None
        if not isinstance(value, astropy.time.Time):
            raise TypeError(f"Unsupported type: {type(value)}, expected astropy.time.Time")
        # Convert the astropy Time to integer nanoseconds for storage.
        return time_utils.TimeConverter().astropy_to_nsec(value)

    def process_result_value(self, value: Optional[int], dialect: sqlalchemy.engine.Dialect
                             ) -> Optional[astropy.time.Time]:
        # The stored value is integer nanoseconds since epoch, or NULL.
        return None if value is None else time_utils.TimeConverter().nsec_to_astropy(value)
class GUID(sqlalchemy.TypeDecorator):
    """Platform-independent GUID type.

    Uses PostgreSQL's UUID type, otherwise uses CHAR(32), storing as
    stringified hex values.
    """

    impl = sqlalchemy.CHAR

    def load_dialect_impl(self, dialect: sqlalchemy.Dialect) -> sqlalchemy.TypeEngine:
        # PostgreSQL has a native UUID column type; everywhere else fall
        # back to a 32-character hex string.
        if dialect.name == 'postgresql':
            return dialect.type_descriptor(UUID())
        return dialect.type_descriptor(sqlalchemy.CHAR(32))

    def process_bind_param(self, value: Any, dialect: sqlalchemy.Dialect) -> Optional[str]:
        if value is None:
            return None

        # Coerce input to UUID type, in general having UUID on input is the
        # only thing that we want but there is code right now that uses ints.
        if isinstance(value, uuid.UUID):
            coerced = value
        elif isinstance(value, int):
            coerced = uuid.UUID(int=value)
        elif isinstance(value, bytes):
            coerced = uuid.UUID(bytes=value)
        elif isinstance(value, str):
            # hexstring
            coerced = uuid.UUID(hex=value)
        else:
            raise TypeError(f"Unexpected type of a bind value: {type(value)}")

        if dialect.name == 'postgresql':
            return str(coerced)
        # Zero-padded 32-digit hex matches the CHAR(32) storage format.
        return "%.32x" % coerced.int

    def process_result_value(self, value: Optional[str], dialect: sqlalchemy.Dialect) -> Optional[uuid.UUID]:
        return None if value is None else uuid.UUID(hex=value)
# Mapping from the "type" strings accepted in schema configuration files to
# the SQLAlchemy (or custom, defined above) column types they stand for.
# FieldSpec.fromConfig rejects any config whose "type" is not a key here.
VALID_CONFIG_COLUMN_TYPES = {
    "string": sqlalchemy.String,
    "int": sqlalchemy.BigInteger,
    "float": sqlalchemy.Float,
    "region": Base64Region,
    "bool": sqlalchemy.Boolean,
    "blob": sqlalchemy.LargeBinary,
    "datetime": AstropyTimeNsecTai,
    "hash": Base64Bytes,
    "uuid": GUID,
}
@dataclass
class FieldSpec:
    """A data class for defining a column in a logical `Registry` table."""

    name: str
    """Name of the column."""

    dtype: type
    """Type of the column; usually a `type` subclass provided by SQLAlchemy
    that defines both a Python type and a corresponding precise SQL type.
    """

    length: Optional[int] = None
    """Length of the type in the database, for variable-length types."""

    nbytes: Optional[int] = None
    """Natural length used for hash and encoded-region columns, to be converted
    into the post-encoding length.
    """

    primaryKey: bool = False
    """Whether this field is (part of) its table's primary key."""

    autoincrement: bool = False
    """Whether the database should insert automatically incremented values when
    no value is provided in an INSERT.
    """

    nullable: bool = True
    """Whether this field is allowed to be NULL."""

    default: Any = None
    """A server-side default value for this field.

    This is passed directly as the ``server_default`` argument to
    `sqlalchemy.schema.Column`.  It does _not_ go through SQLAlchemy's usual
    type conversion or quoting for Python literals, and should hence be used
    with care.  See the SQLAlchemy documentation for more information.
    """

    doc: Optional[str] = None
    """Documentation for this field."""

    def __eq__(self, other: Any) -> bool:
        # Equality (and hashing, below) is by name only, so containers of
        # FieldSpecs cannot hold two columns with the same name.
        if isinstance(other, FieldSpec):
            return self.name == other.name
        else:
            return NotImplemented

    def __hash__(self) -> int:
        return hash(self.name)

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in column config '{config}'.")
    def fromConfig(cls, config: Config, **kwds: Any) -> FieldSpec:
        """Create a `FieldSpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config: `Config`
            Configuration describing the column.  Nested configuration keys
            correspond to `FieldSpec` attributes.
        kwds
            Additional keyword arguments that provide defaults for values
            not present in config.

        Returns
        -------
        spec: `FieldSpec`
            Specification structure for the column.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        dtype = VALID_CONFIG_COLUMN_TYPES.get(config["type"])
        if dtype is None:
            raise SchemaValidationError(f"Invalid field type string: '{config['type']}'.")
        if not config["name"].islower():
            raise SchemaValidationError(f"Column name '{config['name']}' is not all lowercase.")
        self = cls(name=config["name"], dtype=dtype, **kwds)
        self.length = config.get("length", self.length)
        self.nbytes = config.get("nbytes", self.nbytes)
        if self.length is not None and self.nbytes is not None:
            raise SchemaValidationError(f"Both length and nbytes provided for field '{self.name}'.")
        self.primaryKey = config.get("primaryKey", self.primaryKey)
        self.autoincrement = config.get("autoincrement", self.autoincrement)
        # Primary-key columns default to NOT NULL unless the config says
        # otherwise explicitly.
        self.nullable = config.get("nullable", False if self.primaryKey else self.nullable)
        self.doc = stripIfNotNone(config.get("doc", None))
        return self

    def isStringType(self) -> bool:
        """Indicate that this is a sqlalchemy.String field spec.

        Returns
        -------
        isString : `bool`
            The field refers to a `sqlalchemy.String` and not any other type.
            This can return `False` even if the object was created with a
            string type if it has been decided that it should be implemented
            as a `sqlalchemy.Text` type.
        """
        # Only short strings with an explicit length stay as
        # sqlalchemy.String; longer or unsized ones become sqlalchemy.Text in
        # getSizedColumnType().  The previous implementation repeated the
        # String check in a dead nested conditional; this is the flattened
        # equivalent.
        return bool(self.dtype == sqlalchemy.String and self.length and self.length <= 32)

    def getSizedColumnType(self) -> sqlalchemy.types.TypeEngine:
        """Return a sized version of the column type.

        Utilizes either (or neither) of ``self.length`` and ``self.nbytes``.

        Returns
        -------
        dtype : `sqlalchemy.types.TypeEngine`
            A SQLAlchemy column type object.
        """
        if self.length is not None:
            # A String spec that isStringType() rejects (too long) is
            # implemented as unsized Text instead.
            if self.dtype == sqlalchemy.String and not self.isStringType():
                return sqlalchemy.Text
            return self.dtype(length=self.length)
        if self.nbytes is not None:
            return self.dtype(nbytes=self.nbytes)
        return self.dtype

    def getPythonType(self) -> type:
        """Return the Python type associated with this field's (SQL) dtype.

        Returns
        -------
        type : `type`
            Python type associated with this field's (SQL) `dtype`.
        """
        # Instantiate the SQLAlchemy type object to query its python_type.
        return self.dtype().python_type
@dataclass
class ForeignKeySpec:
    """Definition of a foreign key constraint in a logical `Registry` table."""

    table: str
    """Name of the target table."""

    source: Tuple[str, ...]
    """Tuple of source table column names."""

    target: Tuple[str, ...]
    """Tuple of target table column names."""

    onDelete: Optional[str] = None
    """SQL clause indicating how to handle deletes to the target table.

    If not `None` (which indicates that a constraint violation exception should
    be raised), should be either "SET NULL" or "CASCADE".
    """

    addIndex: bool = True
    """If `True`, create an index on the columns of this foreign key in the
    source table.
    """

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in foreignKey config '{config}'.")
    def fromConfig(cls, config: Config) -> ForeignKeySpec:
        """Create a `ForeignKeySpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config: `Config`
            Configuration describing the constraint.  Nested configuration keys
            correspond to `ForeignKeySpec` attributes.

        Returns
        -------
        spec: `ForeignKeySpec`
            Specification structure for the constraint.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        # Normalize the configured source/target column lists to tuples
        # before constructing the spec.
        sourceColumns = tuple(iterable(config["source"]))
        targetColumns = tuple(iterable(config["target"]))
        return cls(table=config["table"],
                   source=sourceColumns,
                   target=targetColumns,
                   onDelete=config.get("onDelete", None))
@dataclass
class TableSpec:
    """A data class used to define a table or table-like query interface.

    Parameters
    ----------
    fields : `Iterable` [ `FieldSpec` ]
        Specifications for the columns in this table.
    unique : `Iterable` [ `tuple` [ `str` ] ], optional
        Non-primary-key unique constraints for the table.
    indexes: `Iterable` [ `tuple` [ `str` ] ], optional
        Indexes for the table.
    foreignKeys : `Iterable` [ `ForeignKeySpec` ], optional
        Foreign key constraints for the table.
    exclusion : `Iterable` [ `tuple` [ `str` or `type` ] ]
        Special constraints that prohibit overlaps between timespans over rows
        where other columns are equal.  These take the same form as unique
        constraints, but each tuple may contain a single
        `TimespanDatabaseRepresentation` subclass representing a timespan
        column.
    recycleIds : `bool`, optional
        If `True`, allow databases that might normally recycle autoincrement
        IDs to do so (usually better for performance) on any autoincrement
        field in this table.
    doc : `str`, optional
        Documentation for the table.
    """

    def __init__(
        self, fields: Iterable[FieldSpec], *,
        unique: Iterable[Tuple[str, ...]] = (),
        indexes: Iterable[Tuple[str, ...]] = (),
        foreignKeys: Iterable[ForeignKeySpec] = (),
        exclusion: Iterable[Tuple[Union[str, Type[TimespanDatabaseRepresentation]], ...]] = (),
        recycleIds: bool = True,
        doc: Optional[str] = None,
    ):
        self.fields = NamedValueSet(fields)
        self.unique = set(unique)
        self.indexes = set(indexes)
        self.foreignKeys = list(foreignKeys)
        self.exclusion = set(exclusion)
        self.recycleIds = recycleIds
        self.doc = doc

    fields: NamedValueSet[FieldSpec]
    """Specifications for the columns in this table."""

    unique: Set[Tuple[str, ...]]
    """Non-primary-key unique constraints for the table."""

    indexes: Set[Tuple[str, ...]]
    """Indexes for the table."""

    foreignKeys: List[ForeignKeySpec]
    """Foreign key constraints for the table."""

    exclusion: Set[Tuple[Union[str, Type[TimespanDatabaseRepresentation]], ...]]
    """Exclusion constraints for the table.

    Exclusion constraints behave mostly like unique constraints, but may
    contain a database-native Timespan column that is restricted to not overlap
    across rows (for identical combinations of any non-Timespan columns in the
    constraint).
    """

    recycleIds: bool = True
    """If `True`, allow databases that might normally recycle autoincrement IDs
    to do so (usually better for performance) on any autoincrement field in
    this table.
    """

    doc: Optional[str] = None
    """Documentation for the table."""

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in table config '{config}'.")
    def fromConfig(cls, config: Config) -> TableSpec:
        """Create a `TableSpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config: `Config`
            Configuration describing the table.  Nested configuration keys
            correspond to `TableSpec` attributes.

        Returns
        -------
        spec: `TableSpec`
            Specification structure for the table.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        # NOTE(review): only "columns", "unique", "foreignKeys", and "doc"
        # are read from config here; "indexes" and "exclusion" cannot be
        # configured this way — confirm whether that is intentional.
        return cls(
            fields=NamedValueSet(FieldSpec.fromConfig(c) for c in config["columns"]),
            unique={tuple(u) for u in config.get("unique", ())},
            foreignKeys=[ForeignKeySpec.fromConfig(c) for c in config.get("foreignKeys", ())],
            doc=stripIfNotNone(config.get("doc")),
        )