Coverage for python/lsst/daf/butler/core/ddl.py: 51%
216 statements
« prev ^ index » next coverage.py v6.4.4, created at 2022-08-31 04:05 -0700
« prev ^ index » next coverage.py v6.4.4, created at 2022-08-31 04:05 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21"""Classes for representing SQL data-definition language (DDL) in Python.
This includes "CREATE TABLE" etc.
25This provides an extra layer on top of SQLAlchemy's classes for these concepts,
26because we need a level of indirection between logical tables and the actual
27SQL, and SQLAlchemy's DDL classes always map 1-1 to SQL.
29We've opted for the rather more obscure "ddl" as the name of this module
30instead of "schema" because the latter is too overloaded; in most SQL
31databases, a "schema" is also another term for a namespace.
32"""
33from __future__ import annotations
35from lsst import sphgeom
37__all__ = (
38 "TableSpec",
39 "FieldSpec",
40 "ForeignKeySpec",
41 "Base64Bytes",
42 "Base64Region",
43 "AstropyTimeNsecTai",
44 "GUID",
45)
47import logging
48import uuid
49from base64 import b64decode, b64encode
50from dataclasses import dataclass
51from math import ceil
52from typing import TYPE_CHECKING, Any, Callable, Iterable, List, Optional, Set, Tuple, Type, Union
54import astropy.time
55import sqlalchemy
56from lsst.sphgeom import Region
57from lsst.utils.iteration import ensure_iterable
58from sqlalchemy.dialects.postgresql import UUID
60from . import time_utils
61from .config import Config
62from .exceptions import ValidationError
63from .named import NamedValueSet
64from .utils import stripIfNotNone
66if TYPE_CHECKING: 66 ↛ 67line 66 didn't jump to line 67, because the condition on line 66 was never true
67 from .timespan import TimespanDatabaseRepresentation
70_LOG = logging.getLogger(__name__)
class SchemaValidationError(ValidationError):
    """Exceptions that indicate problems in Registry schema configuration."""

    @classmethod
    def translate(cls, caught: Type[Exception], message: str) -> Callable:
        """Return decorator to re-raise exceptions as `SchemaValidationError`.

        Decorated functions must be class or instance methods, with a
        ``config`` parameter as their first argument.  This will be passed
        to ``message.format()`` as a keyword argument, along with ``err``,
        the original exception.

        Parameters
        ----------
        caught : `type` (`Exception` subclass)
            The type of exception to catch.
        message : `str`
            A `str.format` string that may contain named placeholders for
            ``config``, ``err``, or any keyword-only argument accepted by
            the decorated function.
        """

        def decorate(func: Callable) -> Callable:
            def decorated(self: Any, config: Config, *args: Any, **kwargs: Any) -> Any:
                try:
                    return func(self, config, *args, **kwargs)
                except caught as err:
                    # Chain the original exception (``from err``) so its
                    # traceback is preserved for debugging; previously it
                    # was only visible via implicit context.
                    raise cls(message.format(config=str(config), err=err)) from err

            return decorated

        return decorate
class Base64Bytes(sqlalchemy.TypeDecorator):
    """A SQLAlchemy custom type for Python `bytes`.

    Maps Python `bytes` to a base64-encoded `sqlalchemy.Text` field.
    """

    impl = sqlalchemy.Text

    cache_ok = True

    def __init__(self, nbytes: int, *args: Any, **kwargs: Any):
        # Base64 turns every 3 bytes into 4 characters; only size the
        # column when the implementation is a length-limited String type.
        if self.impl == sqlalchemy.String:
            length = 4 * ceil(nbytes / 3)
        else:
            length = None
        super().__init__(*args, length=length, **kwargs)
        self.nbytes = nbytes

    def process_bind_param(self, value: Optional[bytes], dialect: sqlalchemy.engine.Dialect) -> Optional[str]:
        # Encode native `bytes` to base64 `bytes`, then to ASCII `str`,
        # because `str` is what SQLAlchemy expects for String fields.
        if value is None:
            return None
        if not isinstance(value, bytes):
            raise TypeError(
                f"Base64Bytes fields require 'bytes' values; got '{value}' with type {type(value)}."
            )
        return b64encode(value).decode("ascii")

    def process_result_value(
        self, value: Optional[str], dialect: sqlalchemy.engine.Dialect
    ) -> Optional[bytes]:
        # 'value' is a `str` that must be ASCII because it's base64-encoded;
        # reverse the bind conversion to recover native `bytes`.
        if value is None:
            return None
        return b64decode(value.encode("ascii"))

    @property
    def python_type(self) -> Type[bytes]:
        # The Python-side type this column maps to.
        return bytes
# Create an alias, for use below, to disambiguate from the built-in
# sqlalchemy type of the same name.
149LocalBase64Bytes = Base64Bytes
class Base64Region(Base64Bytes):
    """A SQLAlchemy custom type for Python `sphgeom.Region`.

    Maps Python `sphgeom.Region` to a base64-encoded `sqlalchemy.String`.
    """

    cache_ok = True  # have to be set explicitly in each class

    def process_bind_param(
        self, value: Optional[Region], dialect: sqlalchemy.engine.Dialect
    ) -> Optional[str]:
        # Serialize the region to bytes, then let the base class handle
        # the base64/str conversion.
        if value is None:
            return None
        serialized = value.encode()
        return super().process_bind_param(serialized, dialect)

    def process_result_value(
        self, value: Optional[str], dialect: sqlalchemy.engine.Dialect
    ) -> Optional[Region]:
        # Undo the base64 layer first, then rebuild the region object
        # from its serialized bytes.
        if value is None:
            return None
        raw = super().process_result_value(value, dialect)
        return Region.decode(raw)

    @property
    def python_type(self) -> Type[sphgeom.Region]:
        # The Python-side type this column maps to.
        return sphgeom.Region
class AstropyTimeNsecTai(sqlalchemy.TypeDecorator):
    """A SQLAlchemy custom type for Python `astropy.time.Time`.

    Maps Python `astropy.time.Time` to a number of nanoseconds since Unix
    epoch in TAI scale.
    """

    impl = sqlalchemy.BigInteger

    cache_ok = True

    def process_bind_param(
        self, value: Optional[astropy.time.Time], dialect: sqlalchemy.engine.Dialect
    ) -> Optional[int]:
        # Convert an astropy Time to integer TAI nanoseconds for storage.
        if value is None:
            return None
        if not isinstance(value, astropy.time.Time):
            raise TypeError(f"Unsupported type: {type(value)}, expected astropy.time.Time")
        return time_utils.TimeConverter().astropy_to_nsec(value)

    def process_result_value(
        self, value: Optional[int], dialect: sqlalchemy.engine.Dialect
    ) -> Optional[astropy.time.Time]:
        # 'value' is nanoseconds since epoch (TAI scale), or None.
        if value is None:
            return None
        return time_utils.TimeConverter().nsec_to_astropy(value)
class GUID(sqlalchemy.TypeDecorator):
    """Platform-independent GUID type.

    Uses PostgreSQL's UUID type, otherwise uses CHAR(32), storing as
    stringified hex values.
    """

    impl = sqlalchemy.CHAR

    cache_ok = True

    def load_dialect_impl(self, dialect: sqlalchemy.Dialect) -> sqlalchemy.TypeEngine:
        # PostgreSQL has a native UUID column type; everywhere else fall
        # back to a fixed-width hex string column.
        if dialect.name == "postgresql":
            return dialect.type_descriptor(UUID())
        return dialect.type_descriptor(sqlalchemy.CHAR(32))

    def process_bind_param(self, value: Any, dialect: sqlalchemy.Dialect) -> Optional[str]:
        if value is None:
            return value

        # Coerce input to UUID type, in general having UUID on input is the
        # only thing that we want but there is code right now that uses ints.
        if isinstance(value, uuid.UUID):
            coerced = value
        elif isinstance(value, int):
            coerced = uuid.UUID(int=value)
        elif isinstance(value, bytes):
            coerced = uuid.UUID(bytes=value)
        elif isinstance(value, str):
            # hexstring
            coerced = uuid.UUID(hex=value)
        else:
            raise TypeError(f"Unexpected type of a bind value: {type(value)}")

        if dialect.name == "postgresql":
            return str(coerced)
        return "%.32x" % coerced.int

    def process_result_value(self, value: Optional[str], dialect: sqlalchemy.Dialect) -> Optional[uuid.UUID]:
        # Rebuild a UUID object from the stored hex representation.
        if value is None:
            return value
        return uuid.UUID(hex=value)
# Mapping from the "type" strings accepted in schema configuration files to
# the SQLAlchemy (or custom, defined above) column types they translate to.
# Used by FieldSpec.fromConfig below.
VALID_CONFIG_COLUMN_TYPES = {
    "string": sqlalchemy.String,
    "int": sqlalchemy.BigInteger,
    "float": sqlalchemy.Float,
    "region": Base64Region,
    "bool": sqlalchemy.Boolean,
    "blob": sqlalchemy.LargeBinary,
    "datetime": AstropyTimeNsecTai,
    "hash": Base64Bytes,
    "uuid": GUID,
}
@dataclass
class FieldSpec:
    """A data class for defining a column in a logical `Registry` table."""

    name: str
    """Name of the column."""

    dtype: type
    """Type of the column; usually a `type` subclass provided by SQLAlchemy
    that defines both a Python type and a corresponding precise SQL type.
    """

    length: Optional[int] = None
    """Length of the type in the database, for variable-length types."""

    nbytes: Optional[int] = None
    """Natural length used for hash and encoded-region columns, to be converted
    into the post-encoding length.
    """

    primaryKey: bool = False
    """Whether this field is (part of) its table's primary key."""

    autoincrement: bool = False
    """Whether the database should insert automatically incremented values when
    no value is provided in an INSERT.
    """

    nullable: bool = True
    """Whether this field is allowed to be NULL. If ``primaryKey`` is
    `True`, during construction this value will be forced to `False`."""

    default: Any = None
    """A server-side default value for this field.

    This is passed directly as the ``server_default`` argument to
    `sqlalchemy.schema.Column`.  It does _not_ go through SQLAlchemy's usual
    type conversion or quoting for Python literals, and should hence be used
    with care.  See the SQLAlchemy documentation for more information.
    """

    doc: Optional[str] = None
    """Documentation for this field."""

    def __post_init__(self) -> None:
        if self.primaryKey:
            # Change the default to match primaryKey.
            self.nullable = False

    def __eq__(self, other: Any) -> bool:
        # Equality (and hashing, below) is by name only, so FieldSpec can be
        # used in name-keyed containers such as NamedValueSet.
        if isinstance(other, FieldSpec):
            return self.name == other.name
        else:
            return NotImplemented

    def __hash__(self) -> int:
        return hash(self.name)

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in column config '{config}'.")
    def fromConfig(cls, config: Config, **kwargs: Any) -> FieldSpec:
        """Create a `FieldSpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config: `Config`
            Configuration describing the column.  Nested configuration keys
            correspond to `FieldSpec` attributes.
        **kwargs
            Additional keyword arguments that provide defaults for values
            not present in config.

        Returns
        -------
        spec: `FieldSpec`
            Specification structure for the column.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        dtype = VALID_CONFIG_COLUMN_TYPES.get(config["type"])
        if dtype is None:
            raise SchemaValidationError(f"Invalid field type string: '{config['type']}'.")
        if not config["name"].islower():
            raise SchemaValidationError(f"Column name '{config['name']}' is not all lowercase.")
        self = cls(name=config["name"], dtype=dtype, **kwargs)
        self.length = config.get("length", self.length)
        self.nbytes = config.get("nbytes", self.nbytes)
        # length and nbytes are alternative ways to size a column; a config
        # may specify at most one of them.
        if self.length is not None and self.nbytes is not None:
            raise SchemaValidationError(f"Both length and nbytes provided for field '{self.name}'.")
        self.primaryKey = config.get("primaryKey", self.primaryKey)
        self.autoincrement = config.get("autoincrement", self.autoincrement)
        self.nullable = config.get("nullable", False if self.primaryKey else self.nullable)
        self.doc = stripIfNotNone(config.get("doc", None))
        return self

    def isStringType(self) -> bool:
        """Indicate that this is a sqlalchemy.String field spec.

        Returns
        -------
        isString : `bool`
            The field refers to a `sqlalchemy.String` and not any other type.
            This can return `False` even if the object was created with a
            string type if it has been decided that it should be implemented
            as a `sqlalchemy.Text` type.
        """
        # Simplified from a redundant nested check of the same dtype
        # condition: only short (<= 32 chars), explicitly-sized strings are
        # retained as String; everything else becomes Text (see
        # getSizedColumnType).
        return self.dtype == sqlalchemy.String and bool(self.length) and self.length <= 32

    def getSizedColumnType(self) -> sqlalchemy.types.TypeEngine:
        """Return a sized version of the column type.

        Utilizes either (or neither) of ``self.length`` and ``self.nbytes``.

        Returns
        -------
        dtype : `sqlalchemy.types.TypeEngine`
            A SQLAlchemy column type object.
        """
        if self.length is not None:
            # Last chance check that we are only looking at possible String
            if self.dtype == sqlalchemy.String and not self.isStringType():
                return sqlalchemy.Text
            return self.dtype(length=self.length)
        if self.nbytes is not None:
            return self.dtype(nbytes=self.nbytes)
        return self.dtype

    def getPythonType(self) -> type:
        """Return the Python type associated with this field's (SQL) dtype.

        Returns
        -------
        type : `type`
            Python type associated with this field's (SQL) `dtype`.
        """
        # to construct these objects, nbytes keyword is needed
        if issubclass(self.dtype, LocalBase64Bytes):
            # satisfy mypy for something that must be true
            assert self.nbytes is not None
            return self.dtype(nbytes=self.nbytes).python_type
        else:
            return self.dtype().python_type  # type: ignore
@dataclass
class ForeignKeySpec:
    """Definition of a foreign key constraint in a logical `Registry` table."""

    table: str
    """Name of the target table."""

    source: Tuple[str, ...]
    """Tuple of source table column names."""

    target: Tuple[str, ...]
    """Tuple of target table column names."""

    onDelete: Optional[str] = None
    """SQL clause indicating how to handle deletes to the target table.

    If not `None` (which indicates that a constraint violation exception should
    be raised), should be either "SET NULL" or "CASCADE".
    """

    addIndex: bool = True
    """If `True`, create an index on the columns of this foreign key in the
    source table.
    """

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in foreignKey config '{config}'.")
    def fromConfig(cls, config: Config) -> ForeignKeySpec:
        """Create a `ForeignKeySpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config: `Config`
            Configuration describing the constraint.  Nested configuration
            keys correspond to `ForeignKeySpec` attributes.

        Returns
        -------
        spec: `ForeignKeySpec`
            Specification structure for the constraint.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        # Scalars in the config are accepted as single-column constraints;
        # ensure_iterable normalizes them before conversion to tuples.
        sourceColumns = tuple(ensure_iterable(config["source"]))
        targetColumns = tuple(ensure_iterable(config["target"]))
        return cls(
            table=config["table"],
            source=sourceColumns,
            target=targetColumns,
            onDelete=config.get("onDelete", None),
        )
@dataclass
class TableSpec:
    """A data class used to define a table or table-like query interface.

    Parameters
    ----------
    fields : `Iterable` [ `FieldSpec` ]
        Specifications for the columns in this table.
    unique : `Iterable` [ `tuple` [ `str` ] ], optional
        Non-primary-key unique constraints for the table.
    indexes: `Iterable` [ `tuple` [ `str` ] ], optional
        Indexes for the table.
    foreignKeys : `Iterable` [ `ForeignKeySpec` ], optional
        Foreign key constraints for the table.
    exclusion : `Iterable` [ `tuple` [ `str` or `type` ] ]
        Special constraints that prohibit overlaps between timespans over rows
        where other columns are equal.  These take the same form as unique
        constraints, but each tuple may contain a single
        `TimespanDatabaseRepresentation` subclass representing a timespan
        column.
    recycleIds : `bool`, optional
        If `True`, allow databases that might normally recycle autoincrement
        IDs to do so (usually better for performance) on any autoincrement
        field in this table.
    doc : `str`, optional
        Documentation for the table.
    """

    # NOTE: an explicit __init__ is defined here so that arbitrary iterables
    # can be normalized to concrete container types; @dataclass does not
    # overwrite an __init__ defined in the class body.
    def __init__(
        self,
        fields: Iterable[FieldSpec],
        *,
        unique: Iterable[Tuple[str, ...]] = (),
        indexes: Iterable[Tuple[str, ...]] = (),
        foreignKeys: Iterable[ForeignKeySpec] = (),
        exclusion: Iterable[Tuple[Union[str, Type[TimespanDatabaseRepresentation]], ...]] = (),
        recycleIds: bool = True,
        doc: Optional[str] = None,
    ):
        self.fields = NamedValueSet(fields)
        self.unique = set(unique)
        self.indexes = set(indexes)
        self.foreignKeys = list(foreignKeys)
        self.exclusion = set(exclusion)
        self.recycleIds = recycleIds
        self.doc = doc

    fields: NamedValueSet[FieldSpec]
    """Specifications for the columns in this table."""

    unique: Set[Tuple[str, ...]]
    """Non-primary-key unique constraints for the table."""

    indexes: Set[Tuple[str, ...]]
    """Indexes for the table."""

    foreignKeys: List[ForeignKeySpec]
    """Foreign key constraints for the table."""

    exclusion: Set[Tuple[Union[str, Type[TimespanDatabaseRepresentation]], ...]]
    """Exclusion constraints for the table.

    Exclusion constraints behave mostly like unique constraints, but may
    contain a database-native Timespan column that is restricted to not overlap
    across rows (for identical combinations of any non-Timespan columns in the
    constraint).
    """

    recycleIds: bool = True
    """If `True`, allow databases that might normally recycle autoincrement IDs
    to do so (usually better for performance) on any autoincrement field in
    this table.
    """

    doc: Optional[str] = None
    """Documentation for the table."""

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in table config '{config}'.")
    def fromConfig(cls, config: Config) -> TableSpec:
        """Create a `TableSpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config: `Config`
            Configuration describing the table.  Nested configuration keys
            correspond to `TableSpec` attributes.

        Returns
        -------
        spec: `TableSpec`
            Specification structure for the table.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        # NOTE(review): "indexes" and "exclusion" are not read from config
        # here — confirm that schema configs never specify them.
        return cls(
            fields=NamedValueSet(FieldSpec.fromConfig(c) for c in config["columns"]),
            unique={tuple(u) for u in config.get("unique", ())},
            foreignKeys=[ForeignKeySpec.fromConfig(c) for c in config.get("foreignKeys", ())],
            doc=stripIfNotNone(config.get("doc")),
        )