Coverage for python/lsst/daf/butler/core/ddl.py: 50%
232 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-04-13 02:34 -0700
« prev ^ index » next coverage.py v6.5.0, created at 2023-04-13 02:34 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21"""Classes for representing SQL data-definition language (DDL) in Python.
This includes "CREATE TABLE" etc.
25This provides an extra layer on top of SQLAlchemy's classes for these concepts,
26because we need a level of indirection between logical tables and the actual
27SQL, and SQLAlchemy's DDL classes always map 1-1 to SQL.
29We've opted for the rather more obscure "ddl" as the name of this module
30instead of "schema" because the latter is too overloaded; in most SQL
31databases, a "schema" is also another term for a namespace.
32"""
33from __future__ import annotations
35from lsst import sphgeom
37__all__ = (
38 "TableSpec",
39 "FieldSpec",
40 "ForeignKeySpec",
41 "IndexSpec",
42 "Base64Bytes",
43 "Base64Region",
44 "AstropyTimeNsecTai",
45 "GUID",
46)
48import logging
49import uuid
50from base64 import b64decode, b64encode
51from dataclasses import dataclass
52from math import ceil
53from typing import TYPE_CHECKING, Any, Callable, Iterable, List, Optional, Set, Tuple, Type, Union
55import astropy.time
56import sqlalchemy
57from lsst.sphgeom import Region
58from lsst.utils.iteration import ensure_iterable
59from sqlalchemy.dialects.postgresql import UUID
61from . import time_utils
62from .config import Config
63from .exceptions import ValidationError
64from .named import NamedValueSet
65from .utils import stripIfNotNone
67if TYPE_CHECKING:
68 from .timespan import TimespanDatabaseRepresentation
71_LOG = logging.getLogger(__name__)
class SchemaValidationError(ValidationError):
    """Exceptions that indicate problems in Registry schema configuration."""

    @classmethod
    def translate(cls, caught: Type[Exception], message: str) -> Callable:
        """Return decorator to re-raise exceptions as `SchemaValidationError`.

        Decorated functions must be class or instance methods, with a
        ``config`` parameter as their first argument.  This will be passed
        to ``message.format()`` as a keyword argument, along with ``err``,
        the original exception.

        Parameters
        ----------
        caught : `type` (`Exception` subclass)
            The type of exception to catch.
        message : `str`
            A `str.format` string that may contain named placeholders for
            ``config``, ``err``, or any keyword-only argument accepted by
            the decorated function.
        """

        def decorate(func: Callable) -> Callable:
            def decorated(self: Any, config: Config, *args: Any, **kwargs: Any) -> Any:
                try:
                    return func(self, config, *args, **kwargs)
                except caught as err:
                    # Chain the original exception (``from err``) so its
                    # traceback is preserved for debugging instead of being
                    # silently discarded.
                    raise cls(message.format(config=str(config), err=err)) from err

            return decorated

        return decorate
class Base64Bytes(sqlalchemy.TypeDecorator):
    """A SQLAlchemy custom type for Python `bytes`.

    Maps Python `bytes` to a base64-encoded `sqlalchemy.Text` field.
    """

    impl = sqlalchemy.Text

    cache_ok = True

    def __init__(self, nbytes: int | None = None, *args: Any, **kwargs: Any):
        # Base64 turns every 3 bytes into 4 characters, hence the
        # 4 * ceil(nbytes / 3) sizing; a Text column takes no length at all.
        length: int | None = None
        if nbytes is not None and self.impl == sqlalchemy.String:
            length = 4 * ceil(nbytes / 3)
        super().__init__(*args, length=length, **kwargs)
        self.nbytes = nbytes

    def process_bind_param(self, value: Optional[bytes], dialect: sqlalchemy.engine.Dialect) -> Optional[str]:
        """Encode native `bytes` to the ASCII `str` SQLAlchemy stores."""
        if value is None:
            return None
        if isinstance(value, bytes):
            # base64-encode, then decode the (pure-ASCII) result to `str`,
            # which is what SQLAlchemy expects for String/Text columns.
            return b64encode(value).decode("ascii")
        raise TypeError(
            f"Base64Bytes fields require 'bytes' values; got '{value}' with type {type(value)}."
        )

    def process_result_value(
        self, value: Optional[str], dialect: sqlalchemy.engine.Dialect
    ) -> Optional[bytes]:
        """Reverse the bind step: ASCII `str` -> base64 `bytes` -> `bytes`."""
        if value is None:
            return None
        return b64decode(value.encode("ascii"))

    @property
    def python_type(self) -> Type[bytes]:
        return bytes
# Create an alias for use below, to disambiguate our Base64Bytes from the
# built-in sqlalchemy types referenced elsewhere in this module.
LocalBase64Bytes = Base64Bytes
class Base64Region(Base64Bytes):
    """A SQLAlchemy custom type for Python `sphgeom.Region`.

    Maps Python `sphgeom.Region` to a base64-encoded `sqlalchemy.String`.
    """

    cache_ok = True  # must be set explicitly in each TypeDecorator subclass

    def process_bind_param(
        self, value: Optional[Region], dialect: sqlalchemy.engine.Dialect
    ) -> Optional[str]:
        # Serialize the region to raw bytes; the base class handles base64.
        if value is None:
            return None
        return super().process_bind_param(value.encode(), dialect)

    def process_result_value(
        self, value: Optional[str], dialect: sqlalchemy.engine.Dialect
    ) -> Optional[Region]:
        # Base class yields raw bytes (or None); deserialize into a Region.
        raw = super().process_result_value(value, dialect)
        return None if raw is None else Region.decode(raw)

    @property
    def python_type(self) -> Type[sphgeom.Region]:
        return sphgeom.Region
class AstropyTimeNsecTai(sqlalchemy.TypeDecorator):
    """A SQLAlchemy custom type for Python `astropy.time.Time`.

    Maps Python `astropy.time.Time` to a number of nanoseconds since Unix
    epoch in TAI scale.
    """

    impl = sqlalchemy.BigInteger

    cache_ok = True

    def process_bind_param(
        self, value: Optional[astropy.time.Time], dialect: sqlalchemy.engine.Dialect
    ) -> Optional[int]:
        """Convert an astropy Time to integer TAI nanoseconds (or None)."""
        if value is None:
            return None
        if isinstance(value, astropy.time.Time):
            return time_utils.TimeConverter().astropy_to_nsec(value)
        raise TypeError(f"Unsupported type: {type(value)}, expected astropy.time.Time")

    def process_result_value(
        self, value: Optional[int], dialect: sqlalchemy.engine.Dialect
    ) -> Optional[astropy.time.Time]:
        """Convert stored TAI nanoseconds back to an astropy Time (or None)."""
        if value is None:
            return None
        return time_utils.TimeConverter().nsec_to_astropy(value)
214# TODO: sqlalchemy 2 has internal support for UUID:
215# https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Uuid
class GUID(sqlalchemy.TypeDecorator):
    """Platform-independent GUID type.

    Uses PostgreSQL's UUID type, otherwise uses CHAR(32), storing as
    stringified hex values.
    """

    impl = sqlalchemy.CHAR

    cache_ok = True

    def load_dialect_impl(self, dialect: sqlalchemy.Dialect) -> sqlalchemy.types.TypeEngine:
        # PostgreSQL has a native UUID column type; every other backend
        # falls back to a fixed-width hex string.
        if dialect.name == "postgresql":
            return dialect.type_descriptor(UUID())
        return dialect.type_descriptor(sqlalchemy.CHAR(32))

    def process_bind_param(self, value: Any, dialect: sqlalchemy.Dialect) -> Optional[str]:
        """Coerce the bind value to a UUID and render it for the backend."""
        if value is None:
            return value

        # In general a `uuid.UUID` is the only input we want, but existing
        # code also passes ints, raw bytes, and hex strings; coerce them all.
        if not isinstance(value, uuid.UUID):
            if isinstance(value, int):
                value = uuid.UUID(int=value)
            elif isinstance(value, bytes):
                value = uuid.UUID(bytes=value)
            elif isinstance(value, str):
                value = uuid.UUID(hex=value)
            else:
                raise TypeError(f"Unexpected type of a bind value: {type(value)}")

        if dialect.name == "postgresql":
            return str(value)
        return "%.32x" % value.int

    def process_result_value(
        self, value: str | uuid.UUID | None, dialect: sqlalchemy.Dialect
    ) -> Optional[uuid.UUID]:
        """Convert a stored value back to `uuid.UUID` (or pass None through)."""
        if value is None or isinstance(value, uuid.UUID):
            # sqlalchemy 2 may already convert to UUID internally.
            return value
        return uuid.UUID(hex=value)
# Mapping from the type-name strings permitted in schema configuration files
# to the SQLAlchemy (or custom, defined above) column types they produce;
# consumed by FieldSpec.fromConfig below.
VALID_CONFIG_COLUMN_TYPES = {
    "string": sqlalchemy.String,
    "int": sqlalchemy.BigInteger,
    "float": sqlalchemy.Float,
    "region": Base64Region,
    "bool": sqlalchemy.Boolean,
    "blob": sqlalchemy.LargeBinary,
    "datetime": AstropyTimeNsecTai,
    "hash": Base64Bytes,
    "uuid": GUID,
}
@dataclass
class FieldSpec:
    """A data class for defining a column in a logical `Registry` table."""

    name: str
    """Name of the column."""

    dtype: type
    """Type of the column; usually a `type` subclass provided by SQLAlchemy
    that defines both a Python type and a corresponding precise SQL type.
    """

    length: Optional[int] = None
    """Length of the type in the database, for variable-length types."""

    nbytes: Optional[int] = None
    """Natural length used for hash and encoded-region columns, to be converted
    into the post-encoding length.
    """

    primaryKey: bool = False
    """Whether this field is (part of) its table's primary key."""

    autoincrement: bool = False
    """Whether the database should insert automatically incremented values when
    no value is provided in an INSERT.
    """

    nullable: bool = True
    """Whether this field is allowed to be NULL. If ``primaryKey`` is
    `True`, during construction this value will be forced to `False`."""

    default: Any = None
    """A server-side default value for this field.

    This is passed directly as the ``server_default`` argument to
    `sqlalchemy.schema.Column`.  It does _not_ go through SQLAlchemy's usual
    type conversion or quoting for Python literals, and should hence be used
    with care.  See the SQLAlchemy documentation for more information.
    """

    doc: Optional[str] = None
    """Documentation for this field."""

    def __post_init__(self) -> None:
        if self.primaryKey:
            # Primary key columns can never be NULL; force nullable to match.
            self.nullable = False

    def __eq__(self, other: Any) -> bool:
        # Equality (and hashing, below) deliberately consider only the name,
        # so containers of FieldSpecs are effectively keyed by column name.
        if isinstance(other, FieldSpec):
            return self.name == other.name
        else:
            return NotImplemented

    def __hash__(self) -> int:
        return hash(self.name)

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in column config '{config}'.")
    def fromConfig(cls, config: Config, **kwargs: Any) -> FieldSpec:
        """Create a `FieldSpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config : `Config`
            Configuration describing the column.  Nested configuration keys
            correspond to `FieldSpec` attributes.
        **kwargs
            Additional keyword arguments that provide defaults for values
            not present in config.

        Returns
        -------
        spec : `FieldSpec`
            Specification structure for the column.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        dtype = VALID_CONFIG_COLUMN_TYPES.get(config["type"])
        if dtype is None:
            raise SchemaValidationError(f"Invalid field type string: '{config['type']}'.")
        if not config["name"].islower():
            raise SchemaValidationError(f"Column name '{config['name']}' is not all lowercase.")
        self = cls(name=config["name"], dtype=dtype, **kwargs)
        self.length = config.get("length", self.length)
        self.nbytes = config.get("nbytes", self.nbytes)
        if self.length is not None and self.nbytes is not None:
            raise SchemaValidationError(f"Both length and nbytes provided for field '{self.name}'.")
        self.primaryKey = config.get("primaryKey", self.primaryKey)
        self.autoincrement = config.get("autoincrement", self.autoincrement)
        self.nullable = config.get("nullable", False if self.primaryKey else self.nullable)
        self.doc = stripIfNotNone(config.get("doc", None))
        return self

    @classmethod
    def for_region(cls, name: str = "region", nullable: bool = True, nbytes: int = 2048) -> FieldSpec:
        """Create a `FieldSpec` for a spatial region column.

        Parameters
        ----------
        name : `str`, optional
            Name for the field.
        nullable : `bool`, optional
            Whether NULL values are permitted.
        nbytes : `int`, optional
            Maximum number of bytes for serialized regions.  The actual column
            size will be larger to allow for base-64 encoding.

        Returns
        -------
        spec : `FieldSpec`
            Specification structure for a region column.
        """
        return cls(name, nullable=nullable, dtype=Base64Region, nbytes=nbytes)

    def isStringType(self) -> bool:
        """Indicate that this is a sqlalchemy.String field spec.

        Returns
        -------
        isString : `bool`
            The field refers to a `sqlalchemy.String` and not any other type.
            This can return `False` even if the object was created with a
            string type if it has been decided that it should be implemented
            as a `sqlalchemy.Text` type.
        """
        # Only short strings with an explicit (truthy) length stay String;
        # everything else is implemented as Text (see getSizedColumnType).
        return self.dtype == sqlalchemy.String and bool(self.length) and self.length <= 32

    def getSizedColumnType(self) -> sqlalchemy.types.TypeEngine | type:
        """Return a sized version of the column type.

        Utilizes either (or neither) of ``self.length`` and ``self.nbytes``.

        Returns
        -------
        dtype : `sqlalchemy.types.TypeEngine`
            A SQLAlchemy column type object.
        """
        if self.length is not None:
            # Last chance check that we are only looking at possible String
            if self.dtype == sqlalchemy.String and not self.isStringType():
                return sqlalchemy.Text
            return self.dtype(length=self.length)
        if self.nbytes is not None:
            return self.dtype(nbytes=self.nbytes)
        return self.dtype

    def getPythonType(self) -> type:
        """Return the Python type associated with this field's (SQL) dtype.

        Returns
        -------
        type : `type`
            Python type associated with this field's (SQL) `dtype`.
        """
        # Base64Bytes subclasses need the nbytes keyword to be constructed.
        if issubclass(self.dtype, LocalBase64Bytes):
            # satisfy mypy for something that must be true
            assert self.nbytes is not None
            return self.dtype(nbytes=self.nbytes).python_type
        else:
            return self.dtype().python_type  # type: ignore
@dataclass
class ForeignKeySpec:
    """Definition of a foreign key constraint in a logical `Registry` table."""

    table: str
    """Name of the target table."""

    source: Tuple[str, ...]
    """Tuple of source table column names."""

    target: Tuple[str, ...]
    """Tuple of target table column names."""

    onDelete: Optional[str] = None
    """SQL clause indicating how to handle deletes to the target table.

    If not `None` (which indicates that a constraint violation exception should
    be raised), should be either "SET NULL" or "CASCADE".
    """

    addIndex: bool = True
    """If `True`, create an index on the columns of this foreign key in the
    source table.
    """

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in foreignKey config '{config}'.")
    def fromConfig(cls, config: Config) -> ForeignKeySpec:
        """Create a `ForeignKeySpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config : `Config`
            Configuration describing the constraint.  Nested configuration
            keys correspond to `ForeignKeySpec` attributes.

        Returns
        -------
        spec : `ForeignKeySpec`
            Specification structure for the constraint.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        # Scalar config values are accepted for single-column keys; normalize
        # them into tuples either way.
        source_columns = tuple(ensure_iterable(config["source"]))
        target_columns = tuple(ensure_iterable(config["target"]))
        return cls(
            table=config["table"],
            source=source_columns,
            target=target_columns,
            onDelete=config.get("onDelete", None),
        )
@dataclass(frozen=True)
class IndexSpec:
    """Specification of an index on table columns.

    Parameters
    ----------
    *columns : `str`
        Names of the columns to index.
    **kwargs : `Any`
        Additional keyword arguments passed directly to the
        `sqlalchemy.schema.Index` constructor; useful for backend-specific
        options, e.g. ``postgresql_using="gist"`` creates a ``GIST`` index
        in PostgreSQL.
    """

    columns: Tuple[str, ...]
    """Column names to include in the index (`Tuple` [ `str` ])."""

    kwargs: dict[str, Any]
    """Additional keyword arguments passed directly to
    `sqlalchemy.schema.Index` constructor (`dict` [ `str`, `Any` ]).
    """

    def __init__(self, *columns: str, **kwargs: Any):
        # Frozen dataclasses forbid plain assignment, so attributes are
        # installed via object.__setattr__.
        object.__setattr__(self, "columns", tuple(columns))
        object.__setattr__(self, "kwargs", kwargs)

    def __hash__(self) -> int:
        # Hash on the column tuple only; `kwargs` is a dict and unhashable.
        return hash(self.columns)
@dataclass
class TableSpec:
    """A data class used to define a table or table-like query interface.

    Parameters
    ----------
    fields : `Iterable` [ `FieldSpec` ]
        Specifications for the columns in this table.
    unique : `Iterable` [ `tuple` [ `str` ] ], optional
        Non-primary-key unique constraints for the table.
    indexes : `Iterable` [ `IndexSpec` ], optional
        Indexes for the table.
    foreignKeys : `Iterable` [ `ForeignKeySpec` ], optional
        Foreign key constraints for the table.
    exclusion : `Iterable` [ `tuple` [ `str` or `type` ] ]
        Special constraints that prohibit overlaps between timespans over rows
        where other columns are equal.  These take the same form as unique
        constraints, but each tuple may contain a single
        `TimespanDatabaseRepresentation` subclass representing a timespan
        column.
    recycleIds : `bool`, optional
        If `True`, allow databases that might normally recycle autoincrement
        IDs to do so (usually better for performance) on any autoincrement
        field in this table.
    doc : `str`, optional
        Documentation for the table.
    """

    def __init__(
        self,
        fields: Iterable[FieldSpec],
        *,
        unique: Iterable[Tuple[str, ...]] = (),
        indexes: Iterable[IndexSpec] = (),
        foreignKeys: Iterable[ForeignKeySpec] = (),
        exclusion: Iterable[Tuple[Union[str, Type[TimespanDatabaseRepresentation]], ...]] = (),
        recycleIds: bool = True,
        doc: Optional[str] = None,
    ):
        # Normalize each argument into the concrete container type declared
        # for the matching attribute below.
        self.fields = NamedValueSet(fields)
        self.unique = set(unique)
        self.indexes = set(indexes)
        self.exclusion = set(exclusion)
        self.foreignKeys = list(foreignKeys)
        self.recycleIds = recycleIds
        self.doc = doc

    fields: NamedValueSet[FieldSpec]
    """Specifications for the columns in this table."""

    unique: Set[Tuple[str, ...]]
    """Non-primary-key unique constraints for the table."""

    indexes: Set[IndexSpec]
    """Indexes for the table."""

    foreignKeys: List[ForeignKeySpec]
    """Foreign key constraints for the table."""

    exclusion: Set[Tuple[Union[str, Type[TimespanDatabaseRepresentation]], ...]]
    """Exclusion constraints for the table.

    Exclusion constraints behave mostly like unique constraints, but may
    contain a database-native Timespan column that is restricted to not overlap
    across rows (for identical combinations of any non-Timespan columns in the
    constraint).
    """

    recycleIds: bool = True
    """If `True`, allow databases that might normally recycle autoincrement IDs
    to do so (usually better for performance) on any autoincrement field in
    this table.
    """

    doc: Optional[str] = None
    """Documentation for the table."""

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in table config '{config}'.")
    def fromConfig(cls, config: Config) -> TableSpec:
        """Create a `TableSpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config : `Config`
            Configuration describing the table.  Nested configuration keys
            correspond to `TableSpec` attributes.

        Returns
        -------
        spec : `TableSpec`
            Specification structure for the table.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        columns = NamedValueSet(FieldSpec.fromConfig(c) for c in config["columns"])
        unique_constraints = {tuple(u) for u in config.get("unique", ())}
        fkeys = [ForeignKeySpec.fromConfig(c) for c in config.get("foreignKeys", ())]
        return cls(
            fields=columns,
            unique=unique_constraints,
            foreignKeys=fkeys,
            doc=stripIfNotNone(config.get("doc")),
        )