# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
27"""Classes for representing SQL data-definition language (DDL) in Python.
29This include "CREATE TABLE" etc.
31This provides an extra layer on top of SQLAlchemy's classes for these concepts,
32because we need a level of indirection between logical tables and the actual
33SQL, and SQLAlchemy's DDL classes always map 1-1 to SQL.
35We've opted for the rather more obscure "ddl" as the name of this module
36instead of "schema" because the latter is too overloaded; in most SQL
37databases, a "schema" is also another term for a namespace.
38"""
from __future__ import annotations

from lsst import sphgeom

__all__ = (
    "TableSpec",
    "FieldSpec",
    "ForeignKeySpec",
    "IndexSpec",
    "Base64Bytes",
    "Base64Region",
    "AstropyTimeNsecTai",
    "GUID",
)

import logging
import uuid
from base64 import b64decode, b64encode
from collections.abc import Callable, Iterable
from dataclasses import dataclass
from math import ceil
from typing import TYPE_CHECKING, Any

import astropy.time
import sqlalchemy
from lsst.sphgeom import Region
from lsst.utils.iteration import ensure_iterable
from sqlalchemy.dialects.postgresql import UUID

from . import time_utils
from ._config import Config
from ._exceptions import ValidationError
from ._named import NamedValueSet
from .utils import stripIfNotNone

if TYPE_CHECKING:
    from ._timespan import TimespanDatabaseRepresentation


_LOG = logging.getLogger(__name__)


class SchemaValidationError(ValidationError):
    """Exceptions that indicate problems in Registry schema configuration."""

    @classmethod
    def translate(cls, caught: type[Exception], message: str) -> Callable:
        """Return decorator to re-raise exceptions as `SchemaValidationError`.

        Decorated functions must be class or instance methods, with a
        ``config`` parameter as their first argument. This will be passed
        to ``message.format()`` as a keyword argument, along with ``err``,
        the original exception.

        Parameters
        ----------
        caught : `type` (`Exception` subclass)
            The type of exception to catch.
        message : `str`
            A `str.format` string that may contain named placeholders for
            ``config``, ``err``, or any keyword-only argument accepted by
            the decorated function.
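
        Examples
        --------
        A minimal sketch of the intended usage (``ExampleSpec`` and the
        config key below are purely illustrative)::

            class ExampleSpec:
                @classmethod
                @SchemaValidationError.translate(KeyError, "Missing key {err} in config '{config}'.")
                def fromConfig(cls, config: Config) -> ExampleSpec:
                    return cls(config["name"])

        Any `KeyError` raised inside ``fromConfig`` is re-raised as a
        `SchemaValidationError` with the formatted message.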
101 """
103 def decorate(func: Callable) -> Callable:
104 def decorated(self: Any, config: Config, *args: Any, **kwargs: Any) -> Any:
105 try:
106 return func(self, config, *args, **kwargs)
107 except caught as err:
108 raise cls(message.format(config=str(config), err=err)) from err
110 return decorated
112 return decorate
115class Base64Bytes(sqlalchemy.TypeDecorator):
116 """A SQLAlchemy custom type for Python `bytes`.
118 Maps Python `bytes` to a base64-encoded `sqlalchemy.Text` field.
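
    Examples
    --------
    Typically used as a column type when declaring a table (a sketch only;
    the table and column names are illustrative)::

        table = sqlalchemy.Table(
            "example",
            sqlalchemy.MetaData(),
            sqlalchemy.Column("digest", Base64Bytes(nbytes=32)),
        )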
119 """
121 impl = sqlalchemy.Text
123 cache_ok = True
125 def __init__(self, nbytes: int | None = None, *args: Any, **kwargs: Any):
126 if nbytes is not None:
127 length = 4 * ceil(nbytes / 3) if self.impl is sqlalchemy.String else None
128 else:
129 length = None
130 super().__init__(*args, length=length, **kwargs)
131 self.nbytes = nbytes
133 def process_bind_param(self, value: bytes | None, dialect: sqlalchemy.engine.Dialect) -> str | None:
134 # 'value' is native `bytes`. We want to encode that to base64 `bytes`
135 # and then ASCII `str`, because `str` is what SQLAlchemy expects for
136 # String fields.
137 if value is None:
138 return None
139 if not isinstance(value, bytes):
140 raise TypeError(
141 f"Base64Bytes fields require 'bytes' values; got '{value}' with type {type(value)}."
142 )
143 return b64encode(value).decode("ascii")
145 def process_result_value(self, value: str | None, dialect: sqlalchemy.engine.Dialect) -> bytes | None:
146 # 'value' is a `str` that must be ASCII because it's base64-encoded.
147 # We want to transform that to base64-encoded `bytes` and then
148 # native `bytes`.
149 return b64decode(value.encode("ascii")) if value is not None else None
151 @property
152 def python_type(self) -> type[bytes]:
153 return bytes
# Create an alias, for use below, to disambiguate from the built-in
# sqlalchemy type.
LocalBase64Bytes = Base64Bytes


class Base64Region(Base64Bytes):
    """A SQLAlchemy custom type for Python `sphgeom.Region`.

    Maps Python `sphgeom.Region` to a base64-encoded `sqlalchemy.String`.
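
    Examples
    --------
    A sketch of the round-trip performed by the bind and result methods,
    given any existing `sphgeom.Region` instance ``region``::

        encoded = b64encode(region.encode()).decode("ascii")
        restored = Region.decode(b64decode(encoded))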
165 """
167 cache_ok = True # have to be set explicitly in each class
169 def process_bind_param(self, value: Region | None, dialect: sqlalchemy.engine.Dialect) -> str | None:
170 if value is None:
171 return None
172 return super().process_bind_param(value.encode(), dialect)
174 def process_result_value(self, value: str | None, dialect: sqlalchemy.engine.Dialect) -> Region | None:
175 if value is None:
176 return None
177 return Region.decode(super().process_result_value(value, dialect))
179 @property
180 def python_type(self) -> type[sphgeom.Region]:
181 return sphgeom.Region
184class AstropyTimeNsecTai(sqlalchemy.TypeDecorator):
185 """A SQLAlchemy custom type for Python `astropy.time.Time`.
187 Maps Python `astropy.time.Time` to a number of nanoseconds since Unix
188 epoch in TAI scale.
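
    Examples
    --------
    A sketch of the underlying conversion performed by this type (the time
    value is illustrative)::

        t = astropy.time.Time("2020-01-01T00:00:00", format="isot", scale="tai")
        nsec = time_utils.TimeConverter().astropy_to_nsec(t)
        restored = time_utils.TimeConverter().nsec_to_astropy(nsec)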
189 """
191 impl = sqlalchemy.BigInteger
193 cache_ok = True
195 def process_bind_param(
196 self, value: astropy.time.Time | None, dialect: sqlalchemy.engine.Dialect
197 ) -> int | None:
198 if value is None:
199 return None
200 if not isinstance(value, astropy.time.Time):
201 raise TypeError(f"Unsupported type: {type(value)}, expected astropy.time.Time")
202 value = time_utils.TimeConverter().astropy_to_nsec(value)
203 return value
205 def process_result_value(
206 self, value: int | None, dialect: sqlalchemy.engine.Dialect
207 ) -> astropy.time.Time | None:
208 # value is nanoseconds since epoch, or None
209 if value is None:
210 return None
211 value = time_utils.TimeConverter().nsec_to_astropy(value)
212 return value
215# TODO: sqlalchemy 2 has internal support for UUID:
216# https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Uuid
217class GUID(sqlalchemy.TypeDecorator):
218 """Platform-independent GUID type.
220 Uses PostgreSQL's UUID type, otherwise uses CHAR(32), storing as
221 stringified hex values.
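
    Examples
    --------
    A sketch of the coercion applied on the bind side: `int`, `bytes`, hex
    `str`, and `uuid.UUID` inputs are all normalized to a `uuid.UUID`, and
    (outside PostgreSQL) stored as a 32-character hex string::

        value = uuid.uuid4()
        stored = "%.32x" % value.int  # what a CHAR(32) column holds
        restored = uuid.UUID(hex=stored)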
222 """
224 impl = sqlalchemy.CHAR
226 cache_ok = True
228 def load_dialect_impl(self, dialect: sqlalchemy.Dialect) -> sqlalchemy.types.TypeEngine:
229 if dialect.name == "postgresql":
230 return dialect.type_descriptor(UUID())
231 else:
232 return dialect.type_descriptor(sqlalchemy.CHAR(32))
234 def process_bind_param(self, value: Any, dialect: sqlalchemy.Dialect) -> str | None:
235 if value is None:
236 return value
        # Coerce the input to a UUID. In general a UUID is the only thing we
        # want on input, but there is code right now that uses ints.
        if isinstance(value, int):
            value = uuid.UUID(int=value)
        elif isinstance(value, bytes):
            value = uuid.UUID(bytes=value)
        elif isinstance(value, str):
            # hexstring
            value = uuid.UUID(hex=value)
        elif not isinstance(value, uuid.UUID):
            raise TypeError(f"Unexpected type of a bind value: {type(value)}")

        if dialect.name == "postgresql":
            return str(value)
        else:
            return "%.32x" % value.int

    def process_result_value(
        self, value: str | uuid.UUID | None, dialect: sqlalchemy.Dialect
    ) -> uuid.UUID | None:
        if value is None or isinstance(value, uuid.UUID):
            # sqlalchemy 2 converts to UUID internally
            return value
        else:
            return uuid.UUID(hex=value)


VALID_CONFIG_COLUMN_TYPES = {
    "string": sqlalchemy.String,
    "int": sqlalchemy.BigInteger,
    "float": sqlalchemy.Float,
    "region": Base64Region,
    "bool": sqlalchemy.Boolean,
    "blob": sqlalchemy.LargeBinary,
    "datetime": AstropyTimeNsecTai,
    "hash": Base64Bytes,
    "uuid": GUID,
}


@dataclass
class FieldSpec:
    """A data class for defining a column in a logical `Registry` table."""

    name: str
    """Name of the column."""

    dtype: type
    """Type of the column; usually a `type` subclass provided by SQLAlchemy
    that defines both a Python type and a corresponding precise SQL type.
    """

    length: int | None = None
    """Length of the type in the database, for variable-length types."""

    nbytes: int | None = None
    """Natural length used for hash and encoded-region columns, to be
    converted into the post-encoding length.
    """

    primaryKey: bool = False
    """Whether this field is (part of) its table's primary key."""

    autoincrement: bool = False
    """Whether the database should insert automatically incremented values
    when no value is provided in an INSERT.
    """

    nullable: bool = True
    """Whether this field is allowed to be NULL. If ``primaryKey`` is
    `True`, during construction this value will be forced to `False`."""

    default: Any = None
    """A server-side default value for this field.

    This is passed directly as the ``server_default`` argument to
    `sqlalchemy.schema.Column`. It does _not_ go through SQLAlchemy's usual
    type conversion or quoting for Python literals, and should hence be used
    with care. See the SQLAlchemy documentation for more information.
    """

    doc: str | None = None
    """Documentation for this field."""

    def __post_init__(self) -> None:
        if self.primaryKey:
            # Change the default to match primaryKey.
            self.nullable = False

    def __eq__(self, other: Any) -> bool:
        if isinstance(other, FieldSpec):
            return self.name == other.name
        else:
            return NotImplemented

    def __hash__(self) -> int:
        return hash(self.name)

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in column config '{config}'.")
    def fromConfig(cls, config: Config, **kwargs: Any) -> FieldSpec:
        """Create a `FieldSpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config : `Config`
            Configuration describing the column. Nested configuration keys
            correspond to `FieldSpec` attributes.
        **kwargs
            Additional keyword arguments that provide defaults for values
            not present in config.

        Returns
        -------
        spec : `FieldSpec`
            Specification structure for the column.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
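
        Examples
        --------
        A sketch of the expected configuration shape; keys mirror the
        attributes of this class and the values below are illustrative::

            config = Config({"name": "visit", "type": "int", "primaryKey": True})
            spec = FieldSpec.fromConfig(config)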
359 """
360 dtype = VALID_CONFIG_COLUMN_TYPES.get(config["type"])
361 if dtype is None:
362 raise SchemaValidationError(f"Invalid field type string: '{config['type']}'.")
363 if not config["name"].islower():
364 raise SchemaValidationError(f"Column name '{config['name']}' is not all lowercase.")
365 self = cls(name=config["name"], dtype=dtype, **kwargs)
366 self.length = config.get("length", self.length)
367 self.nbytes = config.get("nbytes", self.nbytes)
368 if self.length is not None and self.nbytes is not None:
369 raise SchemaValidationError(f"Both length and nbytes provided for field '{self.name}'.")
370 self.primaryKey = config.get("primaryKey", self.primaryKey)
371 self.autoincrement = config.get("autoincrement", self.autoincrement)
372 self.nullable = config.get("nullable", False if self.primaryKey else self.nullable)
373 self.doc = stripIfNotNone(config.get("doc", None))
374 return self
376 @classmethod
377 def for_region(cls, name: str = "region", nullable: bool = True, nbytes: int = 2048) -> FieldSpec:
378 """Create a `FieldSpec` for a spatial region column.
380 Parameters
381 ----------
382 name : `str`, optional
383 Name for the field.
384 nullable : `bool`, optional
385 Whether NULL values are permitted.
386 nbytes : `int`, optional
387 Maximum number of bytes for serialized regions. The actual column
388 size will be larger to allow for base-64 encoding.
390 Returns
391 -------
392 spec : `FieldSpec`
393 Specification structure for a region column.
394 """
395 return cls(name, nullable=nullable, dtype=Base64Region, nbytes=nbytes)
397 def isStringType(self) -> bool:
398 """Indicate that this is a sqlalchemy.String field spec.
400 Returns
401 -------
402 isString : `bool`
403 The field refers to a `sqlalchemy.String` and not any other type.
404 This can return `False` even if the object was created with a
405 string type if it has been decided that it should be implemented
406 as a `sqlalchemy.Text` type.
407 """
408 # For short strings retain them as strings
409 if self.dtype is sqlalchemy.String and self.length and self.length <= 32:
410 return True
411 return False
413 def getSizedColumnType(self) -> sqlalchemy.types.TypeEngine | type:
414 """Return a sized version of the column type.
416 Utilizes either (or neither) of ``self.length`` and ``self.nbytes``.
418 Returns
419 -------
420 dtype : `sqlalchemy.types.TypeEngine`
421 A SQLAlchemy column type object.
422 """
423 if self.length is not None:
            # Last-chance check that we are only dealing with a possible String type.
            if self.dtype is sqlalchemy.String and not self.isStringType():
                return sqlalchemy.Text
            return self.dtype(length=self.length)
        if self.nbytes is not None:
            return self.dtype(nbytes=self.nbytes)
        return self.dtype

    def getPythonType(self) -> type:
        """Return the Python type associated with this field's (SQL) dtype.

        Returns
        -------
        type : `type`
            Python type associated with this field's (SQL) `dtype`.
        """
        # to construct these objects, nbytes keyword is needed
        if issubclass(self.dtype, LocalBase64Bytes):
            # satisfy mypy for something that must be true
            assert self.nbytes is not None
            return self.dtype(nbytes=self.nbytes).python_type
        else:
            return self.dtype().python_type  # type: ignore


@dataclass
class ForeignKeySpec:
    """Definition of a foreign key constraint in a logical `Registry` table."""

    table: str
    """Name of the target table."""

    source: tuple[str, ...]
    """Tuple of source table column names."""

    target: tuple[str, ...]
    """Tuple of target table column names."""

    onDelete: str | None = None
    """SQL clause indicating how to handle deletes to the target table.

    If not `None` (which indicates that a constraint violation exception
    should be raised), should be either "SET NULL" or "CASCADE".
    """

    addIndex: bool = True
    """If `True`, create an index on the columns of this foreign key in the
    source table.
    """

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in foreignKey config '{config}'.")
    def fromConfig(cls, config: Config) -> ForeignKeySpec:
        """Create a `ForeignKeySpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config : `Config`
            Configuration describing the constraint. Nested configuration
            keys correspond to `ForeignKeySpec` attributes.

        Returns
        -------
        spec : `ForeignKeySpec`
            Specification structure for the constraint.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
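
        Examples
        --------
        A sketch of the expected configuration shape (table and column names
        are illustrative); ``source`` and ``target`` may be single names or
        lists of names::

            config = Config({"table": "visit", "source": "visit_id", "target": "id"})
            spec = ForeignKeySpec.fromConfig(config)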
494 """
495 return cls(
496 table=config["table"],
497 source=tuple(ensure_iterable(config["source"])),
498 target=tuple(ensure_iterable(config["target"])),
499 onDelete=config.get("onDelete", None),
500 )
503@dataclass(frozen=True)
504class IndexSpec:
505 """Specification of an index on table columns.
507 Parameters
508 ----------
509 *columns : `str`
510 Names of the columns to index.
511 **kwargs: `Any`
512 Additional keyword arguments to pass directly to
513 `sqlalchemy.schema.Index` constructor. This could be used to provide
514 backend-specific options, e.g. to create a ``GIST`` index in PostgreSQL
515 one can pass ``postgresql_using="gist"``.
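
    Examples
    --------
    A plain index and a backend-specific one, as described above (column
    names are illustrative)::

        IndexSpec("name")
        IndexSpec("region", postgresql_using="gist")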
516 """
518 def __init__(self, *columns: str, **kwargs: Any):
519 object.__setattr__(self, "columns", tuple(columns))
520 object.__setattr__(self, "kwargs", kwargs)
522 def __hash__(self) -> int:
523 return hash(self.columns)
525 columns: tuple[str, ...]
526 """Column names to include in the index (`Tuple` [ `str` ])."""
528 kwargs: dict[str, Any]
529 """Additional keyword arguments passed directly to
530 `sqlalchemy.schema.Index` constructor (`dict` [ `str`, `Any` ]).
531 """
534@dataclass
535class TableSpec:
536 """A data class used to define a table or table-like query interface.
538 Parameters
539 ----------
540 fields : `~collections.abc.Iterable` [ `FieldSpec` ]
541 Specifications for the columns in this table.
542 unique : `~collections.abc.Iterable` [ `tuple` [ `str` ] ], optional
543 Non-primary-key unique constraints for the table.
544 indexes: `~collections.abc.Iterable` [ `IndexSpec` ], optional
545 Indexes for the table.
546 foreignKeys : `~collections.abc.Iterable` [ `ForeignKeySpec` ], optional
547 Foreign key constraints for the table.
548 exclusion : `~collections.abc.Iterable` [ `tuple` [ `str` or `type` ] ]
549 Special constraints that prohibit overlaps between timespans over rows
550 where other columns are equal. These take the same form as unique
551 constraints, but each tuple may contain a single
552 `TimespanDatabaseRepresentation` subclass representing a timespan
553 column.
554 recycleIds : `bool`, optional
555 If `True`, allow databases that might normally recycle autoincrement
556 IDs to do so (usually better for performance) on any autoincrement
557 field in this table.
558 doc : `str`, optional
559 Documentation for the table.
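
    Examples
    --------
    A small illustrative specification (field and constraint names are made
    up for the example)::

        spec = TableSpec(
            fields=[
                FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True, autoincrement=True),
                FieldSpec("name", dtype=sqlalchemy.String, length=64),
            ],
            unique={("name",)},
            indexes={IndexSpec("name")},
        )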
560 """
562 def __init__(
563 self,
564 fields: Iterable[FieldSpec],
565 *,
566 unique: Iterable[tuple[str, ...]] = (),
567 indexes: Iterable[IndexSpec] = (),
568 foreignKeys: Iterable[ForeignKeySpec] = (),
569 exclusion: Iterable[tuple[str | type[TimespanDatabaseRepresentation], ...]] = (),
570 recycleIds: bool = True,
571 doc: str | None = None,
572 ):
573 self.fields = NamedValueSet(fields)
574 self.unique = set(unique)
575 self.indexes = set(indexes)
576 self.foreignKeys = list(foreignKeys)
577 self.exclusion = set(exclusion)
578 self.recycleIds = recycleIds
579 self.doc = doc
581 fields: NamedValueSet[FieldSpec]
582 """Specifications for the columns in this table."""
584 unique: set[tuple[str, ...]]
585 """Non-primary-key unique constraints for the table."""
587 indexes: set[IndexSpec]
588 """Indexes for the table."""
590 foreignKeys: list[ForeignKeySpec]
591 """Foreign key constraints for the table."""
593 exclusion: set[tuple[str | type[TimespanDatabaseRepresentation], ...]]
594 """Exclusion constraints for the table.
596 Exclusion constraints behave mostly like unique constraints, but may
597 contain a database-native Timespan column that is restricted to not overlap
598 across rows (for identical combinations of any non-Timespan columns in the
599 constraint).
600 """
602 recycleIds: bool = True
603 """If `True`, allow databases that might normally recycle autoincrement IDs
604 to do so (usually better for performance) on any autoincrement field in
605 this table.
606 """
608 doc: str | None = None
609 """Documentation for the table."""
611 @classmethod
612 @SchemaValidationError.translate(KeyError, "Missing key {err} in table config '{config}'.")
613 def fromConfig(cls, config: Config) -> TableSpec:
614 """Create a `ForeignKeySpec` from a subset of a `SchemaConfig`.
616 Parameters
617 ----------
618 config: `Config`
619 Configuration describing the constraint. Nested configuration keys
620 correspond to `TableSpec` attributes.

        Returns
        -------
        spec : `TableSpec`
            Specification structure for the table.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
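
        Examples
        --------
        A sketch of the expected configuration shape; the ``columns``,
        ``unique``, ``foreignKeys``, and ``doc`` keys are the ones read here,
        and the values below are illustrative::

            config = Config(
                {
                    "columns": [{"name": "id", "type": "int", "primaryKey": True}],
                    "unique": [],
                    "doc": "An example table.",
                }
            )
            spec = TableSpec.fromConfig(config)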
631 """
632 return cls(
633 fields=NamedValueSet(FieldSpec.fromConfig(c) for c in config["columns"]),
634 unique={tuple(u) for u in config.get("unique", ())},
635 foreignKeys=[ForeignKeySpec.fromConfig(c) for c in config.get("foreignKeys", ())],
636 doc=stripIfNotNone(config.get("doc")),
637 )