Coverage for python/lsst/daf/butler/core/ddl.py : 49%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21"""Classes for representing SQL data-definition language (DDL; "CREATE TABLE",
22etc.) in Python.
24This provides an extra layer on top of SQLAlchemy's classes for these concepts,
25because we need a level of indirection between logical tables and the actual
26SQL, and SQLAlchemy's DDL classes always map 1-1 to SQL.
28We've opted for the rather more obscure "ddl" as the name of this module
29instead of "schema" because the latter is too overloaded; in most SQL
30databases, a "schema" is also another term for a namespace.
31"""
32from __future__ import annotations
34__all__ = ("TableSpec", "FieldSpec", "ForeignKeySpec", "Base64Bytes", "Base64Region",
35 "AstropyTimeNsecTai")
37from base64 import b64encode, b64decode
38import logging
39from math import ceil
40from dataclasses import dataclass
41from typing import Optional, Tuple, Sequence, Set
43import sqlalchemy
44import astropy.time
46from lsst.sphgeom import ConvexPolygon
47from .config import Config
48from .exceptions import ValidationError
49from . import time_utils
50from .utils import iterable, stripIfNotNone, NamedValueSet
53_LOG = logging.getLogger(__name__)
class SchemaValidationError(ValidationError):
    """Exceptions used to indicate problems in Registry schema configuration.
    """

    @classmethod
    def translate(cls, caught, message):
        """Return a decorator that re-raises exceptions as
        `SchemaValidationError`.

        Decorated functions must be class or instance methods, with a
        ``config`` parameter as their first argument after ``self``/``cls``.
        ``str(config)`` is passed to ``message.format()`` as the ``config``
        keyword argument, along with ``err``, the original exception.

        Parameters
        ----------
        caught : `type` (`Exception` subclass)
            The type of exception to catch.
        message : `str`
            A `str.format` string that may contain the named placeholders
            ``config`` and ``err``; no other placeholders are supplied.

        Returns
        -------
        decorate : callable
            A decorator that wraps a method so that exceptions of type
            ``caught`` raised by it are re-raised as instances of ``cls``,
            with the original exception attached as the cause.
        """
        # Local import keeps this block self-contained; the module's import
        # section does not bring in functools.
        from functools import wraps

        def decorate(func):
            # Preserve the wrapped method's name and docstring so that
            # introspection and generated documentation still see them.
            @wraps(func)
            def decorated(self, config, *args, **kwds):
                try:
                    return func(self, config, *args, **kwds)
                except caught as err:
                    # Chain explicitly so the traceback marks the original
                    # exception as the direct cause of the translated one.
                    raise cls(message.format(config=str(config), err=err)) from err
            return decorated
        return decorate
class Base64Bytes(sqlalchemy.TypeDecorator):
    """SQLAlchemy custom type that stores Python `bytes` values as
    base64-encoded text in a `sqlalchemy.String` column.

    Parameters
    ----------
    nbytes : `int`
        Size of the unencoded values in bytes; used to compute the length of
        the underlying string column.
    *args, **kwds
        Forwarded to the base class constructor.
    """

    impl = sqlalchemy.String

    def __init__(self, nbytes, *args, **kwds):
        # base64 produces four output characters for every (possibly partial)
        # group of three input bytes.
        encodedLength = ceil(nbytes/3)*4
        super().__init__(*args, length=encodedLength, **kwds)
        self.nbytes = nbytes

    def process_bind_param(self, value, dialect):
        # Python -> database: native `bytes` are base64-encoded and then
        # decoded as ASCII, since the underlying String column expects `str`.
        if value is None:
            return None
        if isinstance(value, bytes):
            return b64encode(value).decode("ascii")
        raise TypeError(
            f"Base64Bytes fields require 'bytes' values; got '{value}' with type {type(value)}."
        )

    def process_result_value(self, value, dialect):
        # Database -> Python: the stored `str` is base64 text (hence ASCII);
        # turn it back into base64 `bytes` and decode to the native `bytes`.
        if value is None:
            return None
        return b64decode(value.encode("ascii"))
class Base64Region(Base64Bytes):
    """SQLAlchemy custom type that stores `sphgeom.ConvexPolygon` objects as
    base64-encoded text in a `sqlalchemy.String` column.
    """

    def process_bind_param(self, value, dialect):
        # Serialize the region via its own ``encode()`` and let the base
        # class handle the base64 step; NULL passes straight through.
        if value is not None:
            serialized = value.encode()
            return super().process_bind_param(serialized, dialect)
        return None

    def process_result_value(self, value, dialect):
        # Reverse of the above: base64 text -> bytes -> ConvexPolygon.
        if value is not None:
            serialized = super().process_result_value(value, dialect)
            return ConvexPolygon.decode(serialized)
        return None
class AstropyTimeNsecTai(sqlalchemy.TypeDecorator):
    """SQLAlchemy custom type that stores `astropy.time.Time` values as an
    integer number of nanoseconds since Unix epoch in TAI scale.
    """

    impl = sqlalchemy.BigInteger

    def process_bind_param(self, value, dialect):
        # Python -> database; `None` maps to NULL, anything other than an
        # astropy Time is rejected.
        if value is None:
            return None
        if isinstance(value, astropy.time.Time):
            return time_utils.astropy_to_nsec(value)
        raise TypeError(f"Unsupported type: {type(value)}, expected astropy.time.Time")

    def process_result_value(self, value, dialect):
        # Database -> Python; the stored value is nanoseconds since epoch,
        # or NULL.
        return None if value is None else time_utils.nsec_to_astropy(value)
# Mapping from the "type" strings accepted in column configuration to the
# SQLAlchemy (or custom TypeDecorator) class used for that column.
VALID_CONFIG_COLUMN_TYPES = {
    # Types provided directly by SQLAlchemy.
    "string": sqlalchemy.String,
    "int": sqlalchemy.Integer,
    "float": sqlalchemy.Float,
    # Custom type: sphgeom regions stored as base64 text.
    "region": Base64Region,
    "bool": sqlalchemy.Boolean,
    "blob": sqlalchemy.LargeBinary,
    # Custom type: astropy times stored as integer nanoseconds.
    "datetime": AstropyTimeNsecTai,
    # Custom type: raw bytes stored as base64 text.
    "hash": Base64Bytes,
}
@dataclass
class FieldSpec:
    """A struct-like class used to define a column in a logical `Registry`
    table.

    Equality and hashing are based on `name` only.
    """

    name: str
    """Name of the column."""

    dtype: type
    """Type of the column; usually a `type` subclass provided by SQLAlchemy
    that defines both a Python type and a corresponding precise SQL type.
    """

    length: Optional[int] = None
    """Length of the type in the database, for variable-length types."""

    nbytes: Optional[int] = None
    """Natural length used for hash and encoded-region columns, to be converted
    into the post-encoding length.
    """

    primaryKey: bool = False
    """Whether this field is (part of) its table's primary key."""

    autoincrement: bool = False
    """Whether the database should insert automatically incremented values when
    no value is provided in an INSERT.
    """

    nullable: bool = True
    """Whether this field is allowed to be NULL."""

    doc: Optional[str] = None
    """Documentation for this field."""

    def __eq__(self, other):
        # Defer to the other operand (and ultimately to identity comparison)
        # for unrelated types instead of failing with AttributeError on
        # ``other.name``.
        if not isinstance(other, FieldSpec):
            return NotImplemented
        return self.name == other.name

    def __hash__(self):
        # Must stay consistent with __eq__, which compares names only.
        return hash(self.name)

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in column config '{config}'.")
    def fromConfig(cls, config: Config, **kwds) -> FieldSpec:
        """Create a `FieldSpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config: `Config`
            Configuration describing the column.  Nested configuration keys
            correspond to `FieldSpec` attributes; ``name`` and ``type`` are
            required, with ``type`` being one of the keys of
            ``VALID_CONFIG_COLUMN_TYPES``.
        kwds
            Additional keyword arguments that provide defaults for values
            not present in config.

        Returns
        -------
        spec: `FieldSpec`
            Specification structure for the column.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        dtype = VALID_CONFIG_COLUMN_TYPES.get(config["type"])
        if dtype is None:
            raise SchemaValidationError(f"Invalid field type string: '{config['type']}'.")
        if not config["name"].islower():
            raise SchemaValidationError(f"Column name '{config['name']}' is not all lowercase.")
        # Start from the caller-provided defaults, then let config override.
        self = cls(name=config["name"], dtype=dtype, **kwds)
        self.length = config.get("length", self.length)
        self.nbytes = config.get("nbytes", self.nbytes)
        if self.length is not None and self.nbytes is not None:
            raise SchemaValidationError(f"Both length and nbytes provided for field '{self.name}'.")
        self.primaryKey = config.get("primaryKey", self.primaryKey)
        self.autoincrement = config.get("autoincrement", self.autoincrement)
        # Primary key columns default to NOT NULL unless config says otherwise.
        self.nullable = config.get("nullable", False if self.primaryKey else self.nullable)
        # Fall back to any ``doc`` default passed through ``kwds`` rather than
        # unconditionally discarding it when config has no "doc" entry.
        self.doc = stripIfNotNone(config.get("doc", self.doc))
        return self

    def getSizedColumnType(self) -> sqlalchemy.types.TypeEngine:
        """Return a sized version of the column type, utilizing either (or
        neither) of ``self.length`` and ``self.nbytes``.

        Returns
        -------
        dtype : `sqlalchemy.types.TypeEngine`
            A SQLAlchemy column type object; when neither ``length`` nor
            ``nbytes`` is set, ``self.dtype`` itself is returned without
            being instantiated.
        """
        if self.length is not None:
            return self.dtype(length=self.length)
        if self.nbytes is not None:
            return self.dtype(nbytes=self.nbytes)
        return self.dtype
@dataclass
class ForeignKeySpec:
    """Struct-like description of a foreign key constraint in a logical
    `Registry` table.
    """

    table: str
    """Name of the target table."""

    source: Tuple[str, ...]
    """Tuple of source table column names."""

    target: Tuple[str, ...]
    """Tuple of target table column names."""

    onDelete: Optional[str] = None
    """SQL clause indicating how to handle deletes to the target table.

    If not `None` (which indicates that a constraint violation exception should
    be raised), should be either "SET NULL" or "CASCADE".
    """

    addIndex: bool = True
    """If `True`, create an index on the columns of this foreign key in the
    source table.
    """

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in foreignKey config '{config}'.")
    def fromConfig(cls, config: Config) -> ForeignKeySpec:
        """Build a `ForeignKeySpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config: `Config`
            Configuration describing the constraint.  The ``table``,
            ``source`` and ``target`` keys are required; ``onDelete`` is
            optional.

        Returns
        -------
        spec: `ForeignKeySpec`
            Specification structure for the constraint.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        targetTable = config["table"]
        # ``iterable`` is applied before building the tuples so that the
        # config values need not already be sequences of names.
        sourceColumns = tuple(iterable(config["source"]))
        targetColumns = tuple(iterable(config["target"]))
        deleteAction = config.get("onDelete", None)
        return cls(table=targetTable, source=sourceColumns, target=targetColumns, onDelete=deleteAction)
@dataclass
class TableSpec:
    """A struct-like class used to define a table or table-like
    query interface.
    """

    fields: NamedValueSet[FieldSpec]
    """Specifications for the columns in this table."""

    unique: Set[Tuple[str, ...]] = frozenset()
    """Non-primary-key unique constraints for the table."""

    indexes: Set[Tuple[str, ...]] = frozenset()
    """Indexes for the table."""

    foreignKeys: Sequence[ForeignKeySpec] = tuple()
    """Foreign key constraints for the table."""

    recycleIds: bool = True
    """If `True`, allow databases that might normally recycle autoincrement IDs
    to do so (usually better for performance) on any autoincrement field in
    this table.
    """

    doc: Optional[str] = None
    """Documentation for the table."""

    def __post_init__(self):
        # Copy everything into fresh mutable containers owned by this
        # instance; the immutable class-level defaults (frozenset/tuple) are
        # only placeholders and are never shared or modified.
        self.fields = NamedValueSet(self.fields)
        self.unique = set(self.unique)
        self.indexes = set(self.indexes)
        self.foreignKeys = list(self.foreignKeys)

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in table config '{config}'.")
    def fromConfig(cls, config: Config) -> TableSpec:
        """Create a `TableSpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config: `Config`
            Configuration describing the table.  The ``columns`` key is
            required; ``unique``, ``foreignKeys`` and ``doc`` are optional.

        Returns
        -------
        spec: `TableSpec`
            Specification structure for the table.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        # Only pass keywords that are actual dataclass fields: there is no
        # ``sql`` field on this class, so forwarding one would make every
        # call fail with TypeError.
        return cls(
            fields=NamedValueSet(FieldSpec.fromConfig(c) for c in config["columns"]),
            unique={tuple(u) for u in config.get("unique", ())},
            foreignKeys=[ForeignKeySpec.fromConfig(c) for c in config.get("foreignKeys", ())],
            doc=stripIfNotNone(config.get("doc")),
        )