Coverage for python/lsst/daf/butler/core/ddl.py : 52%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21"""Classes for representing SQL data-definition language (DDL; "CREATE TABLE",
22etc.) in Python.
24This provides an extra layer on top of SQLAlchemy's classes for these concepts,
25because we need a level of indirection between logical tables and the actual
26SQL, and SQLAlchemy's DDL classes always map 1-1 to SQL.
28We've opted for the rather more obscure "ddl" as the name of this module
29instead of "schema" because the latter is too overloaded; in most SQL
30databases, a "schema" is also another term for a namespace.
31"""
32from __future__ import annotations
34__all__ = ("TableSpec", "FieldSpec", "ForeignKeySpec", "Base64Bytes", "Base64Region")
36from base64 import b64encode, b64decode
37from math import ceil
38from dataclasses import dataclass
39from typing import Optional, Tuple, Sequence, Set
41import sqlalchemy
43from lsst.sphgeom import ConvexPolygon
44from .config import Config
45from .exceptions import ValidationError
46from .utils import iterable, stripIfNotNone, NamedValueSet
class SchemaValidationError(ValidationError):
    """Exceptions used to indicate problems in Registry schema configuration.
    """

    @classmethod
    def translate(cls, caught, message):
        """A decorator that re-raises exceptions as `SchemaValidationError`.

        Decorated functions must be class or instance methods, with a
        ``config`` parameter as their first argument.  That argument is
        converted with `str` and passed to ``message.format()`` as the
        ``config`` keyword argument, along with ``err``, the original
        exception.

        Parameters
        ----------
        caught : `type` (`Exception` subclass)
            The type of exception to catch.
        message : `str`
            A `str.format` string that may contain named placeholders for
            ``config`` and ``err``.  No other names are supplied to
            ``format``, so any other named placeholder raises `KeyError`
            when the message is built.

        Returns
        -------
        decorate : callable
            A decorator that wraps a method so that any ``caught`` exception
            it raises is replaced by an instance of this class, with the
            original exception attached as ``__cause__``.
        """
        # Imported here so the decorator can preserve the wrapped function's
        # metadata without requiring a new module-level import.
        from functools import wraps

        def decorate(func):
            # ``wraps`` keeps ``__name__``/``__doc__`` of the decorated method,
            # which would otherwise be replaced by those of ``decorated``.
            @wraps(func)
            def decorated(self, config, *args, **kwds):
                try:
                    return func(self, config, *args, **kwds)
                except caught as err:
                    # Chain explicitly so the traceback reports the original
                    # exception as the direct cause.
                    raise cls(message.format(config=str(config), err=err)) from err
            return decorated
        return decorate
class Base64Bytes(sqlalchemy.TypeDecorator):
    """A SQLAlchemy custom type that stores Python `bytes` values in a
    `sqlalchemy.String` column as base64-encoded ASCII text.

    Parameters
    ----------
    nbytes : `int`
        Length of the raw (unencoded) values in bytes; the length of the
        underlying string column is derived from it.
    *args, **kwds
        Forwarded to the base class constructor.
    """

    impl = sqlalchemy.String

    def __init__(self, nbytes, *args, **kwds):
        # Column length is four characters for every (possibly partial)
        # group of three raw bytes.
        encodedLength = 4*ceil(nbytes/3)
        super().__init__(*args, length=encodedLength, **kwds)
        self.nbytes = nbytes

    def process_bind_param(self, value, dialect):
        # Python -> database: native `bytes` are base64-encoded and then
        # decoded as ASCII, because the column implementation is a String.
        if value is None:
            return None
        if isinstance(value, bytes):
            return b64encode(value).decode("ascii")
        raise TypeError(
            f"Base64Bytes fields require 'bytes' values; got '{value}' with type {type(value)}."
        )

    def process_result_value(self, value, dialect):
        # Database -> Python: the stored `str` is base64 text, so it is
        # turned back into ASCII `bytes` and then base64-decoded.
        if value is None:
            return None
        return b64decode(value.encode("ascii"))
class Base64Region(Base64Bytes):
    """A SQLAlchemy custom type that stores `sphgeom.ConvexPolygon` values in
    a `sqlalchemy.String` column, using the base64 handling inherited from
    `Base64Bytes`.
    """

    def process_bind_param(self, value, dialect):
        # Python -> database: serialize the region with its ``encode``
        # method, then let the base class do the base64 step.
        if value is not None:
            return super().process_bind_param(value.encode(), dialect)
        return None

    def process_result_value(self, value, dialect):
        # Database -> Python: undo the base64 step in the base class, then
        # rebuild the polygon from the resulting bytes.
        if value is not None:
            return ConvexPolygon.decode(super().process_result_value(value, dialect))
        return None
# Mapping from the type strings accepted in column configuration (the "type"
# key read by `FieldSpec.fromConfig`) to the column type class used for them.
VALID_CONFIG_COLUMN_TYPES = {
    # Types provided directly by SQLAlchemy.
    "string": sqlalchemy.String,
    "int": sqlalchemy.Integer,
    "float": sqlalchemy.Float,
    # Custom type defined in this module.
    "region": Base64Region,
    "bool": sqlalchemy.Boolean,
    "blob": sqlalchemy.LargeBinary,
    "datetime": sqlalchemy.DateTime,
    # Custom type defined in this module.
    "hash": Base64Bytes,
}
@dataclass
class FieldSpec:
    """A struct-like class used to define a column in a logical `Registry`
    table.

    Instances compare equal and hash by ``name`` only.
    """

    name: str
    """Name of the column."""

    dtype: type
    """Type of the column; usually a `type` subclass provided by SQLAlchemy
    that defines both a Python type and a corresponding precise SQL type.
    """

    length: Optional[int] = None
    """Length of the type in the database, for variable-length types."""

    nbytes: Optional[int] = None
    """Natural length used for hash and encoded-region columns, to be converted
    into the post-encoding length.
    """

    primaryKey: bool = False
    """Whether this field is (part of) its table's primary key."""

    autoincrement: bool = False
    """Whether the database should insert automatically incremented values when
    no value is provided in an INSERT.
    """

    nullable: bool = True
    """Whether this field is allowed to be NULL."""

    doc: Optional[str] = None
    """Documentation for this field."""

    def __eq__(self, other):
        # Equality is by name only.  Objects without a ``name`` attribute are
        # not comparable: return NotImplemented so Python can try the
        # reflected comparison instead of raising AttributeError here.
        try:
            otherName = other.name
        except AttributeError:
            return NotImplemented
        return self.name == otherName

    def __hash__(self):
        # Must stay consistent with __eq__, which only looks at the name.
        return hash(self.name)

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in column config '{config}'.")
    def fromConfig(cls, config: Config, **kwds) -> FieldSpec:
        """Create a `FieldSpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config: `Config`
            Configuration describing the column.  Nested configuration keys
            correspond to `FieldSpec` attributes.  The ``name`` and ``type``
            keys are required; ``type`` must be a key of
            `VALID_CONFIG_COLUMN_TYPES`.
        kwds
            Additional keyword arguments that provide defaults for values
            not present in config.

        Returns
        -------
        spec: `FieldSpec`
            Specification structure for the column.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        dtype = VALID_CONFIG_COLUMN_TYPES.get(config["type"])
        if dtype is None:
            raise SchemaValidationError(f"Invalid field type string: '{config['type']}'.")
        if not config["name"].islower():
            raise SchemaValidationError(f"Column name '{config['name']}' is not all lowercase.")
        # Build the spec from the caller-provided defaults first, then let
        # values present in the config override them one by one.
        self = cls(name=config["name"], dtype=dtype, **kwds)
        self.length = config.get("length", self.length)
        self.nbytes = config.get("nbytes", self.nbytes)
        if self.length is not None and self.nbytes is not None:
            raise SchemaValidationError(f"Both length and nbytes provided for field '{self.name}'.")
        self.primaryKey = config.get("primaryKey", self.primaryKey)
        self.autoincrement = config.get("autoincrement", self.autoincrement)
        # Primary-key columns default to NOT NULL unless the config says
        # otherwise explicitly.
        self.nullable = config.get("nullable", False if self.primaryKey else self.nullable)
        # Fall back to a ``doc`` default passed via ``kwds`` (like every other
        # attribute) instead of discarding it when the config has no "doc".
        self.doc = stripIfNotNone(config.get("doc", self.doc))
        return self

    def getSizedColumnType(self) -> sqlalchemy.types.TypeEngine:
        """Return a sized version of the column type, utilizing either (or
        neither) of ``self.length`` and ``self.nbytes``.

        Returns
        -------
        dtype : `sqlalchemy.types.TypeEngine`
            A SQLAlchemy column type object.  When neither ``length`` nor
            ``nbytes`` is set, ``self.dtype`` itself is returned without
            being instantiated.
        """
        if self.length is not None:
            return self.dtype(length=self.length)
        if self.nbytes is not None:
            return self.dtype(nbytes=self.nbytes)
        return self.dtype
@dataclass
class ForeignKeySpec:
    """A struct-like class describing one foreign key constraint of a logical
    `Registry` table.
    """

    table: str
    """Name of the target table."""

    source: Tuple[str, ...]
    """Tuple of source table column names."""

    target: Tuple[str, ...]
    """Tuple of target table column names."""

    onDelete: Optional[str] = None
    """SQL clause indicating how to handle deletes to the target table.

    If not `None` (which indicates that a constraint violation exception should
    be raised), should be either "SET NULL" or "CASCADE".
    """

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in foreignKey config '{config}'.")
    def fromConfig(cls, config: Config) -> ForeignKeySpec:
        """Build a `ForeignKeySpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config: `Config`
            Configuration describing the constraint.  Nested configuration
            keys correspond to `ForeignKeySpec` attributes; ``table``,
            ``source`` and ``target`` are required, ``onDelete`` is optional.

        Returns
        -------
        spec: `ForeignKeySpec`
            Specification structure for the constraint.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        # Required keys are read in declaration order, so the first missing
        # one is the one reported.
        targetTable = config["table"]
        sourceColumns = tuple(iterable(config["source"]))
        targetColumns = tuple(iterable(config["target"]))
        deleteAction = config.get("onDelete", None)
        return cls(table=targetTable, source=sourceColumns, target=targetColumns, onDelete=deleteAction)
@dataclass
class TableSpec:
    """A struct-like class used to define a table or table-like
    query interface.
    """

    fields: NamedValueSet[FieldSpec]
    """Specifications for the columns in this table."""

    unique: Set[Tuple[str, ...]] = frozenset()
    """Non-primary-key unique constraints for the table."""

    indexes: Set[Tuple[str, ...]] = frozenset()
    """Indexes for the table."""

    foreignKeys: Sequence[ForeignKeySpec] = tuple()
    """Foreign key constraints for the table."""

    doc: Optional[str] = None
    """Documentation for the table."""

    def __post_init__(self):
        # Copy every collection into a fresh container owned by this
        # instance, so the immutable class-level defaults (and any
        # collections passed in by the caller) are never shared.
        self.fields = NamedValueSet(self.fields)
        self.unique = set(self.unique)
        self.indexes = set(self.indexes)
        self.foreignKeys = list(self.foreignKeys)

    @classmethod
    @SchemaValidationError.translate(KeyError, "Missing key {err} in table config '{config}'.")
    def fromConfig(cls, config: Config) -> TableSpec:
        """Create a `TableSpec` from a subset of a `SchemaConfig`.

        Parameters
        ----------
        config: `Config`
            Configuration describing the table.  Nested configuration keys
            correspond to `TableSpec` attributes.  The ``columns`` key is
            required; ``unique``, ``foreignKeys`` and ``doc`` are optional.

        Returns
        -------
        spec: `TableSpec`
            Specification structure for the table.

        Raises
        ------
        SchemaValidationError
            Raised if configuration keys are missing or have invalid values.
        """
        # Only keywords that are actual `TableSpec` fields may be passed to
        # the constructor; an unknown keyword (such as ``sql``) would make
        # the dataclass ``__init__`` raise TypeError on every call.
        return cls(
            fields=NamedValueSet(FieldSpec.fromConfig(c) for c in config["columns"]),
            unique={tuple(u) for u in config.get("unique", ())},
            foreignKeys=[ForeignKeySpec.fromConfig(c) for c in config.get("foreignKeys", ())],
            doc=stripIfNotNone(config.get("doc")),
        )