Coverage for python/lsst/daf/butler/core/_topology.py : 58%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
# Public names exported by ``from <this module> import *``.
__all__ = (
    "SpatialRegionDatabaseRepresentation",
    "TopologicalSpace",
    "TopologicalFamily",
    "TopologicalRelationshipEndpoint",
    "TopologicalExtentDatabaseRepresentation",
)
32from abc import ABC, abstractmethod
33import enum
34from typing import (
35 Any,
36 ClassVar,
37 Dict,
38 Generic,
39 Iterator,
40 Mapping,
41 Optional,
42 Tuple,
43 Type,
44 TypeVar,
45)
47import sqlalchemy
49import lsst.sphgeom
50from . import ddl
51from .named import NamedValueAbstractSet
52from .utils import immutable
@enum.unique
class TopologicalSpace(enum.Enum):
    """Categories of continuous-variable relationships understood by the
    dimensions system.

    The majority of dimension relationships are discrete: they are ordinary
    foreign key relationships between tables.  A relationship tied to a
    `TopologicalSpace` is different - each table row occupies a region of some
    continuous-variable space, and rows are related to each other through
    topological operators (such as "overlaps") acting on those regions.
    """

    SPATIAL = enum.auto()
    """The (spherical) sky; regions are held in memory as
    `lsst.sphgeom.Region` objects.
    """

    TEMPORAL = enum.auto()
    """Time; intervals are held in memory as `Timespan` instances (with TAI
    endpoints).
    """
@immutable
class TopologicalFamily(ABC):
    """A grouping of `TopologicalRelationshipEndpoint` objects whose regions
    form a hierarchy in which one endpoint's rows always contain another's in a
    predefined way.

    This hierarchy means that endpoints in the same family do not generally
    have to be related using (e.g.) overlaps; instead, the regions from one
    "best" endpoint from each family are related to the best endpoint from
    each other family in a query.

    Parameters
    ----------
    name : `str`
        Unique string identifier for this family.
    space : `TopologicalSpace`
        Space in which the regions of this family live.
    """

    def __init__(
        self,
        name: str,
        space: TopologicalSpace,
    ):
        self.name = name
        self.space = space

    def __eq__(self, other: Any) -> bool:
        # Two families are equal only when both their space and their name
        # match; an object that is not a `TopologicalFamily` is never equal.
        if isinstance(other, TopologicalFamily):
            return self.space == other.space and self.name == other.name
        return False

    def __hash__(self) -> int:
        # Hashing on the name alone stays consistent with `__eq__`, because
        # families that compare equal necessarily share a name.
        return hash(self.name)

    def __contains__(self, other: TopologicalRelationshipEndpoint) -> bool:
        # An endpoint is a member of this family when the family it reports
        # for this family's space compares equal to ``self``.  Endpoints with
        # no entry for that space yield `None` from ``get`` and so are not
        # members.
        return other.topology.get(self.space) == self

    @abstractmethod
    def choose(
        self, endpoints: NamedValueAbstractSet[TopologicalRelationshipEndpoint]
    ) -> TopologicalRelationshipEndpoint:
        """Select the best member of this family to use in a query join or
        data ID when more than one is present.

        Usually this should correspond to the most fine-grained region.

        Parameters
        ----------
        endpoints : `NamedValueAbstractSet` [`TopologicalRelationshipEndpoint`]
            Endpoints to choose from.  May include endpoints that are not
            members of this family (which should be ignored).

        Returns
        -------
        best : `TopologicalRelationshipEndpoint`
            The best endpoint that is both a member of ``self`` and in
            ``endpoints``.
        """
        raise NotImplementedError()

    name: str
    """Unique string identifier for this family (`str`).
    """

    space: TopologicalSpace
    """Space in which the regions of this family live (`TopologicalSpace`).
    """
@immutable
class TopologicalRelationshipEndpoint(ABC):
    """Abstract interface for objects that represent a logical table able to
    take part in the overlap joins defined by a `TopologicalSpace`.
    """

    @property
    @abstractmethod
    def name(self) -> str:
        """Unique string identifier for this endpoint (`str`).
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def topology(self) -> Mapping[TopologicalSpace, TopologicalFamily]:
        """The relationship families this endpoint belongs to, keyed by the
        `TopologicalSpace` of each family
        (`Mapping` [ `TopologicalSpace`, `TopologicalFamily` ]).
        """
        raise NotImplementedError()

    @property
    def spatial(self) -> Optional[TopologicalFamily]:
        """This endpoint's `~TopologicalSpace.SPATIAL` family, or `None` if
        `topology` has no entry for that space.
        """
        families = self.topology
        return families.get(TopologicalSpace.SPATIAL)

    @property
    def temporal(self) -> Optional[TopologicalFamily]:
        """This endpoint's `~TopologicalSpace.TEMPORAL` family, or `None` if
        `topology` has no entry for that space.
        """
        families = self.topology
        return families.get(TopologicalSpace.TEMPORAL)
# ``_S`` stands for "some subclass of the representation class" so that
# `fromSelectable` can be typed as returning an instance of the class it was
# called on; ``_R`` is the in-memory region type a representation maps.
_S = TypeVar("_S", bound="TopologicalExtentDatabaseRepresentation")
_R = TypeVar("_R")


class TopologicalExtentDatabaseRepresentation(Generic[_R]):
    """An abstract base class whose subclasses provide a mapping from the
    in-memory representation of a `TopologicalSpace` region to a
    database-storage representation, and whose instances act like a
    SQLAlchemy-based column expression.

    Notes
    -----
    This class derives only from `typing.Generic`, not `abc.ABC`, so the
    `abc.abstractmethod` markers below document the interface but are not
    enforced at instantiation time; unimplemented methods raise
    `NotImplementedError` when called.
    """

    NAME: ClassVar[str]
    """Name to use for this logical column in tables (`str`).

    If the representation actually uses multiple columns, this will just be
    part of the names of those columns.  Queries (and tables that represent
    materialized queries) may use a different name (via the ``name`` parameters
    to various methods) in order to disambiguate between the regions associated
    with different tables.
    """

    SPACE: ClassVar[TopologicalSpace]
    """Topological space in which the regions represented by this class exist.
    """

    @classmethod
    @abstractmethod
    def makeFieldSpecs(
        cls, nullable: bool, name: Optional[str] = None, **kwargs: Any
    ) -> Tuple[ddl.FieldSpec, ...]:
        """Make one or more `ddl.FieldSpec` objects that reflect the fields
        that must be added to a table for this representation.

        Parameters
        ----------
        nullable : `bool`
            If `True`, the region is permitted to be logically ``NULL``
            (mapped to `None` in Python), though the corresponding value(s) in
            the database are implementation-defined.  Nullable region fields
            default to NULL, while others default to (-∞, ∞).
        name : `str`, optional
            Name for the logical column; a part of the name for multi-column
            representations.  Defaults to ``cls.NAME``.
        **kwargs
            Keyword arguments are forwarded to the `ddl.FieldSpec` constructor
            for all fields; implementations only provide the ``name``,
            ``dtype``, and ``default`` arguments themselves.

        Returns
        -------
        specs : `tuple` [ `ddl.FieldSpec` ]
            Field specification objects; length of the tuple is
            subclass-dependent, but is guaranteed to match the length of the
            return values of `getFieldNames` and `update`.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getFieldNames(cls, name: Optional[str] = None) -> Tuple[str, ...]:
        """Return the actual field names used by this representation.

        Parameters
        ----------
        name : `str`, optional
            Name for the logical column; a part of the name for multi-column
            representations.  Defaults to ``cls.NAME``.

        Returns
        -------
        names : `tuple` [ `str` ]
            Field name(s).  Guaranteed to be the same as the names of the field
            specifications returned by `makeFieldSpecs`.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def update(
        cls,
        extent: Optional[_R],
        name: Optional[str] = None,
        result: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        """Add a region to a dictionary that represents a database row
        in this representation.

        Parameters
        ----------
        extent
            An instance of the region type this class provides a database
            representation for, or `None` for ``NULL``.
        name : `str`, optional
            Name for the logical column; a part of the name for multi-column
            representations.  Defaults to ``cls.NAME``.
        result : `dict` [ `str`, `Any` ], optional
            A dictionary representing a database row that fields should be
            added to, or `None` to create and return a new one.

        Returns
        -------
        result : `dict` [ `str`, `Any` ]
            A dictionary containing this representation of a region.  Exactly
            the `dict` passed as ``result`` if that is not `None`.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def extract(
        cls, mapping: Mapping[str, Any], name: Optional[str] = None
    ) -> Optional[_R]:
        """Extract a region from a dictionary that represents a
        database row in this representation.

        Parameters
        ----------
        mapping : `Mapping` [ `str`, `Any` ]
            A dictionary representing a database row containing a region
            in this representation.  Should have key(s) equal to the return
            value of `getFieldNames`.
        name : `str`, optional
            Name for the logical column; a part of the name for multi-column
            representations.  Defaults to ``cls.NAME``.

        Returns
        -------
        region
            Python representation of the region.
        """
        raise NotImplementedError()

    @classmethod
    def hasExclusionConstraint(cls) -> bool:
        """Return `True` if this representation supports exclusion constraints.

        Returns
        -------
        supported : `bool`
            If `True`, defining a constraint via `ddl.TableSpec.exclusion` that
            includes the fields of this representation is allowed.  This base
            implementation always returns `False`.
        """
        return False

    @classmethod
    @abstractmethod
    def fromSelectable(
        cls: Type[_S],
        selectable: sqlalchemy.sql.FromClause,
        name: Optional[str] = None,
    ) -> _S:
        """Construct an instance that represents a logical column (which may
        actually be backed by multiple columns) in the given table or subquery.

        Parameters
        ----------
        selectable : `sqlalchemy.sql.FromClause`
            SQLAlchemy object representing a table or subquery.
        name : `str`, optional
            Name for the logical column; a part of the name for multi-column
            representations.  Defaults to ``cls.NAME``.

        Returns
        -------
        representation : `TopologicalExtentDatabaseRepresentation`
            Object representing a logical column.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def name(self) -> str:
        """Base logical name for the topological extent (`str`).

        If the representation uses only one actual column, this should be the
        full name of the column.  In other cases it is an unspecified subset of
        the column names.
        """
        raise NotImplementedError()

    @abstractmethod
    def isNull(self) -> sqlalchemy.sql.ColumnElement:
        """Return a SQLAlchemy expression that tests whether this region is
        logically ``NULL``.

        Returns
        -------
        isnull : `sqlalchemy.sql.ColumnElement`
            A boolean SQLAlchemy expression object.
        """
        raise NotImplementedError()

    @abstractmethod
    def flatten(
        self, name: Optional[str] = None
    ) -> Iterator[sqlalchemy.sql.ColumnElement]:
        """Return the actual column or columns that comprise this logical
        column.

        Parameters
        ----------
        name : `str`, optional
            If provided, a name for the logical column that should be used to
            label the columns.  If not provided, the columns' native names will
            be used.

        Returns
        -------
        columns : `Iterator` [ `sqlalchemy.sql.ColumnElement` ]
            The true column or columns that back this object.
        """
        raise NotImplementedError()
class SpatialRegionDatabaseRepresentation(TopologicalExtentDatabaseRepresentation[lsst.sphgeom.Region]):
    """An object that encapsulates how spatial regions on the sky are
    represented in a database engine.

    Instances should be constructed via `fromSelectable`, not by calling the
    constructor directly.

    Parameters
    ----------
    column : `sqlalchemy.sql.ColumnElement`
        Column containing the opaque byte-string, with automatic conversion to
        `lsst.sphgeom.Region` implemented via SQLAlchemy hooks.
    name : `str`
        Name of the column.

    Notes
    -----
    Unlike `TimespanDatabaseRepresentation`, this is a concrete class, because
    we currently do not support any database-native spatial regions, and
    instead rely on precomputed overlaps and opaque (to the database) byte
    string columns.  As a result, it also does not support any in-database
    topological predicates.

    If we add support for database-native regions in the future, this class may
    become an ABC with multiple concrete implementations.
    """

    def __init__(self, column: sqlalchemy.sql.ColumnElement, name: str):
        self.column = column
        self._name = name

    NAME: ClassVar[str] = "region"
    SPACE: ClassVar[TopologicalSpace] = TopologicalSpace.SPATIAL

    @classmethod
    def makeFieldSpecs(
        cls, nullable: bool, name: Optional[str] = None, **kwargs: Any
    ) -> Tuple[ddl.FieldSpec, ...]:
        # Docstring inherited.
        if name is None:
            name = cls.NAME
        # NOTE(review): ``nullable`` and ``**kwargs`` are accepted for
        # interface compatibility but are not passed on to `ddl.FieldSpec`
        # here.  Forwarding them would change the generated schema, so the
        # existing behavior is deliberately left untouched.
        #
        # Most regions are small (they're quadrilaterals), but visit ones can
        # be quite large because they have a complicated boundary.  For HSC,
        # that's about ~1400 bytes, and I've just rounded up to the nearest
        # power of two.  Given what we now know about variable-length TEXT
        # having no performance penalties in PostgreSQL and SQLite vs.
        # fixed-length strings, there's probably a variable-length bytes type
        # we should be using instead, but that's a schema change and hence
        # something we won't be doing anytime soon.
        return (ddl.FieldSpec(name, nbytes=2048, dtype=ddl.Base64Region),)

    @classmethod
    def getFieldNames(cls, name: Optional[str] = None) -> Tuple[str, ...]:
        # Docstring inherited.
        if name is None:
            name = cls.NAME
        return (name,)

    @classmethod
    def update(
        cls,
        extent: Optional[lsst.sphgeom.Region],
        name: Optional[str] = None,
        result: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        # Docstring inherited.
        if name is None:
            name = cls.NAME
        if result is None:
            result = {}
        # The region object is stored directly under the single column name.
        result[name] = extent
        return result

    @classmethod
    def extract(
        cls, mapping: Mapping[str, Any], name: Optional[str] = None
    ) -> Optional[lsst.sphgeom.Region]:
        # Docstring inherited.
        if name is None:
            name = cls.NAME
        return mapping[name]

    @classmethod
    def fromSelectable(
        cls: Type[SpatialRegionDatabaseRepresentation],
        selectable: sqlalchemy.sql.FromClause,
        name: Optional[str] = None,
    ) -> SpatialRegionDatabaseRepresentation:
        # Docstring inherited
        if name is None:
            name = cls.NAME
        return cls(selectable.columns[name], name)

    @property
    def name(self) -> str:
        # Docstring inherited
        return self._name

    def isNull(self) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited
        return self.column.is_(None)

    def flatten(
        self, name: Optional[str] = None
    ) -> Iterator[sqlalchemy.sql.ColumnElement]:
        # Docstring inherited
        # Honor the inherited contract: when a name is given, the single
        # backing column is labeled with it; otherwise the column is yielded
        # under its native name.
        if name is None:
            yield self.column
        else:
            yield self.column.label(name)