Coverage for python/lsst/daf/butler/core/_topology.py : 61%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = (
25 "SpatialRegionDatabaseRepresentation",
26 "TopologicalSpace",
27 "TopologicalFamily",
28 "TopologicalRelationshipEndpoint",
29 "TopologicalExtentDatabaseRepresentation",
30)
32from abc import ABC, abstractmethod
33import enum
34from typing import (
35 Any,
36 ClassVar,
37 Dict,
38 Generic,
39 Iterator,
40 Mapping,
41 Optional,
42 Tuple,
43 Type,
44 TypeVar,
45)
47import sqlalchemy
49from lsst.utils.classes import immutable
50import lsst.sphgeom
51from . import ddl
52from .named import NamedValueAbstractSet
@enum.unique
class TopologicalSpace(enum.Enum):
    """Enumeration of continuous-variable relationships for dimensions.

    Most dimension relationships are discrete, in that they are regular foreign
    key relationships between tables.  Those connected to a
    `TopologicalSpace` are not - a row in a table instead occupies some
    region in a continuous-variable space, and topological operators like
    "overlaps" between regions in that space define the relationships between
    rows.
    """

    SPATIAL = enum.auto()
    """The (spherical) sky, using `lsst.sphgeom.Region` objects to represent
    those regions in memory.
    """

    TEMPORAL = enum.auto()
    """Time, using `Timespan` instances (with TAI endpoints) to represent
    intervals in memory.
    """
@immutable
class TopologicalFamily(ABC):
    """A grouping of `TopologicalRelationshipEndpoint` objects.

    These regions form a hierarchy in which one endpoint's rows always contain
    another's in a predefined way.

    This hierarchy means that endpoints in the same family do not generally
    have to be related using (e.g.) overlaps; instead, the regions from one
    "best" endpoint from each family are related to the best endpoint from
    each other family in a query.

    Parameters
    ----------
    name : `str`
        Unique string identifier for this family.
    space : `TopologicalSpace`
        Space in which the regions of this family live.
    """

    def __init__(
        self,
        name: str,
        space: TopologicalSpace,
    ):
        self.name = name
        self.space = space

    def __eq__(self, other: Any) -> bool:
        # Families are equal only when both the space and the name agree;
        # anything that is not a TopologicalFamily never compares equal.
        if isinstance(other, TopologicalFamily):
            return self.space == other.space and self.name == other.name
        return False

    def __hash__(self) -> int:
        # Hashing the name alone stays consistent with __eq__: two equal
        # families necessarily share a name.
        return hash(self.name)

    def __contains__(self, other: TopologicalRelationshipEndpoint) -> bool:
        # An endpoint is a member when the family it reports for this
        # family's space is this family (`None` from ``get`` never matches).
        return other.topology.get(self.space) == self

    @abstractmethod
    def choose(
        self, endpoints: NamedValueAbstractSet[TopologicalRelationshipEndpoint]
    ) -> TopologicalRelationshipEndpoint:
        """Select the best member of this family to use.

        These are to be used in a query join or data ID when more than one
        is present.

        Usually this should correspond to the most fine-grained region.

        Parameters
        ----------
        endpoints : `NamedValueAbstractSet` [`TopologicalRelationshipEndpoint`]
            Endpoints to choose from.  May include endpoints that are not
            members of this family (which should be ignored).

        Returns
        -------
        best : `TopologicalRelationshipEndpoint`
            The best endpoint that is both a member of ``self`` and in
            ``endpoints``.
        """
        raise NotImplementedError()

    name: str
    """Unique string identifier for this family (`str`).
    """

    space: TopologicalSpace
    """Space in which the regions of this family live (`TopologicalSpace`).
    """
@immutable
class TopologicalRelationshipEndpoint(ABC):
    """Representation of a logical table that can participate in overlap joins.

    An abstract base class whose instances represent a logical table that
    may participate in overlap joins defined by a `TopologicalSpace`.
    """

    @property
    @abstractmethod
    def name(self) -> str:
        """Return unique string identifier for this endpoint (`str`)."""
        raise NotImplementedError()

    @property
    @abstractmethod
    def topology(self) -> Mapping[TopologicalSpace, TopologicalFamily]:
        """Return the relationship families to which this endpoint belongs.

        The mapping is keyed by the `TopologicalSpace` of each family.
        """
        raise NotImplementedError()

    @property
    def spatial(self) -> Optional[TopologicalFamily]:
        """Return this endpoint's `~TopologicalSpace.SPATIAL` family.

        `None` is returned when `topology` has no entry for that space.
        """
        return self.topology.get(TopologicalSpace.SPATIAL)

    @property
    def temporal(self) -> Optional[TopologicalFamily]:
        """Return this endpoint's `~TopologicalSpace.TEMPORAL` family.

        `None` is returned when `topology` has no entry for that space.
        """
        return self.topology.get(TopologicalSpace.TEMPORAL)
# Type of the concrete representation subclass; used so that `fromSelectable`
# is annotated as returning an instance of the class it is called on.
_S = TypeVar("_S", bound="TopologicalExtentDatabaseRepresentation")
# In-memory Python type of the region/extent a representation stores.
_R = TypeVar("_R")


class TopologicalExtentDatabaseRepresentation(Generic[_R]):
    """Mapping of in-memory representation of a region to DB representation.

    An abstract base class whose subclasses provide a mapping from the
    in-memory representation of a `TopologicalSpace` region to a
    database-storage representation, and whose instances act like a
    SQLAlchemy-based column expression.
    """

    # NOTE(review): this class derives only from `Generic`, not `abc.ABC`, so
    # the `abstractmethod` markers below document intent; every abstract
    # member simply raises `NotImplementedError` if it is not overridden.

    NAME: ClassVar[str]
    """Name to use for this logical column in tables (`str`).

    If the representation actually uses multiple columns, this will just be
    part of the names of those columns.  Queries (and tables that represent
    materialized queries) may use a different name (via the ``name`` parameters
    to various methods) in order to disambiguate between the regions associated
    with different tables.
    """

    SPACE: ClassVar[TopologicalSpace]
    """Topological space where regions represented by this class exist.
    """

    @classmethod
    @abstractmethod
    def makeFieldSpecs(
        cls, nullable: bool, name: Optional[str] = None, **kwargs: Any
    ) -> Tuple[ddl.FieldSpec, ...]:
        """Make objects that reflect the fields that must be added to table.

        Makes one or more `ddl.FieldSpec` objects that reflect the fields
        that must be added to a table for this representation.

        Parameters
        ----------
        nullable : `bool`
            If `True`, the region is permitted to be logically ``NULL``
            (mapped to `None` in Python), though the corresponding value(s) in
            the database are implementation-defined.  Nullable region fields
            default to NULL, while others default to (-∞, ∞).
        name : `str`, optional
            Name for the logical column; a part of the name for multi-column
            representations.  Defaults to ``cls.NAME``.
        **kwargs
            Keyword arguments are forwarded to the `ddl.FieldSpec` constructor
            for all fields; implementations only provide the ``name``,
            ``dtype``, and ``default`` arguments themselves.

        Returns
        -------
        specs : `tuple` [ `ddl.FieldSpec` ]
            Field specification objects; length of the tuple is
            subclass-dependent, but is guaranteed to match the length of the
            return values of `getFieldNames` and `update`.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getFieldNames(cls, name: Optional[str] = None) -> Tuple[str, ...]:
        """Return the actual field names used by this representation.

        Parameters
        ----------
        name : `str`, optional
            Name for the logical column; a part of the name for multi-column
            representations.  Defaults to ``cls.NAME``.

        Returns
        -------
        names : `tuple` [ `str` ]
            Field name(s).  Guaranteed to be the same as the names of the field
            specifications returned by `makeFieldSpecs`.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def update(
        cls,
        extent: Optional[_R],
        name: Optional[str] = None,
        result: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        """Add a region to a dictionary.

        This region represents a database row in this representation.

        Parameters
        ----------
        extent
            An instance of the region type this class provides a database
            representation for, or `None` for ``NULL``.
        name : `str`, optional
            Name for the logical column; a part of the name for multi-column
            representations.  Defaults to ``cls.NAME``.
        result : `dict` [ `str`, `Any` ], optional
            A dictionary representing a database row that fields should be
            added to, or `None` to create and return a new one.

        Returns
        -------
        result : `dict` [ `str`, `Any` ]
            A dictionary containing this representation of a region.  Exactly
            the `dict` passed as ``result`` if that is not `None`.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def extract(cls, mapping: Mapping[str, Any], name: Optional[str] = None) -> Optional[_R]:
        """Extract a region from a dictionary.

        This region represents a database row in this representation.

        Parameters
        ----------
        mapping : `Mapping` [ `str`, `Any` ]
            A dictionary representing a database row containing a region
            in this representation.  Should have key(s) equal to the return
            value of `getFieldNames`.
        name : `str`, optional
            Name for the logical column; a part of the name for multi-column
            representations.  Defaults to ``cls.NAME``.

        Returns
        -------
        region
            Python representation of the region.
        """
        raise NotImplementedError()

    @classmethod
    def hasExclusionConstraint(cls) -> bool:
        """Return `True` if this representation supports exclusion constraints.

        Returns
        -------
        supported : `bool`
            If `True`, defining a constraint via `ddl.TableSpec.exclusion` that
            includes the fields of this representation is allowed.  This base
            implementation always returns `False`.
        """
        return False

    @classmethod
    @abstractmethod
    def fromSelectable(
        cls: Type[_S], selectable: sqlalchemy.sql.FromClause, name: Optional[str] = None
    ) -> _S:
        """Construct representation of a column in the table or subquery.

        Constructs an instance that represents a logical column (which may
        actually be backed by multiple columns) in the given table or subquery.

        Parameters
        ----------
        selectable : `sqlalchemy.sql.FromClause`
            SQLAlchemy object representing a table or subquery.
        name : `str`, optional
            Name for the logical column; a part of the name for multi-column
            representations.  Defaults to ``cls.NAME``.

        Returns
        -------
        representation : `TopologicalExtentDatabaseRepresentation`
            Object representing a logical column.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def name(self) -> str:
        """Return base logical name for the topological extent (`str`).

        If the representation uses only one actual column, this should be the
        full name of the column.  In other cases it is an unspecified subset of
        the column names.
        """
        raise NotImplementedError()

    @abstractmethod
    def isNull(self) -> sqlalchemy.sql.ColumnElement:
        """Return expression that tests where region is ``NULL``.

        Returns a SQLAlchemy expression that tests whether this region is
        logically ``NULL``.

        Returns
        -------
        isnull : `sqlalchemy.sql.ColumnElement`
            A boolean SQLAlchemy expression object.
        """
        raise NotImplementedError()

    @abstractmethod
    def flatten(self, name: Optional[str] = None) -> Iterator[sqlalchemy.sql.ColumnElement]:
        """Return the actual column(s) that comprise this logical column.

        Parameters
        ----------
        name : `str`, optional
            If provided, a name for the logical column that should be used to
            label the columns.  If not provided, the columns' native names will
            be used.

        Returns
        -------
        columns : `Iterator` [ `sqlalchemy.sql.ColumnElement` ]
            The true column or columns that back this object.
        """
        raise NotImplementedError()
class SpatialRegionDatabaseRepresentation(TopologicalExtentDatabaseRepresentation[lsst.sphgeom.Region]):
    """Class reflecting how spatial regions are represented inside the DB.

    An instance of this class encapsulates how spatial regions on the sky are
    represented in a database engine.

    Instances should be constructed via `fromSelectable`, not by calling the
    constructor directly.

    Parameters
    ----------
    column : `sqlalchemy.sql.ColumnElement`
        Column containing the opaque byte-string, with automatic conversion to
        `lsst.sphgeom.Region` implemented via SQLAlchemy hooks.
    name : `str`
        Name of the column.

    Notes
    -----
    Unlike `TimespanDatabaseRepresentation`, this is a concrete class, because
    we currently do not support any database-native spatial regions, and
    instead rely on precomputed overlaps and opaque (to the database) byte
    string columns.  As a result, it also does not support any in-database
    topological predicates.

    If we add support for database-native regions in the future, this class may
    become an ABC with multiple concrete implementations.
    """

    def __init__(self, column: sqlalchemy.sql.ColumnElement, name: str):
        self.column = column
        self._name = name

    NAME: ClassVar[str] = "region"
    SPACE: ClassVar[TopologicalSpace] = TopologicalSpace.SPATIAL

    @classmethod
    def makeFieldSpecs(
        cls, nullable: bool, name: Optional[str] = None, **kwargs: Any
    ) -> Tuple[ddl.FieldSpec, ...]:
        # Docstring inherited.
        if name is None:
            name = cls.NAME
        # Most regions are small (they're quadrilaterals), but visit ones can
        # be quite large because they have a complicated boundary.  For HSC,
        # that's about ~1400 bytes, and I've just rounded up to the nearest
        # power of two.  Given what we now know about variable-length TEXT
        # having no performance penalties in PostgreSQL and SQLite vs.
        # fixed-length strings, there's probably a variable-length bytes type
        # we should be using instead, but that's a schema change and hence
        # something we won't be doing anytime soon.
        #
        # NOTE(review): ``nullable`` and ``**kwargs`` are accepted but not
        # passed on to `ddl.FieldSpec` here.  Forwarding them would presumably
        # change generated table definitions, so this is left untouched;
        # confirm whether that is intentional.
        return (ddl.FieldSpec(name, nbytes=2048, dtype=ddl.Base64Region),)

    @classmethod
    def getFieldNames(cls, name: Optional[str] = None) -> Tuple[str, ...]:
        # Docstring inherited.
        if name is None:
            name = cls.NAME
        # A region is always stored in exactly one column.
        return (name,)

    @classmethod
    def update(
        cls,
        extent: Optional[lsst.sphgeom.Region],
        name: Optional[str] = None,
        result: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        # Docstring inherited.
        if name is None:
            name = cls.NAME
        if result is None:
            result = {}
        # The region object is stored as-is under the single column name.
        result[name] = extent
        return result

    @classmethod
    def extract(
        cls, mapping: Mapping[str, Any], name: Optional[str] = None
    ) -> Optional[lsst.sphgeom.Region]:
        # Docstring inherited.
        if name is None:
            name = cls.NAME
        return mapping[name]

    @classmethod
    def fromSelectable(
        cls: Type[SpatialRegionDatabaseRepresentation],
        selectable: sqlalchemy.sql.FromClause,
        name: Optional[str] = None,
    ) -> SpatialRegionDatabaseRepresentation:
        # Docstring inherited
        if name is None:
            name = cls.NAME
        return cls(selectable.columns[name], name)

    @property
    def name(self) -> str:
        # Docstring inherited
        return self._name

    def isNull(self) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited
        return self.column.is_(None)

    def flatten(self, name: Optional[str] = None) -> Iterator[sqlalchemy.sql.ColumnElement]:
        # Docstring inherited
        # Honor the documented contract: relabel the single backing column
        # when a name is given, otherwise yield it under its native name.
        if name is None:
            yield self.column
        else:
            yield self.column.label(name)