Coverage for python/lsst/daf/butler/core/_topology.py: 63%
133 statements
« prev ^ index » next coverage.py v6.4.4, created at 2022-08-31 10:07 +0000
« prev ^ index » next coverage.py v6.4.4, created at 2022-08-31 10:07 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = (
25 "SpatialRegionDatabaseRepresentation",
26 "TopologicalSpace",
27 "TopologicalFamily",
28 "TopologicalRelationshipEndpoint",
29 "TopologicalExtentDatabaseRepresentation",
30)
32import enum
33from abc import ABC, abstractmethod
34from typing import Any, ClassVar, Dict, Generic, Iterator, Mapping, Optional, Tuple, Type, TypeVar
36import lsst.sphgeom
37import sqlalchemy
38from lsst.utils.classes import immutable
40from . import ddl
41from .named import NamedValueAbstractSet
@enum.unique
class TopologicalSpace(enum.Enum):
    """Enumeration of the continuous-variable spaces that relate dimensions.

    The majority of dimension relationships are discrete: they are ordinary
    foreign key relationships between tables.  Relationships tied to a
    `TopologicalSpace` are different - each table row occupies a region of
    a continuous-variable space, and rows are related to each other by
    topological operators (such as "overlaps") acting on those regions.
    """

    SPATIAL = enum.auto()
    """The (spherical) sky; regions are represented in memory by
    `lsst.sphgeom.Region` objects.
    """

    TEMPORAL = enum.auto()
    """Time; intervals are represented in memory by `Timespan` instances
    (with TAI endpoints).
    """
@immutable
class TopologicalFamily(ABC):
    """A named group of `TopologicalRelationshipEndpoint` objects.

    The members of a family form a hierarchy: the rows of one endpoint always
    contain the rows of another in a predefined way.

    Because of that hierarchy, endpoints belonging to the same family do not
    generally have to be related using (e.g.) overlaps.  Instead, a query
    relates the regions of one "best" endpoint from each family to the best
    endpoint of every other family.

    Parameters
    ----------
    name : `str`
        Unique string identifier for this family.
    space : `TopologicalSpace`
        Space in which the regions of this family live.
    """

    name: str
    """Unique string identifier for this family (`str`).
    """

    space: TopologicalSpace
    """Space in which the regions of this family live (`TopologicalSpace`).
    """

    def __init__(self, name: str, space: TopologicalSpace):
        self.name = name
        self.space = space

    def __eq__(self, other: Any) -> bool:
        # Only another family can be equal, and then only when both the
        # space and the name agree.
        if not isinstance(other, TopologicalFamily):
            return False
        return self.space == other.space and self.name == other.name

    def __hash__(self) -> int:
        # Hashing on the name alone is consistent with __eq__: families that
        # compare equal necessarily share a name.
        return hash(self.name)

    def __contains__(self, other: TopologicalRelationshipEndpoint) -> bool:
        # An endpoint is a member when the family it registers for this
        # family's space compares equal to this family.
        registered = other.topology.get(self.space)
        return registered == self

    @abstractmethod
    def choose(
        self, endpoints: NamedValueAbstractSet[TopologicalRelationshipEndpoint]
    ) -> TopologicalRelationshipEndpoint:
        """Pick the best member of this family from a set of endpoints.

        The chosen endpoint is the one to use in a query join or data ID
        when more than one member of the family is present.

        Usually this should correspond to the most fine-grained region.

        Parameters
        ----------
        endpoints : `NamedValueAbstractSet` [`TopologicalRelationshipEndpoint`]
            Candidate endpoints.  Endpoints that are not members of this
            family may be included, and should be ignored.

        Returns
        -------
        best : `TopologicalRelationshipEndpoint`
            The best endpoint that is both a member of ``self`` and in
            ``endpoints``.
        """
        raise NotImplementedError()
@immutable
class TopologicalRelationshipEndpoint(ABC):
    """A logical table that is able to take part in overlap joins.

    This is an abstract base class; each instance stands for a logical table
    that may participate in the overlap joins defined by a
    `TopologicalSpace`.
    """

    @property
    @abstractmethod
    def name(self) -> str:
        """Return unique string identifier for this endpoint (`str`)."""
        raise NotImplementedError()

    @property
    @abstractmethod
    def topology(self) -> Mapping[TopologicalSpace, TopologicalFamily]:
        """Return the relationship families this endpoint is a member of.

        The mapping is keyed by the `TopologicalSpace` of each family.
        """
        raise NotImplementedError()

    @property
    def spatial(self) -> Optional[TopologicalFamily]:
        """Return this endpoint's `~TopologicalSpace.SPATIAL` family.

        `None` is returned when ``topology`` has no entry for that space.
        """
        families = self.topology
        return families.get(TopologicalSpace.SPATIAL)

    @property
    def temporal(self) -> Optional[TopologicalFamily]:
        """Return this endpoint's `~TopologicalSpace.TEMPORAL` family.

        `None` is returned when ``topology`` has no entry for that space.
        """
        families = self.topology
        return families.get(TopologicalSpace.TEMPORAL)
_S = TypeVar("_S", bound="TopologicalExtentDatabaseRepresentation")
_R = TypeVar("_R")


class TopologicalExtentDatabaseRepresentation(ABC, Generic[_R]):
    """Mapping of in-memory representation of a region to DB representation.

    An abstract base class whose subclasses provide a mapping from the
    in-memory representation of a `TopologicalSpace` region to a
    database-storage representation, and whose instances act like a
    SQLAlchemy-based column expression.

    Notes
    -----
    The class derives from `abc.ABC` so that the `abc.abstractmethod`
    markers below are actually enforced: neither this class nor a subclass
    that leaves any abstract member unimplemented can be instantiated.
    """

    NAME: ClassVar[str]
    """Name to use for this logical column in tables (`str`).

    If the representation actually uses multiple columns, this will just be
    part of the names of those columns.  Queries (and tables that represent
    materialized queries) may use a different name (via the ``name`` parameters
    to various methods) in order to disambiguate between the regions associated
    with different tables.
    """

    SPACE: ClassVar[TopologicalSpace]
    """Topological space where regions represented by this class exist.
    """

    @classmethod
    @abstractmethod
    def makeFieldSpecs(
        cls, nullable: bool, name: Optional[str] = None, **kwargs: Any
    ) -> Tuple[ddl.FieldSpec, ...]:
        """Make objects that reflect the fields that must be added to table.

        Makes one or more `ddl.FieldSpec` objects that reflect the fields
        that must be added to a table for this representation.

        Parameters
        ----------
        nullable : `bool`
            If `True`, the region is permitted to be logically ``NULL``
            (mapped to `None` in Python), though the corresponding value(s) in
            the database are implementation-defined.  Nullable region fields
            default to NULL, while others default to (-∞, ∞).
        name : `str`, optional
            Name for the logical column; a part of the name for multi-column
            representations.  Defaults to ``cls.NAME``.
        **kwargs
            Keyword arguments are forwarded to the `ddl.FieldSpec` constructor
            for all fields; implementations only provide the ``name``,
            ``dtype``, and ``default`` arguments themselves.

        Returns
        -------
        specs : `tuple` [ `ddl.FieldSpec` ]
            Field specification objects; length of the tuple is
            subclass-dependent, but is guaranteed to match the length of the
            return values of `getFieldNames` and `update`.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getFieldNames(cls, name: Optional[str] = None) -> Tuple[str, ...]:
        """Return the actual field names used by this representation.

        Parameters
        ----------
        name : `str`, optional
            Name for the logical column; a part of the name for multi-column
            representations.  Defaults to ``cls.NAME``.

        Returns
        -------
        names : `tuple` [ `str` ]
            Field name(s).  Guaranteed to be the same as the names of the field
            specifications returned by `makeFieldSpecs`.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def update(
        cls, extent: Optional[_R], name: Optional[str] = None, result: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """Add a region to a dictionary.

        This region represents a database row in this representation.

        Parameters
        ----------
        extent
            An instance of the region type this class provides a database
            representation for, or `None` for ``NULL``.
        name : `str`, optional
            Name for the logical column; a part of the name for multi-column
            representations.  Defaults to ``cls.NAME``.
        result : `dict` [ `str`, `Any` ], optional
            A dictionary representing a database row that fields should be
            added to, or `None` to create and return a new one.

        Returns
        -------
        result : `dict` [ `str`, `Any` ]
            A dictionary containing this representation of a region.  Exactly
            the `dict` passed as ``result`` if that is not `None`.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def extract(cls, mapping: Mapping[str, Any], name: Optional[str] = None) -> Optional[_R]:
        """Extract a region from a dictionary.

        This region represents a database row in this representation.

        Parameters
        ----------
        mapping : `Mapping` [ `str`, `Any` ]
            A dictionary representing a database row containing a region
            in this representation.  Should have key(s) equal to the return
            value of `getFieldNames`.
        name : `str`, optional
            Name for the logical column; a part of the name for multi-column
            representations.  Defaults to ``cls.NAME``.

        Returns
        -------
        region
            Python representation of the region.
        """
        raise NotImplementedError()

    @classmethod
    def hasExclusionConstraint(cls) -> bool:
        """Return `True` if this representation supports exclusion constraints.

        Returns
        -------
        supported : `bool`
            If `True`, defining a constraint via `ddl.TableSpec.exclusion` that
            includes the fields of this representation is allowed.  This base
            implementation always returns `False`.
        """
        return False

    @classmethod
    @abstractmethod
    def fromSelectable(
        cls: Type[_S], selectable: sqlalchemy.sql.FromClause, name: Optional[str] = None
    ) -> _S:
        """Construct representation of a column in the table or subquery.

        Constructs an instance that represents a logical column (which may
        actually be backed by multiple columns) in the given table or subquery.

        Parameters
        ----------
        selectable : `sqlalchemy.sql.FromClause`
            SQLAlchemy object representing a table or subquery.
        name : `str`, optional
            Name for the logical column; a part of the name for multi-column
            representations.  Defaults to ``cls.NAME``.

        Returns
        -------
        representation : `TopologicalExtentDatabaseRepresentation`
            Object representing a logical column.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def name(self) -> str:
        """Return base logical name for the topological extent (`str`).

        If the representation uses only one actual column, this should be the
        full name of the column.  In other cases it is an unspecified subset of
        the column names.
        """
        raise NotImplementedError()

    @abstractmethod
    def isNull(self) -> sqlalchemy.sql.ColumnElement:
        """Return expression that tests whether region is ``NULL``.

        Returns a SQLAlchemy expression that tests whether this region is
        logically ``NULL``.

        Returns
        -------
        isnull : `sqlalchemy.sql.ColumnElement`
            A boolean SQLAlchemy expression object.
        """
        raise NotImplementedError()

    @abstractmethod
    def flatten(self, name: Optional[str] = None) -> Iterator[sqlalchemy.sql.ColumnElement]:
        """Return the actual column(s) that comprise this logical column.

        Parameters
        ----------
        name : `str`, optional
            If provided, a name for the logical column that should be used to
            label the columns.  If not provided, the columns' native names will
            be used.

        Returns
        -------
        columns : `Iterator` [ `sqlalchemy.sql.ColumnElement` ]
            The true column or columns that back this object.
        """
        raise NotImplementedError()
class SpatialRegionDatabaseRepresentation(TopologicalExtentDatabaseRepresentation[lsst.sphgeom.Region]):
    """Class reflecting how spatial regions are represented inside the DB.

    An instance of this class encapsulates how spatial regions on the sky are
    represented in a database engine.

    Instances should be constructed via `fromSelectable`, not by calling the
    constructor directly.

    Parameters
    ----------
    column : `sqlalchemy.sql.ColumnElement`
        Column containing the opaque byte-string, with automatic conversion to
        `lsst.sphgeom.Region` implemented via SQLAlchemy hooks.
    name : `str`
        Name of the column.

    Notes
    -----
    Unlike `TimespanDatabaseRepresentation`, this is a concrete class, because
    we currently do not support any database-native spatial regions, and
    instead rely on precomputed overlaps and opaque (to the database) byte
    string columns.  As a result, it also does not support any in-database
    topological predicates.

    If we add support for database-native regions in the future, this class may
    become an ABC with multiple concrete implementations.
    """

    def __init__(self, column: sqlalchemy.sql.ColumnElement, name: str):
        self.column = column
        self._name = name

    NAME: ClassVar[str] = "region"
    SPACE: ClassVar[TopologicalSpace] = TopologicalSpace.SPATIAL

    @classmethod
    def makeFieldSpecs(
        cls, nullable: bool, name: Optional[str] = None, **kwargs: Any
    ) -> Tuple[ddl.FieldSpec, ...]:
        # Docstring inherited.
        if name is None:
            name = cls.NAME
        # Most regions are small (they're quadrilaterals), but visit ones can
        # be quite large because they have a complicated boundary.  For HSC,
        # that's about ~1400 bytes, and I've just rounded up to the nearest
        # power of two.  Given what we now know about variable-length TEXT
        # having no performance penalties in PostgreSQL and SQLite vs.
        # fixed-length strings, there's probably a variable-length bytes type
        # we should be using instead, but that's a schema change and hence
        # something we won't be doing anytime soon.
        #
        # NOTE(review): ``nullable`` and ``**kwargs`` are accepted but not
        # forwarded to `ddl.FieldSpec` here, although the inherited docstring
        # says keyword arguments are forwarded.  Forwarding them would alter
        # the generated schema, so that is left for a deliberate change -
        # confirm the intended behavior with the callers.
        return (ddl.FieldSpec(name, nbytes=2048, dtype=ddl.Base64Region),)

    @classmethod
    def getFieldNames(cls, name: Optional[str] = None) -> Tuple[str, ...]:
        # Docstring inherited.
        if name is None:
            name = cls.NAME
        return (name,)

    @classmethod
    def update(
        cls,
        extent: Optional[lsst.sphgeom.Region],
        name: Optional[str] = None,
        result: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        # Docstring inherited.
        if name is None:
            name = cls.NAME
        if result is None:
            result = {}
        # The region is stored under a single key, as-is.
        result[name] = extent
        return result

    @classmethod
    def extract(cls, mapping: Mapping[str, Any], name: Optional[str] = None) -> Optional[lsst.sphgeom.Region]:
        # Docstring inherited.
        if name is None:
            name = cls.NAME
        return mapping[name]

    @classmethod
    def fromSelectable(
        cls: Type[SpatialRegionDatabaseRepresentation],
        selectable: sqlalchemy.sql.FromClause,
        name: Optional[str] = None,
    ) -> SpatialRegionDatabaseRepresentation:
        # Docstring inherited
        if name is None:
            name = cls.NAME
        return cls(selectable.columns[name], name)

    @property
    def name(self) -> str:
        # Docstring inherited
        return self._name

    def isNull(self) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited
        return self.column.is_(None)

    def flatten(self, name: Optional[str] = None) -> Iterator[sqlalchemy.sql.ColumnElement]:
        # Docstring inherited
        #
        # Honor the documented contract: when a name is given, the single
        # backing column is labeled with it; otherwise the column is yielded
        # under its native name.
        if name is None:
            yield self.column
        else:
            yield self.column.label(name)