Coverage for python/lsst/daf/butler/timespan_database_representation.py: 65%
156 statements
« prev ^ index » next coverage.py v7.4.3, created at 2024-03-12 10:07 +0000
« prev ^ index » next coverage.py v7.4.3, created at 2024-03-12 10:07 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
27from __future__ import annotations
29__all__ = ("TimespanDatabaseRepresentation",)
31import enum
32from abc import ABC, abstractmethod
33from collections.abc import Callable, Mapping
34from typing import TYPE_CHECKING, Any, ClassVar, TypeVar, Union
36import astropy.time
37import astropy.utils.exceptions
38import sqlalchemy
40# As of astropy 4.2, the erfa interface is shipped independently and
41# ErfaWarning is no longer an AstropyWarning
42try:
43 import erfa
44except ImportError:
45 erfa = None
48from . import ddl
49from ._timespan import Timespan
50from .time_utils import TimeConverter
52if TYPE_CHECKING: # Imports needed only for type annotations; may be circular.
53 pass
class _SpecialTimespanBound(enum.Enum):
    """Enumeration to provide a singleton value for empty timespan bounds.

    This enum's only member should generally be accessed via the
    `Timespan.EMPTY` alias.

    Notes
    -----
    The member of this enum is one of the alternatives permitted by the
    module-level ``TimespanBound`` type alias, next to `astropy.time.Time`
    and `None`.
    """

    EMPTY = enum.auto()
    """The value used for both `Timespan.begin` and `Timespan.end` for empty
    Timespans that contain no points.
    """
# Type of a single timespan endpoint: a concrete time, the special marker
# used for empty timespans, or `None`.
# NOTE(review): `None` presumably means "unbounded on this side"; confirm
# against the `Timespan` class.
TimespanBound = Union[astropy.time.Time, _SpecialTimespanBound, None]


# Type variable bound to `TimespanDatabaseRepresentation`; used in method
# signatures to express "the same subclass as ``cls`` / ``self``".
_S = TypeVar("_S", bound="TimespanDatabaseRepresentation")
class TimespanDatabaseRepresentation(ABC):
    """An interface for representing a timespan in a database.

    Notes
    -----
    Much of this class's interface consists of classmethods.  Instances
    can be constructed via the `from_columns` or `fromLiteral` methods as a
    way to include timespan overlap operations in query JOIN or WHERE clauses.

    `TimespanDatabaseRepresentation` implementations are guaranteed to use the
    same interval definitions and edge-case behavior as the `Timespan` class.
    They are also guaranteed to round-trip `Timespan` instances exactly.
    """

    # Default name of the logical timespan column; the classmethods below
    # document that their ``name`` argument defaults to this value.
    NAME: ClassVar[str] = "timespan"

    Compound: ClassVar[type[TimespanDatabaseRepresentation]]
    """A concrete subclass of `TimespanDatabaseRepresentation` that simply
    uses two separate fields for the begin (inclusive) and end (exclusive)
    endpoints.

    This implementation should be compatible with any SQL database, and should
    generally be used when a database-specific implementation is not available.
    """

    # The interface itself holds no instance state; concrete subclasses
    # declare their own slots.
    __slots__ = ()

    @classmethod
    @abstractmethod
    def makeFieldSpecs(
        cls, nullable: bool, name: str | None = None, **kwargs: Any
    ) -> tuple[ddl.FieldSpec, ...]:
        """Make objects that reflect the fields that must be added to a table.

        Makes one or more `ddl.FieldSpec` objects that reflect the fields
        that must be added to a table for this representation.

        Parameters
        ----------
        nullable : `bool`
            If `True`, the timespan is permitted to be logically ``NULL``
            (mapped to `None` in Python), though the corresponding value(s) in
            the database are implementation-defined.  Nullable timespan fields
            default to NULL, while others default to (-∞, ∞).
        name : `str`, optional
            Name for the logical column; a part of the name for multi-column
            representations.  Defaults to ``cls.NAME``.
        **kwargs
            Keyword arguments are forwarded to the `ddl.FieldSpec` constructor
            for all fields; implementations only provide the ``name``,
            ``dtype``, and ``default`` arguments themselves.

        Returns
        -------
        specs : `tuple` [ `ddl.FieldSpec` ]
            Field specification objects; length of the tuple is
            subclass-dependent, but is guaranteed to match the length of the
            return values of `getFieldNames` and `update`.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getFieldNames(cls, name: str | None = None) -> tuple[str, ...]:
        """Return the actual field names used by this representation.

        Parameters
        ----------
        name : `str`, optional
            Name for the logical column; a part of the name for multi-column
            representations.  Defaults to ``cls.NAME``.

        Returns
        -------
        names : `tuple` [ `str` ]
            Field name(s).  Guaranteed to be the same as the names of the field
            specifications returned by `makeFieldSpecs`.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def fromLiteral(cls: type[_S], timespan: Timespan | None) -> _S:
        """Construct a database timespan from a literal `Timespan` instance.

        Parameters
        ----------
        timespan : `Timespan` or `None`
            Literal timespan to convert, or `None` to make logically ``NULL``
            timespan.

        Returns
        -------
        tsRepr : `TimespanDatabaseRepresentation`
            A timespan expression object backed by `sqlalchemy.sql.literal`
            column expressions.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def from_columns(cls: type[_S], columns: sqlalchemy.sql.ColumnCollection, name: str | None = None) -> _S:
        """Construct a database timespan from the columns of a table or
        subquery.

        Parameters
        ----------
        columns : `sqlalchemy.sql.ColumnCollection`
            SQLAlchemy container for raw columns.
        name : `str`, optional
            Name for the logical column; a part of the name for multi-column
            representations.  Defaults to ``cls.NAME``.

        Returns
        -------
        tsRepr : `TimespanDatabaseRepresentation`
            A timespan expression object backed by the column(s) looked up in
            ``columns``.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def update(
        cls, timespan: Timespan | None, name: str | None = None, result: dict[str, Any] | None = None
    ) -> dict[str, Any]:
        """Add a timespan value to a dictionary that represents a database row.

        Parameters
        ----------
        timespan : `Timespan` or `None`
            A timespan literal, or `None` for ``NULL``.
        name : `str`, optional
            Name for the logical column; a part of the name for multi-column
            representations.  Defaults to ``cls.NAME``.
        result : `dict` [ `str`, `Any` ], optional
            A dictionary representing a database row that fields should be
            added to, or `None` to create and return a new one.

        Returns
        -------
        result : `dict` [ `str`, `Any` ]
            A dictionary containing this representation of a timespan.  Exactly
            the `dict` passed as ``result`` if that is not `None`.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def extract(cls, mapping: Mapping[Any, Any], name: str | None = None) -> Timespan | None:
        """Extract a timespan from a dictionary that represents a database row.

        Parameters
        ----------
        mapping : `~collections.abc.Mapping` [ `Any`, `Any` ]
            A dictionary representing a database row containing a `Timespan`
            in this representation.  Should have key(s) equal to the return
            value of `getFieldNames`.
        name : `str`, optional
            Name for the logical column; a part of the name for multi-column
            representations.  Defaults to ``cls.NAME``.

        Returns
        -------
        timespan : `Timespan` or `None`
            Python representation of the timespan.
        """
        raise NotImplementedError()

    @classmethod
    def hasExclusionConstraint(cls) -> bool:
        """Return `True` if this representation supports exclusion constraints.

        Returns
        -------
        supported : `bool`
            If `True`, defining a constraint via `ddl.TableSpec.exclusion` that
            includes the fields of this representation is allowed.

        Notes
        -----
        This base-class implementation always returns `False`.
        """
        return False

    @property
    @abstractmethod
    def name(self) -> str:
        """Return base logical name for the timespan column or expression
        (`str`).

        If the representation uses only one actual column, this should be the
        full name of the column.  In other cases it is an unspecified
        common subset of the column names.
        """
        raise NotImplementedError()

    @abstractmethod
    def isNull(self) -> sqlalchemy.sql.ColumnElement:
        """Return expression that tests whether the timespan is ``NULL``.

        Returns a SQLAlchemy expression that tests whether this timespan is
        logically ``NULL``.

        Returns
        -------
        isnull : `sqlalchemy.sql.ColumnElement`
            A boolean SQLAlchemy expression object.
        """
        raise NotImplementedError()

    @abstractmethod
    def flatten(self, name: str | None = None) -> tuple[sqlalchemy.sql.ColumnElement, ...]:
        """Return the actual column(s) that comprise this logical column.

        Parameters
        ----------
        name : `str`, optional
            If provided, a name for the logical column that should be used to
            label the columns.  If not provided, the columns' native names will
            be used.

        Returns
        -------
        columns : `tuple` [ `sqlalchemy.sql.ColumnElement` ]
            The true column or columns that back this object.
        """
        raise NotImplementedError()

    @abstractmethod
    def isEmpty(self) -> sqlalchemy.sql.ColumnElement:
        """Return a boolean SQLAlchemy expression for testing empty timespans.

        Returns
        -------
        empty : `sqlalchemy.sql.ColumnElement`
            A boolean SQLAlchemy expression object.
        """
        raise NotImplementedError()

    @abstractmethod
    def __lt__(self: _S, other: _S | sqlalchemy.sql.ColumnElement) -> sqlalchemy.sql.ColumnElement:
        """Return SQLAlchemy expression for testing less than.

        Returns a SQLAlchemy expression representing a test for whether an
        in-database timespan is strictly less than another timespan or a time
        point.

        Parameters
        ----------
        other : ``type(self)`` or `sqlalchemy.sql.ColumnElement`
            The timespan or time to relate to ``self``; either an instance of
            the same `TimespanDatabaseRepresentation` subclass as ``self``, or
            a SQL column expression representing an `astropy.time.Time`.

        Returns
        -------
        less : `sqlalchemy.sql.ColumnElement`
            A boolean SQLAlchemy expression object.

        Notes
        -----
        See `Timespan.__lt__` for edge-case behavior.
        """
        raise NotImplementedError()

    @abstractmethod
    def __gt__(self: _S, other: _S | sqlalchemy.sql.ColumnElement) -> sqlalchemy.sql.ColumnElement:
        """Return a SQLAlchemy expression for testing greater than.

        Returns a SQLAlchemy expression representing a test for whether an
        in-database timespan is strictly greater than another timespan or a
        time point.

        Parameters
        ----------
        other : ``type(self)`` or `sqlalchemy.sql.ColumnElement`
            The timespan or time to relate to ``self``; either an instance of
            the same `TimespanDatabaseRepresentation` subclass as ``self``, or
            a SQL column expression representing an `astropy.time.Time`.

        Returns
        -------
        greater : `sqlalchemy.sql.ColumnElement`
            A boolean SQLAlchemy expression object.

        Notes
        -----
        See `Timespan.__gt__` for edge-case behavior.
        """
        raise NotImplementedError()

    @abstractmethod
    def overlaps(self: _S, other: _S | sqlalchemy.sql.ColumnElement) -> sqlalchemy.sql.ColumnElement:
        """Return a SQLAlchemy expression representing timespan overlaps.

        Parameters
        ----------
        other : ``type(self)`` or `sqlalchemy.sql.ColumnElement`
            The timespan or time to overlap ``self`` with.  If a single time,
            this is a synonym for `contains`.

        Returns
        -------
        overlap : `sqlalchemy.sql.ColumnElement`
            A boolean SQLAlchemy expression object.

        Notes
        -----
        See `Timespan.overlaps` for edge-case behavior.
        """
        raise NotImplementedError()

    @abstractmethod
    def contains(self: _S, other: _S | sqlalchemy.sql.ColumnElement) -> sqlalchemy.sql.ColumnElement:
        """Return a SQLAlchemy expression representing containment.

        Returns a test for whether an in-database timespan contains another
        timespan or a time point.

        Parameters
        ----------
        other : ``type(self)`` or `sqlalchemy.sql.ColumnElement`
            The timespan or time to relate to ``self``; either an instance of
            the same `TimespanDatabaseRepresentation` subclass as ``self``, or
            a SQL column expression representing an `astropy.time.Time`.

        Returns
        -------
        contains : `sqlalchemy.sql.ColumnElement`
            A boolean SQLAlchemy expression object.

        Notes
        -----
        See `Timespan.contains` for edge-case behavior.
        """
        raise NotImplementedError()

    @abstractmethod
    def lower(self: _S) -> sqlalchemy.sql.ColumnElement:
        """Return a SQLAlchemy expression representing a lower bound of a
        timespan.

        Returns
        -------
        lower : `sqlalchemy.sql.ColumnElement`
            A SQLAlchemy expression for a lower bound.

        Notes
        -----
        If the database holds ``NULL`` for a timespan then the returned
        expression should evaluate to 0.  The main purpose of this and the
        `upper` method is to use them in generating SQL, in particular ORDER
        BY clauses, to guarantee a predictable ordering.  They may potentially
        be used for transforming boolean user expressions into SQL, but that
        will likely require extra attention to ordering issues.
        """
        raise NotImplementedError()

    @abstractmethod
    def upper(self: _S) -> sqlalchemy.sql.ColumnElement:
        """Return a SQLAlchemy expression representing an upper bound of a
        timespan.

        Returns
        -------
        upper : `sqlalchemy.sql.ColumnElement`
            A SQLAlchemy expression for an upper bound.

        Notes
        -----
        If the database holds ``NULL`` for a timespan then the returned
        expression should evaluate to 0.  Also see notes for the `lower`
        method.
        """
        raise NotImplementedError()

    @abstractmethod
    def apply_any_aggregate(
        self, func: Callable[[sqlalchemy.ColumnElement[Any]], sqlalchemy.ColumnElement[Any]]
    ) -> TimespanDatabaseRepresentation:
        """Apply the given ANY_VALUE aggregate function (or equivalent) to
        the timespan column(s).

        Parameters
        ----------
        func : `~collections.abc.Callable`
            Callable that takes a `sqlalchemy.ColumnElement` and returns a
            `sqlalchemy.ColumnElement`.

        Returns
        -------
        timespan : `TimespanDatabaseRepresentation`
            A timespan database representation usable in the SELECT clause of
            a query with GROUP BY where it does not matter which of the grouped
            values is selected.
        """
        raise NotImplementedError()
class _CompoundTimespanDatabaseRepresentation(TimespanDatabaseRepresentation):
    """Timespan representation that keeps each endpoint in its own field.

    This `TimespanDatabaseRepresentation` implementation stores the lower and
    upper bounds of a timespan in two separate integer fields.

    It should generally be accessed via
    `TimespanDatabaseRepresentation.Compound`, and instances should be
    constructed only via the `from_columns` and `fromLiteral` methods.

    Parameters
    ----------
    nsec : `tuple` of `sqlalchemy.sql.ColumnElement`
        Pair of SQLAlchemy objects representing the lower (inclusive) and
        upper (exclusive) bounds, as 64-bit integer columns containing
        nanoseconds.
    name : `str`
        Name for the logical column; a part of the name for multi-column
        representations.

    Notes
    -----
    A ``NULL`` timespan is represented by both fields being ``NULL``; having
    only one of them ``NULL`` is a corrupted state that should only be
    possible if this interface is circumvented.  `Timespan` instances with
    `~Timespan.begin` and/or `~Timespan.end` set to `None` are stored using
    the minimum and maximum value constants of our integer-time mapping.
    """

    __slots__ = ("_nsec", "_name")

    def __init__(self, nsec: tuple[sqlalchemy.sql.ColumnElement, sqlalchemy.sql.ColumnElement], name: str):
        self._name = name
        self._nsec = nsec

    @classmethod
    def makeFieldSpecs(
        cls, nullable: bool, name: str | None = None, **kwargs: Any
    ) -> tuple[ddl.FieldSpec, ...]:
        # Docstring inherited.
        prefix = cls.NAME if name is None else name

        # Nullable fields default to NULL; otherwise the defaults span the
        # full range of the integer-time mapping.
        if nullable:
            begin_default = None
        else:
            begin_default = sqlalchemy.sql.text(str(TimeConverter().min_nsec))
        begin_spec = ddl.FieldSpec(
            f"{prefix}_begin",
            dtype=sqlalchemy.BigInteger,
            nullable=nullable,
            default=begin_default,
            **kwargs,
        )

        if nullable:
            end_default = None
        else:
            end_default = sqlalchemy.sql.text(str(TimeConverter().max_nsec))
        end_spec = ddl.FieldSpec(
            f"{prefix}_end",
            dtype=sqlalchemy.BigInteger,
            nullable=nullable,
            default=end_default,
            **kwargs,
        )

        return (begin_spec, end_spec)

    @classmethod
    def getFieldNames(cls, name: str | None = None) -> tuple[str, ...]:
        # Docstring inherited.
        prefix = cls.NAME if name is None else name
        return (f"{prefix}_begin", f"{prefix}_end")

    @classmethod
    def update(
        cls, extent: Timespan | None, name: str | None = None, result: dict[str, Any] | None = None
    ) -> dict[str, Any]:
        # Docstring inherited.
        prefix = cls.NAME if name is None else name
        row = {} if result is None else result
        # A `None` timespan is written as NULL in both fields.
        row[f"{prefix}_begin"] = None if extent is None else extent._nsec[0]
        row[f"{prefix}_end"] = None if extent is None else extent._nsec[1]
        return row

    @classmethod
    def extract(cls, mapping: Mapping[str, Any], name: str | None = None) -> Timespan | None:
        # Docstring inherited.
        prefix = cls.NAME if name is None else name
        begin_nsec = mapping[f"{prefix}_begin"]
        end_nsec = mapping[f"{prefix}_end"]

        if begin_nsec is not None and end_nsec is not None:
            return Timespan(None, None, _nsec=(begin_nsec, end_nsec))
        if begin_nsec is None and end_nsec is None:
            # Both fields NULL: a logically NULL timespan.
            return None

        # Exactly one field is NULL, which is a corrupted row.
        if begin_nsec is None:
            raise RuntimeError(
                f"Corrupted timespan extracted: begin is NULL, but end is {end_nsec}ns -> "
                f"{TimeConverter().nsec_to_astropy(end_nsec).tai.isot}."
            )
        raise RuntimeError(
            f"Corrupted timespan extracted: end is NULL, but begin is {begin_nsec}ns -> "
            f"{TimeConverter().nsec_to_astropy(begin_nsec).tai.isot}."
        )

    @classmethod
    def from_columns(
        cls, columns: sqlalchemy.sql.ColumnCollection, name: str | None = None
    ) -> _CompoundTimespanDatabaseRepresentation:
        # Docstring inherited.
        prefix = cls.NAME if name is None else name
        begin_column = columns[f"{prefix}_begin"]
        end_column = columns[f"{prefix}_end"]
        return cls(nsec=(begin_column, end_column), name=prefix)

    @classmethod
    def fromLiteral(cls, timespan: Timespan | None) -> _CompoundTimespanDatabaseRepresentation:
        # Docstring inherited.
        if timespan is None:
            bounds = (sqlalchemy.sql.null(), sqlalchemy.sql.null())
        else:
            bounds = (
                sqlalchemy.sql.literal(timespan._nsec[0]),
                sqlalchemy.sql.literal(timespan._nsec[1]),
            )
        return cls(nsec=bounds, name=cls.NAME)

    @property
    def name(self) -> str:
        # Docstring inherited.
        return self._name

    def isNull(self) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited.
        # Only the lower bound is tested; a valid row has both fields NULL
        # or neither.
        begin = self._nsec[0]
        return begin.is_(None)

    def isEmpty(self) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited.
        begin = self._nsec[0]
        end = self._nsec[1]
        return begin >= end

    def __lt__(
        self, other: _CompoundTimespanDatabaseRepresentation | sqlalchemy.sql.ColumnElement
    ) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited.
        # See comments in Timespan.__lt__ for why we use these exact
        # expressions.
        begin = self._nsec[0]
        end = self._nsec[1]
        if isinstance(other, sqlalchemy.sql.ColumnElement):
            # A time point plays the role of both bounds of ``other``.
            other_begin = other
            other_end = other
        else:
            other_begin = other._nsec[0]
            other_end = other._nsec[1]
        return sqlalchemy.sql.and_(end <= other_begin, begin < other_end)

    def __gt__(
        self, other: _CompoundTimespanDatabaseRepresentation | sqlalchemy.sql.ColumnElement
    ) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited.
        # See comments in Timespan.__gt__ for why we use these exact
        # expressions.
        begin = self._nsec[0]
        end = self._nsec[1]
        if isinstance(other, sqlalchemy.sql.ColumnElement):
            return sqlalchemy.sql.and_(begin > other, end > other)
        return sqlalchemy.sql.and_(begin >= other._nsec[1], end > other._nsec[0])

    def overlaps(
        self, other: _CompoundTimespanDatabaseRepresentation | sqlalchemy.sql.ColumnElement
    ) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited.
        if not isinstance(other, sqlalchemy.sql.ColumnElement):
            begin = self._nsec[0]
            end = self._nsec[1]
            return sqlalchemy.sql.and_(end > other._nsec[0], other._nsec[1] > begin)
        # Overlap with a single time point is the same as containing it.
        return self.contains(other)

    def contains(
        self, other: _CompoundTimespanDatabaseRepresentation | sqlalchemy.sql.ColumnElement
    ) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited.
        begin = self._nsec[0]
        end = self._nsec[1]
        if isinstance(other, sqlalchemy.sql.ColumnElement):
            # Lower bound is inclusive, upper bound is exclusive.
            return sqlalchemy.sql.and_(begin <= other, end > other)
        return sqlalchemy.sql.and_(begin <= other._nsec[0], end >= other._nsec[1])

    def lower(self) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited.
        begin = self._nsec[0]
        # NULL bounds are mapped to 0.
        return sqlalchemy.sql.functions.coalesce(begin, sqlalchemy.sql.literal(0))

    def upper(self) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited.
        end = self._nsec[1]
        # NULL bounds are mapped to 0.
        return sqlalchemy.sql.functions.coalesce(end, sqlalchemy.sql.literal(0))

    def flatten(self, name: str | None = None) -> tuple[sqlalchemy.sql.ColumnElement, ...]:
        # Docstring inherited.
        if name is not None:
            labeled_begin = self._nsec[0].label(f"{name}_begin")
            labeled_end = self._nsec[1].label(f"{name}_end")
            return (labeled_begin, labeled_end)
        # No relabeling requested: hand back the stored pair as-is.
        return self._nsec

    def apply_any_aggregate(
        self,
        func: Callable[[sqlalchemy.ColumnElement[Any]], sqlalchemy.ColumnElement[Any]],
    ) -> TimespanDatabaseRepresentation:
        # Docstring inherited.
        aggregated_begin = func(self._nsec[0])
        aggregated_end = func(self._nsec[1])
        return _CompoundTimespanDatabaseRepresentation(
            nsec=(aggregated_begin, aggregated_end), name=self._name
        )
# Bind the two-field implementation to the ``Compound`` class attribute that
# the base class only declares; this has to be done at module level because
# the subclass cannot be defined until after the base class itself.
TimespanDatabaseRepresentation.Compound = _CompoundTimespanDatabaseRepresentation