Coverage for python/lsst/daf/butler/timespan_database_representation.py: 64%
149 statements
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-07 02:46 -0700
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-07 02:46 -0700
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
27from __future__ import annotations
29__all__ = ("TimespanDatabaseRepresentation",)
31from abc import ABC, abstractmethod
32from collections.abc import Callable, Mapping
33from typing import TYPE_CHECKING, Any, ClassVar, TypeVar
35import sqlalchemy
37# As of astropy 4.2, the erfa interface is shipped independently and
38# ErfaWarning is no longer an AstropyWarning
39try:
40 import erfa
41except ImportError:
42 erfa = None
45from . import ddl
46from ._timespan import Timespan
47from .time_utils import TimeConverter
49if TYPE_CHECKING: # Imports needed only for type annotations; may be circular.
50 pass
# Type variable bound to `TimespanDatabaseRepresentation`; used in the
# annotations below so that methods are typed in terms of the concrete
# subclass they are invoked on.
53_S = TypeVar("_S", bound="TimespanDatabaseRepresentation")
class TimespanDatabaseRepresentation(ABC):
    """Abstract interface describing how a timespan is stored in a database.

    Notes
    -----
    Most of this interface consists of classmethods.  Instances are created
    with the `from_columns` or `fromLiteral` methods, and exist so that
    timespan overlap operations can be included in query JOIN or WHERE
    clauses.

    Implementations of `TimespanDatabaseRepresentation` are guaranteed to use
    the same interval definitions and edge-case behavior as the `Timespan`
    class, and to round-trip `Timespan` instances exactly.
    """

    NAME: ClassVar[str] = "timespan"

    Compound: ClassVar[type[TimespanDatabaseRepresentation]]
    """A concrete subclass of `TimespanDatabaseRepresentation` that simply
    uses two separate fields for the begin (inclusive) and end (exclusive)
    endpoints.

    This implementation should be compatible with any SQL database, and should
    generally be used when a database-specific implementation is not available.
    """

    __slots__ = ()

    @classmethod
    @abstractmethod
    def makeFieldSpecs(
        cls,
        nullable: bool,
        name: str | None = None,
        **kwargs: Any,
    ) -> tuple[ddl.FieldSpec, ...]:
        """Build the field specifications needed to store a timespan.

        Creates one or more `ddl.FieldSpec` objects describing the fields
        that have to be added to a table for this representation.

        Parameters
        ----------
        nullable : `bool`
            If `True`, the timespan may be logically ``NULL`` (`None` in
            Python); how that is stored in the database is
            implementation-defined.  Nullable timespan fields default to
            NULL, all others default to (-∞, ∞).
        name : `str`, optional
            Name of the logical column; forms part of each field name for
            multi-column representations.  Defaults to ``cls.NAME``.
        **kwargs
            Keyword arguments passed on to the `ddl.FieldSpec` constructor
            for every field; implementations only provide the ``name``,
            ``dtype``, and ``default`` arguments themselves.

        Returns
        -------
        specs : `tuple` [ `ddl.FieldSpec` ]
            Field specification objects.  The number of entries depends on
            the subclass, but always matches the length of the values
            returned by `getFieldNames` and `update`.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getFieldNames(cls, name: str | None = None) -> tuple[str, ...]:
        """Return the names of the fields this representation really uses.

        Parameters
        ----------
        name : `str`, optional
            Name of the logical column; forms part of each field name for
            multi-column representations.  Defaults to ``cls.NAME``.

        Returns
        -------
        names : `tuple` [ `str` ]
            Field name(s), guaranteed to equal the names of the field
            specifications returned by `makeFieldSpecs`.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def fromLiteral(cls: type[_S], timespan: Timespan | None) -> _S:
        """Build a database timespan from a literal `Timespan` value.

        Parameters
        ----------
        timespan : `Timespan` or `None`
            Literal timespan to convert, or `None` to produce a logically
            ``NULL`` timespan.

        Returns
        -------
        tsRepr : `TimespanDatabaseRepresentation`
            A timespan expression object backed by `sqlalchemy.sql.literal`
            column expressions.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def from_columns(
        cls: type[_S],
        columns: sqlalchemy.sql.ColumnCollection,
        name: str | None = None,
    ) -> _S:
        """Build a database timespan from the columns of a table or
        subquery.

        Parameters
        ----------
        columns : `sqlalchemy.sql.ColumnCollection`
            SQLAlchemy container holding the raw columns.
        name : `str`, optional
            Name of the logical column; forms part of each field name for
            multi-column representations.  Defaults to ``cls.NAME``.

        Returns
        -------
        tsRepr : `TimespanDatabaseRepresentation`
            A timespan expression object backed by columns taken from
            ``columns``.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def update(
        cls,
        timespan: Timespan | None,
        name: str | None = None,
        result: dict[str, Any] | None = None,
    ) -> dict[str, Any]:
        """Insert a timespan value into a dictionary representing a row.

        Parameters
        ----------
        timespan : `Timespan` or `None`
            Timespan literal to store, or `None` for ``NULL``.
        name : `str`, optional
            Name of the logical column; forms part of each field name for
            multi-column representations.  Defaults to ``cls.NAME``.
        result : `dict` [ `str`, `Any` ], optional
            Dictionary representing a database row to which the fields
            should be added, or `None` to create and return a new one.

        Returns
        -------
        result : `dict` [ `str`, `Any` ]
            Dictionary holding this representation of the timespan.  This is
            exactly the `dict` passed as ``result`` when that is not `None`.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def extract(cls, mapping: Mapping[Any, Any], name: str | None = None) -> Timespan | None:
        """Read a timespan back out of a dictionary representing a row.

        Parameters
        ----------
        mapping : `~collections.abc.Mapping` [ `Any`, `Any` ]
            Dictionary representing a database row that contains a
            `Timespan` in this representation.  Should have key(s) equal to
            the return value of `getFieldNames`.
        name : `str`, optional
            Name of the logical column; forms part of each field name for
            multi-column representations.  Defaults to ``cls.NAME``.

        Returns
        -------
        timespan : `Timespan` or `None`
            Python representation of the timespan.
        """
        raise NotImplementedError()

    @classmethod
    def hasExclusionConstraint(cls) -> bool:
        """Report whether this representation supports exclusion
        constraints.

        Returns
        -------
        supported : `bool`
            If `True`, it is allowed to define a constraint via
            `ddl.TableSpec.exclusion` that includes the fields of this
            representation.  This base implementation returns `False`.
        """
        return False

    @property
    @abstractmethod
    def name(self) -> str:
        """Base logical name of the timespan column or expression (`str`).

        When the representation uses a single actual column this should be
        the full name of that column.  Otherwise it is an unspecified common
        subset of the column names.
        """
        raise NotImplementedError()

    @abstractmethod
    def isNull(self) -> sqlalchemy.sql.ColumnElement:
        """Return an expression testing whether the timespan is ``NULL``.

        The returned SQLAlchemy expression tests whether this timespan is
        logically ``NULL``.

        Returns
        -------
        isnull : `sqlalchemy.sql.ColumnElement`
            A boolean SQLAlchemy expression object.
        """
        raise NotImplementedError()

    @abstractmethod
    def flatten(self, name: str | None = None) -> tuple[sqlalchemy.sql.ColumnElement, ...]:
        """Return the actual column(s) that make up this logical column.

        Parameters
        ----------
        name : `str`, optional
            If given, a name for the logical column that is used to label
            the columns.  Otherwise the columns keep their native names.

        Returns
        -------
        columns : `tuple` [ `sqlalchemy.sql.ColumnElement` ]
            The true column or columns that back this object.
        """
        raise NotImplementedError()

    @abstractmethod
    def isEmpty(self) -> sqlalchemy.sql.ColumnElement:
        """Return a boolean SQLAlchemy expression that tests for an empty
        timespan.

        Returns
        -------
        empty : `sqlalchemy.sql.ColumnElement`
            A boolean SQLAlchemy expression object.
        """
        raise NotImplementedError()

    @abstractmethod
    def __lt__(self: _S, other: _S | sqlalchemy.sql.ColumnElement) -> sqlalchemy.sql.ColumnElement:
        """Return a SQLAlchemy expression for a less-than test.

        The expression tests whether an in-database timespan is strictly
        less than another timespan or a time point.

        Parameters
        ----------
        other : ``type(self)`` or `sqlalchemy.sql.ColumnElement`
            The timespan or time to relate to ``self``: either an instance
            of the same `TimespanDatabaseRepresentation` subclass as
            ``self``, or a SQL column expression representing an
            `astropy.time.Time`.

        Returns
        -------
        less : `sqlalchemy.sql.ColumnElement`
            A boolean SQLAlchemy expression object.

        Notes
        -----
        See `Timespan.__lt__` for edge-case behavior.
        """
        raise NotImplementedError()

    @abstractmethod
    def __gt__(self: _S, other: _S | sqlalchemy.sql.ColumnElement) -> sqlalchemy.sql.ColumnElement:
        """Return a SQLAlchemy expression for a greater-than test.

        The expression tests whether an in-database timespan is strictly
        greater than another timespan or a time point.

        Parameters
        ----------
        other : ``type(self)`` or `sqlalchemy.sql.ColumnElement`
            The timespan or time to relate to ``self``: either an instance
            of the same `TimespanDatabaseRepresentation` subclass as
            ``self``, or a SQL column expression representing an
            `astropy.time.Time`.

        Returns
        -------
        greater : `sqlalchemy.sql.ColumnElement`
            A boolean SQLAlchemy expression object.

        Notes
        -----
        See `Timespan.__gt__` for edge-case behavior.
        """
        raise NotImplementedError()

    @abstractmethod
    def overlaps(self: _S, other: _S | sqlalchemy.sql.ColumnElement) -> sqlalchemy.sql.ColumnElement:
        """Return a SQLAlchemy expression for a timespan overlap test.

        Parameters
        ----------
        other : ``type(self)`` or `sqlalchemy.sql.ColumnElement`
            The timespan or time to overlap ``self`` with.  If a single
            time, this is a synonym for `contains`.

        Returns
        -------
        overlap : `sqlalchemy.sql.ColumnElement`
            A boolean SQLAlchemy expression object.

        Notes
        -----
        See `Timespan.overlaps` for edge-case behavior.
        """
        raise NotImplementedError()

    @abstractmethod
    def contains(self: _S, other: _S | sqlalchemy.sql.ColumnElement) -> sqlalchemy.sql.ColumnElement:
        """Return a SQLAlchemy expression for a containment test.

        The expression tests whether an in-database timespan contains
        another timespan or a time point.

        Parameters
        ----------
        other : ``type(self)`` or `sqlalchemy.sql.ColumnElement`
            The timespan or time to relate to ``self``: either an instance
            of the same `TimespanDatabaseRepresentation` subclass as
            ``self``, or a SQL column expression representing an
            `astropy.time.Time`.

        Returns
        -------
        contains : `sqlalchemy.sql.ColumnElement`
            A boolean SQLAlchemy expression object.

        Notes
        -----
        See `Timespan.contains` for edge-case behavior.
        """
        raise NotImplementedError()

    @abstractmethod
    def lower(self: _S) -> sqlalchemy.sql.ColumnElement:
        """Return a SQLAlchemy expression for the lower bound of the
        timespan.

        Returns
        -------
        lower : `sqlalchemy.sql.ColumnElement`
            A SQLAlchemy expression for a lower bound.

        Notes
        -----
        When the database holds ``NULL`` for a timespan, the returned
        expression should evaluate to 0.  This method and `upper` are mainly
        meant for generating SQL, in particular ORDER BY clauses, so that
        ordering is predictable.  They could potentially be used to
        transform boolean user expressions into SQL as well, but that will
        likely require extra attention to ordering issues.
        """
        raise NotImplementedError()

    @abstractmethod
    def upper(self: _S) -> sqlalchemy.sql.ColumnElement:
        """Return a SQLAlchemy expression for the upper bound of the
        timespan.

        Returns
        -------
        upper : `sqlalchemy.sql.ColumnElement`
            A SQLAlchemy expression for an upper bound.

        Notes
        -----
        When the database holds ``NULL`` for a timespan, the returned
        expression should evaluate to 0.  Also see the notes for the `lower`
        method.
        """
        raise NotImplementedError()

    @abstractmethod
    def apply_any_aggregate(
        self,
        func: Callable[[sqlalchemy.ColumnElement[Any]], sqlalchemy.ColumnElement[Any]],
    ) -> TimespanDatabaseRepresentation:
        """Apply an ANY_VALUE aggregate function (or equivalent) to the
        timespan column(s).

        Parameters
        ----------
        func : `~collections.abc.Callable`
            Callable that takes a `sqlalchemy.ColumnElement` and returns a
            `sqlalchemy.ColumnElement`.

        Returns
        -------
        timespan : `TimespanDatabaseRepresentation`
            A timespan database representation usable in the SELECT clause
            of a query with GROUP BY, where it does not matter which of the
            grouped values is selected.
        """
        raise NotImplementedError()
class _CompoundTimespanDatabaseRepresentation(TimespanDatabaseRepresentation):
    """Timespan representation that keeps each endpoint in its own field.

    This `TimespanDatabaseRepresentation` implementation simply stores the
    two endpoints of the timespan in two separate fields.

    Normally this type is reached through
    `TimespanDatabaseRepresentation.Compound`, and instances should only be
    created via the `from_columns` and `fromLiteral` methods.

    Parameters
    ----------
    nsec : `tuple` of `sqlalchemy.sql.ColumnElement`
        Pair of SQLAlchemy objects for the lower (inclusive) and upper
        (exclusive) bounds, as 64-bit integer columns containing
        nanoseconds.
    name : `str`
        Name for the logical column; the ``_begin`` and ``_end`` field names
        are derived from it.

    Notes
    -----
    A ``NULL`` timespan is stored by setting both fields to ``NULL``; having
    only one of them ``NULL`` is a corrupted state that should only be
    reachable by circumventing this interface.  `Timespan` instances with
    one or both of `~Timespan.begin` and `~Timespan.end` set to `None` are
    stored using the minimum and maximum value constants of our integer-time
    mapping.
    """

    __slots__ = ("_nsec", "_name")

    def __init__(self, nsec: tuple[sqlalchemy.sql.ColumnElement, sqlalchemy.sql.ColumnElement], name: str):
        self._name = name
        self._nsec = nsec

    @classmethod
    def makeFieldSpecs(
        cls,
        nullable: bool,
        name: str | None = None,
        **kwargs: Any,
    ) -> tuple[ddl.FieldSpec, ...]:
        # Docstring inherited.
        prefix = cls.NAME if name is None else name
        # Nullable fields default to NULL; otherwise the defaults are the
        # extreme nanosecond values known to the time converter.
        if nullable:
            begin_default = None
            end_default = None
        else:
            begin_default = sqlalchemy.sql.text(str(TimeConverter().min_nsec))
            end_default = sqlalchemy.sql.text(str(TimeConverter().max_nsec))
        begin_spec = ddl.FieldSpec(
            f"{prefix}_begin",
            dtype=sqlalchemy.BigInteger,
            nullable=nullable,
            default=begin_default,
            **kwargs,
        )
        end_spec = ddl.FieldSpec(
            f"{prefix}_end",
            dtype=sqlalchemy.BigInteger,
            nullable=nullable,
            default=end_default,
            **kwargs,
        )
        return (begin_spec, end_spec)

    @classmethod
    def getFieldNames(cls, name: str | None = None) -> tuple[str, ...]:
        # Docstring inherited.
        prefix = cls.NAME if name is None else name
        return (f"{prefix}_begin", f"{prefix}_end")

    @classmethod
    def update(
        cls,
        extent: Timespan | None,
        name: str | None = None,
        result: dict[str, Any] | None = None,
    ) -> dict[str, Any]:
        # Docstring inherited.
        prefix = cls.NAME if name is None else name
        row = {} if result is None else result
        # A missing timespan is stored as NULL in both fields.
        if extent is not None:
            bounds = (extent.nsec[0], extent.nsec[1])
        else:
            bounds = (None, None)
        row[f"{prefix}_begin"] = bounds[0]
        row[f"{prefix}_end"] = bounds[1]
        return row

    @classmethod
    def extract(cls, mapping: Mapping[str, Any], name: str | None = None) -> Timespan | None:
        # Docstring inherited.
        prefix = cls.NAME if name is None else name
        begin_nsec = mapping[f"{prefix}_begin"]
        end_nsec = mapping[f"{prefix}_end"]
        # Both fields NULL is the only valid encoding of a NULL timespan.
        if begin_nsec is None and end_nsec is None:
            return None
        # Exactly one NULL field means the row is corrupted.
        if begin_nsec is None:
            raise RuntimeError(
                f"Corrupted timespan extracted: begin is NULL, but end is {end_nsec}ns -> "
                f"{TimeConverter().nsec_to_astropy(end_nsec).tai.isot}."
            )
        if end_nsec is None:
            raise RuntimeError(
                f"Corrupted timespan extracted: end is NULL, but begin is {begin_nsec}ns -> "
                f"{TimeConverter().nsec_to_astropy(begin_nsec).tai.isot}."
            )
        return Timespan(None, None, _nsec=(begin_nsec, end_nsec))

    @classmethod
    def from_columns(
        cls,
        columns: sqlalchemy.sql.ColumnCollection,
        name: str | None = None,
    ) -> _CompoundTimespanDatabaseRepresentation:
        # Docstring inherited.
        prefix = cls.NAME if name is None else name
        begin_column = columns[f"{prefix}_begin"]
        end_column = columns[f"{prefix}_end"]
        return cls(nsec=(begin_column, end_column), name=prefix)

    @classmethod
    def fromLiteral(cls, timespan: Timespan | None) -> _CompoundTimespanDatabaseRepresentation:
        # Docstring inherited.
        if timespan is not None:
            bounds = (
                sqlalchemy.sql.literal(timespan.nsec[0]),
                sqlalchemy.sql.literal(timespan.nsec[1]),
            )
        else:
            bounds = (sqlalchemy.sql.null(), sqlalchemy.sql.null())
        return cls(nsec=bounds, name=cls.NAME)

    @property
    def name(self) -> str:
        # Docstring inherited.
        return self._name

    def isNull(self) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited.
        # Only the begin field is inspected here.
        begin, _ = self._nsec
        return begin.is_(None)

    def isEmpty(self) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited.
        begin, end = self._nsec
        return begin >= end

    def __lt__(
        self,
        other: _CompoundTimespanDatabaseRepresentation | sqlalchemy.sql.ColumnElement,
    ) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited.
        # See comments in Timespan.__lt__ for why we use these exact
        # expressions.
        begin, end = self._nsec
        if isinstance(other, sqlalchemy.sql.ColumnElement):
            return sqlalchemy.sql.and_(end <= other, begin < other)
        other_begin, other_end = other._nsec
        return sqlalchemy.sql.and_(end <= other_begin, begin < other_end)

    def __gt__(
        self,
        other: _CompoundTimespanDatabaseRepresentation | sqlalchemy.sql.ColumnElement,
    ) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited.
        # See comments in Timespan.__gt__ for why we use these exact
        # expressions.
        begin, end = self._nsec
        if isinstance(other, sqlalchemy.sql.ColumnElement):
            return sqlalchemy.sql.and_(begin > other, end > other)
        other_begin, other_end = other._nsec
        return sqlalchemy.sql.and_(begin >= other_end, end > other_begin)

    def overlaps(
        self,
        other: _CompoundTimespanDatabaseRepresentation | sqlalchemy.sql.ColumnElement,
    ) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited.
        # Overlap with a single time point is the same as containment.
        if isinstance(other, sqlalchemy.sql.ColumnElement):
            return self.contains(other)
        begin, end = self._nsec
        other_begin, other_end = other._nsec
        return sqlalchemy.sql.and_(end > other_begin, other_end > begin)

    def contains(
        self,
        other: _CompoundTimespanDatabaseRepresentation | sqlalchemy.sql.ColumnElement,
    ) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited.
        begin, end = self._nsec
        if isinstance(other, sqlalchemy.sql.ColumnElement):
            # Time point: begin is inclusive, end is exclusive.
            return sqlalchemy.sql.and_(begin <= other, end > other)
        other_begin, other_end = other._nsec
        return sqlalchemy.sql.and_(begin <= other_begin, end >= other_end)

    def lower(self) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited.
        zero = sqlalchemy.sql.literal(0)
        return sqlalchemy.sql.functions.coalesce(self._nsec[0], zero)

    def upper(self) -> sqlalchemy.sql.ColumnElement:
        # Docstring inherited.
        zero = sqlalchemy.sql.literal(0)
        return sqlalchemy.sql.functions.coalesce(self._nsec[1], zero)

    def flatten(self, name: str | None = None) -> tuple[sqlalchemy.sql.ColumnElement, ...]:
        # Docstring inherited.
        if name is not None:
            begin, end = self._nsec
            return (begin.label(f"{name}_begin"), end.label(f"{name}_end"))
        return self._nsec

    def apply_any_aggregate(
        self,
        func: Callable[[sqlalchemy.ColumnElement[Any]], sqlalchemy.ColumnElement[Any]],
    ) -> TimespanDatabaseRepresentation:
        # Docstring inherited.
        aggregated_begin = func(self._nsec[0])
        aggregated_end = func(self._nsec[1])
        return _CompoundTimespanDatabaseRepresentation(
            nsec=(aggregated_begin, aggregated_end), name=self._name
        )
# Publish the two-field implementation as the ``Compound`` class attribute.
# The assignment happens here, after both class definitions, because the
# subclass does not exist yet while the base class body is being executed.
657TimespanDatabaseRepresentation.Compound = _CompoundTimespanDatabaseRepresentation