Coverage for python/lsst/dax/apdb/apdb.py: 84%
108 statements
« prev ^ index » next coverage.py v7.4.1, created at 2024-02-03 10:52 +0000
« prev ^ index » next coverage.py v7.4.1, created at 2024-02-03 10:52 +0000
1# This file is part of dax_apdb.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ["ApdbConfig", "Apdb", "ApdbInsertId", "ApdbTableData"]
26import os
27from abc import ABC, abstractmethod
28from collections.abc import Iterable, Mapping
29from dataclasses import dataclass
30from typing import TYPE_CHECKING
31from uuid import UUID, uuid4
33import lsst.daf.base as dafBase
34import pandas
35from felis.simple import Table
36from lsst.pex.config import Config, ConfigurableField, Field
37from lsst.sphgeom import Region
39from .apdbSchema import ApdbTables
41if TYPE_CHECKING: 41 ↛ 42line 41 didn't jump to line 42, because the condition on line 41 was never true
42 from .apdbMetadata import ApdbMetadata
43 from .versionTuple import VersionTuple
def _data_file_name(basename: str) -> str:
    """Build the location of a data file from the sdm_schemas package.

    Parameters
    ----------
    basename : `str`
        Name of the file inside the ``yml`` folder of the package.

    Returns
    -------
    path : `str`
        Path starting with the literal ``${SDM_SCHEMAS_DIR}`` placeholder,
        the placeholder is not expanded here.
    """
    path_parts = ("${SDM_SCHEMAS_DIR}", "yml", basename)
    return os.path.join(*path_parts)
class ApdbConfig(Config):
    """Configuration options shared by every Apdb implementation."""

    # History windows used when reading source catalogs.
    read_sources_months = Field[int](
        doc="Number of months of history to read from DiaSource",
        default=12,
    )
    read_forced_sources_months = Field[int](
        doc="Number of months of history to read from DiaForcedSource",
        default=12,
    )

    # Schema definition.
    schema_file = Field[str](
        doc="Location of (YAML) configuration file with standard schema",
        default=_data_file_name("apdb.yaml"),
    )
    schema_name = Field[str](
        doc="Name of the schema in YAML configuration file.",
        default="ApdbSchema",
    )
    extra_schema_file = Field[str](
        doc=(
            "Location of (YAML) configuration file with extra schema, definitions in this file are "
            "merged with the definitions in 'schema_file', extending or replacing parts of the schema."
        ),
        default=None,
        optional=True,
        deprecated="This field is deprecated, its value is not used.",
    )

    # Optional bookkeeping of insert identifiers.
    use_insert_id = Field[bool](
        doc=(
            "If True, make and fill additional tables used for getHistory methods. Databases "
            "created with earlier versions of APDB may not have these tables, and corresponding "
            "methods will not work for them."
        ),
        default=False,
    )
class ApdbTableData(ABC):
    """Abstract interface describing the contents of one table.

    Concrete subclasses provide the column names and the rows of the table.
    """

    @abstractmethod
    def column_names(self) -> list[str]:
        """Return the names of the table columns in their order.

        Returns
        -------
        names : `list` [`str`]
            Column names.
        """
        raise NotImplementedError()

    @abstractmethod
    def rows(self) -> Iterable[tuple]:
        """Return the rows of the table, one tuple of values per row.

        Returns
        -------
        rows : `iterable` [`tuple`]
            Iterable of tuples.
        """
        raise NotImplementedError()
@dataclass(frozen=True)
class ApdbInsertId:
    """Class used to identify a single insert operation.

    Instances of this class are used to identify the units of transfer from
    APDB to PPDB. Usually a single `ApdbInsertId` corresponds to a single call
    to the `store` method.
    """

    id: UUID
    """Unique identifier of this insert (`uuid.UUID`)."""

    insert_time: dafBase.DateTime
    """Time of this insert, usually corresponds to visit time
    (`dafBase.DateTime`).
    """

    @classmethod
    def new_insert_id(cls, insert_time: dafBase.DateTime) -> ApdbInsertId:
        """Generate new unique insert identifier.

        Parameters
        ----------
        insert_time : `dafBase.DateTime`
            Time to record for the new insert.

        Returns
        -------
        insert_id : `ApdbInsertId`
            New instance of the class this method is called on, with a
            freshly generated random UUID.
        """
        # Build through ``cls`` rather than the hard-coded class name so that
        # subclasses calling this factory get instances of their own type.
        return cls(id=uuid4(), insert_time=insert_time)
class Apdb(ABC):
    """Abstract interface that every APDB implementation provides."""

    ConfigClass = ApdbConfig

    @classmethod
    @abstractmethod
    def apdbImplementationVersion(cls) -> VersionTuple:
        """Return the version number of the current APDB implementation.

        Returns
        -------
        version : `VersionTuple`
            Version of the code defined in implementation class.
        """
        raise NotImplementedError()

    @abstractmethod
    def apdbSchemaVersion(self) -> VersionTuple:
        """Return the schema version number as defined in the config file.

        Returns
        -------
        version : `VersionTuple`
            Version of the schema defined in schema config file.
        """
        raise NotImplementedError()

    @abstractmethod
    def tableDef(self, table: ApdbTables) -> Table | None:
        """Return the schema definition of a given table.

        Parameters
        ----------
        table : `ApdbTables`
            One of the known APDB tables.

        Returns
        -------
        tableSchema : `felis.simple.Table` or `None`
            Table schema description, `None` is returned if the table is not
            defined by this implementation.
        """
        raise NotImplementedError()

    @abstractmethod
    def makeSchema(self, drop: bool = False) -> None:
        """Create or re-create the whole database schema.

        Parameters
        ----------
        drop : `bool`
            If True then drop all tables before creating new ones.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaObjects(self, region: Region) -> pandas.DataFrame:
        """Return catalog of DiaObject instances from a given region.

        Only the last version of each DiaObject is returned. Some records in
        the returned catalog may be outside the specified region, it is up to
        a client to ignore those records or clean up the catalog before
        further use.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DIAObjects.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog containing DiaObject records for a region that may be a
            superset of the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaSources(
        self, region: Region, object_ids: Iterable[int] | None, visit_time: dafBase.DateTime
    ) -> pandas.DataFrame | None:
        """Return catalog of DiaSource instances from a given region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DIASources.
        object_ids : iterable [ `int` ], optional
            DiaObject IDs used to further constrain the set of returned
            sources. If `None` then returned sources are not constrained. If
            the list is empty then an empty catalog with a correct schema is
            returned.
        visit_time : `lsst.daf.base.DateTime`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            Catalog containing DiaSource records. `None` is returned if
            ``read_sources_months`` configuration parameter is set to 0.

        Notes
        -----
        The DiaSource catalog for a region is additionally filtered by
        DiaObject IDs. Only a part of the DiaSource history is returned, it
        is limited by ``read_sources_months`` config parameter, w.r.t.
        ``visit_time``. If ``object_ids`` is empty then an empty catalog is
        always returned with the correct schema (columns/types). If
        ``object_ids`` is `None` then no filtering is performed and some of
        the returned records may be outside the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaForcedSources(
        self, region: Region, object_ids: Iterable[int] | None, visit_time: dafBase.DateTime
    ) -> pandas.DataFrame | None:
        """Return catalog of DiaForcedSource instances from a given region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DiaForcedSources.
        object_ids : iterable [ `int` ], optional
            DiaObject IDs used to further constrain the set of returned
            sources. If the list is empty then an empty catalog with a correct
            schema is returned. If `None` then returned sources are not
            constrained. Some implementations may not support the latter
            case.
        visit_time : `lsst.daf.base.DateTime`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            Catalog containing DiaForcedSource records. `None` is returned if
            ``read_forced_sources_months`` configuration parameter is set to
            0.

        Raises
        ------
        NotImplementedError
            May be raised by some implementations if ``object_ids`` is `None`.

        Notes
        -----
        The DiaForcedSource catalog for a region is additionally filtered by
        DiaObject IDs. Only a part of the history is returned, it is limited
        by ``read_forced_sources_months`` config parameter, w.r.t.
        ``visit_time``. If ``object_ids`` is empty then an empty catalog is
        always returned with the correct schema (columns/types). If
        ``object_ids`` is `None` then no filtering is performed and some of
        the returned records may be outside the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def containsVisitDetector(self, visit: int, detector: int) -> bool:
        """Check whether the APDB has data for a given visit-detector.

        Parameters
        ----------
        visit, detector : `int`
            The ID of the visit-detector to search for.

        Returns
        -------
        present : `bool`
            `True` if some DiaObject, DiaSource, or DiaForcedSource records
            exist for the specified observation, `False` otherwise.
        """
        raise NotImplementedError()

    @abstractmethod
    def getInsertIds(self) -> list[ApdbInsertId] | None:
        """Return the insert identifiers known to the database.

        Returns
        -------
        ids : `list` [`ApdbInsertId`] or `None`
            List of identifiers, they may be time-ordered if the database
            supports ordering. `None` is returned if the database is not
            configured to store insert identifiers.
        """
        raise NotImplementedError()

    @abstractmethod
    def deleteInsertIds(self, ids: Iterable[ApdbInsertId]) -> None:
        """Remove insert identifiers from the database.

        Parameters
        ----------
        ids : `iterable` [`ApdbInsertId`]
            Insert identifiers, can include items returned from `getInsertIds`.

        Notes
        -----
        After this call Apdb forgets about the specified identifiers. Any
        auxiliary data associated with the identifiers is also removed from
        the database (but data in regular tables is not removed). This method
        should be called after successful transfer of data from APDB to PPDB
        to free space used by history.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaObjectsHistory(self, ids: Iterable[ApdbInsertId]) -> ApdbTableData:
        """Return catalog of DiaObject instances from a given time period
        including the history of each DiaObject.

        Parameters
        ----------
        ids : `iterable` [`ApdbInsertId`]
            Insert identifiers, can include items returned from `getInsertIds`.

        Returns
        -------
        data : `ApdbTableData`
            Catalog containing DiaObject records. In addition to all regular
            columns it will contain ``insert_id`` column.

        Notes
        -----
        This part of the API may not be very stable and can change before
        the implementation finalizes.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaSourcesHistory(self, ids: Iterable[ApdbInsertId]) -> ApdbTableData:
        """Return catalog of DiaSource instances from a given time period.

        Parameters
        ----------
        ids : `iterable` [`ApdbInsertId`]
            Insert identifiers, can include items returned from `getInsertIds`.

        Returns
        -------
        data : `ApdbTableData`
            Catalog containing DiaSource records. In addition to all regular
            columns it will contain ``insert_id`` column.

        Notes
        -----
        This part of the API may not be very stable and can change before
        the implementation finalizes.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaForcedSourcesHistory(self, ids: Iterable[ApdbInsertId]) -> ApdbTableData:
        """Return catalog of DiaForcedSource instances from a given time
        period.

        Parameters
        ----------
        ids : `iterable` [`ApdbInsertId`]
            Insert identifiers, can include items returned from `getInsertIds`.

        Returns
        -------
        data : `ApdbTableData`
            Catalog containing DiaForcedSource records. In addition to all
            regular columns it will contain ``insert_id`` column.

        Notes
        -----
        This part of the API may not be very stable and can change before
        the implementation finalizes.
        """
        raise NotImplementedError()

    @abstractmethod
    def getSSObjects(self) -> pandas.DataFrame:
        """Return catalog of SSObject instances.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog containing SSObject records, all existing records are
            returned.
        """
        raise NotImplementedError()

    @abstractmethod
    def store(
        self,
        visit_time: dafBase.DateTime,
        objects: pandas.DataFrame,
        sources: pandas.DataFrame | None = None,
        forced_sources: pandas.DataFrame | None = None,
    ) -> None:
        """Store all three types of catalogs in the database.

        Parameters
        ----------
        visit_time : `lsst.daf.base.DateTime`
            Time of the visit.
        objects : `pandas.DataFrame`
            Catalog with DiaObject records.
        sources : `pandas.DataFrame`, optional
            Catalog with DiaSource records.
        forced_sources : `pandas.DataFrame`, optional
            Catalog with DiaForcedSource records.

        Notes
        -----
        This method takes DataFrame catalogs, their schema must be
        compatible with the schema of APDB table:

        - column names must correspond to database table columns
        - types and units of the columns must match database definitions,
          no unit conversion is performed presently
        - columns that have default values in database schema can be
          omitted from catalog
        - this method knows how to fill interval-related columns of
          DiaObject (validityStart, validityEnd), they do not need to appear
          in a catalog
        - source catalogs have ``diaObjectId`` column associating sources
          with objects
        """
        raise NotImplementedError()

    @abstractmethod
    def storeSSObjects(self, objects: pandas.DataFrame) -> None:
        """Store or update SSObject catalog.

        Parameters
        ----------
        objects : `pandas.DataFrame`
            Catalog with SSObject records.

        Notes
        -----
        If SSObjects with matching IDs already exist in the database, their
        records will be updated with the information from provided records.
        """
        raise NotImplementedError()

    @abstractmethod
    def reassignDiaSources(self, idMap: Mapping[int, int]) -> None:
        """Associate DiaSources with SSObjects, dis-associating them
        from DiaObjects.

        Parameters
        ----------
        idMap : `Mapping`
            Maps DiaSource IDs to their new SSObject IDs.

        Raises
        ------
        ValueError
            Raised if DiaSource ID does not exist in the database.
        """
        raise NotImplementedError()

    @abstractmethod
    def dailyJob(self) -> None:
        """Implement daily activities like cleanup/vacuum.

        What should be done during daily activities is determined by
        the specific implementation.
        """
        raise NotImplementedError()

    @abstractmethod
    def countUnassociatedObjects(self) -> int:
        """Return the number of DiaObjects that have only one DiaSource
        associated with them.

        Used as part of ap_verify metrics.

        Returns
        -------
        count : `int`
            Number of DiaObjects with exactly one associated DiaSource.

        Notes
        -----
        This method can be very inefficient or slow in some implementations.
        """
        raise NotImplementedError()

    @classmethod
    def makeField(cls, doc: str) -> ConfigurableField:
        """Make a `~lsst.pex.config.ConfigurableField` for Apdb.

        Parameters
        ----------
        doc : `str`
            Help text for the field.

        Returns
        -------
        configurableField : `lsst.pex.config.ConfigurableField`
            A `~lsst.pex.config.ConfigurableField` for Apdb.
        """
        # The class this method is called on becomes the field target.
        field = ConfigurableField(doc=doc, target=cls)
        return field

    @property
    @abstractmethod
    def metadata(self) -> ApdbMetadata:
        """Object controlling access to APDB metadata (`ApdbMetadata`)."""
        raise NotImplementedError()