Coverage for python/lsst/dax/apdb/apdb.py : 81%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of dax_apdb.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ["ApdbConfig", "Apdb"]
26from abc import ABC, abstractmethod
27import os
28import pandas
29from typing import Iterable, Optional
31import lsst.daf.base as dafBase
32from lsst.pex.config import Config, ConfigurableField, Field
33from lsst.sphgeom import Region
def _data_file_name(basename: str) -> str:
    """Return path name of a data file in dax_apdb package.

    Parameters
    ----------
    basename : `str`
        Name of the file inside the package ``data`` directory.

    Returns
    -------
    path : `str`
        ``basename`` joined onto ``${DAX_APDB_DIR}/data``.

    Notes
    -----
    The ``${DAX_APDB_DIR}`` placeholder is returned literally; this function
    does not expand environment variables.
    """
    # Keep the placeholder unexpanded: only the path components are joined
    # here, nothing is looked up in the environment.
    return os.path.join("${DAX_APDB_DIR}", "data", basename)
class ApdbConfig(Config):
    """Part of Apdb configuration common to all implementations.

    Notes
    -----
    Fields defined here cover the length of DiaSource and DiaForcedSource
    history to read, and the locations of the YAML schema definition files.
    """

    # History window, in months, used when reading DiaSource records.
    read_sources_months = Field(
        dtype=int,
        doc="Number of months of history to read from DiaSource",
        default=12,
    )

    # History window, in months, used when reading DiaForcedSource records.
    read_forced_sources_months = Field(
        dtype=int,
        doc="Number of months of history to read from DiaForcedSource",
        default=12,
    )

    # Default points at the schema file in the package "data" directory.
    schema_file = Field(
        dtype=str,
        doc="Location of (YAML) configuration file with standard schema",
        default=_data_file_name("apdb-schema.yaml"),
    )

    # Default points at the extra-schema file in the package "data" directory.
    extra_schema_file = Field(
        dtype=str,
        doc="Location of (YAML) configuration file with extra schema",
        default=_data_file_name("apdb-schema-extra.yaml"),
    )
class Apdb(ABC):
    """Abstract interface for APDB.

    Notes
    -----
    Concrete implementations must override every method marked with
    `abc.abstractmethod`. The abstract bodies raise `NotImplementedError`.
    """

    # Configuration class associated with this interface.
    ConfigClass = ApdbConfig

    @abstractmethod
    def makeSchema(self, drop: bool = False) -> None:
        """Create or re-create whole database schema.

        Parameters
        ----------
        drop : `bool`
            If True then drop all tables before creating new ones.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaObjects(self, region: Region) -> pandas.DataFrame:
        """Return catalog of DiaObject instances from a given region.

        This method returns only the last version of each DiaObject. Some
        records in a returned catalog may be outside the specified region, it
        is up to a client to ignore those records or cleanup the catalog before
        further use.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DiaObjects.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog containing DiaObject records for a region that may be a
            superset of the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaSources(self, region: Region,
                      object_ids: Optional[Iterable[int]],
                      visit_time: dafBase.DateTime) -> Optional[pandas.DataFrame]:
        """Return catalog of DiaSource instances from a given region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DiaSources.
        object_ids : iterable [ `int` ], optional
            List of DiaObject IDs to further constrain the set of returned
            sources. If `None` then returned sources are not constrained. If
            list is empty then empty catalog is returned with a correct
            schema.
        visit_time : `lsst.daf.base.DateTime`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            Catalog containing DiaSource records. `None` is returned if
            ``read_sources_months`` configuration parameter is set to 0.

        Notes
        -----
        This method returns DiaSource catalog for a region with additional
        filtering based on DiaObject IDs. Only a subset of DiaSource history
        is returned limited by ``read_sources_months`` config parameter, w.r.t.
        ``visit_time``. If ``object_ids`` is empty then an empty catalog is
        always returned with the correct schema (columns/types). If
        ``object_ids`` is `None` then no filtering is performed and some of the
        returned records may be outside the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaForcedSources(self, region: Region,
                            object_ids: Optional[Iterable[int]],
                            visit_time: dafBase.DateTime) -> Optional[pandas.DataFrame]:
        """Return catalog of DiaForcedSource instances from a given region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DiaForcedSources.
        object_ids : iterable [ `int` ], optional
            List of DiaObject IDs to further constrain the set of returned
            sources. If list is empty then empty catalog is returned with a
            correct schema. If `None` then returned sources are not
            constrained. Some implementations may not support latter case.
        visit_time : `lsst.daf.base.DateTime`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            Catalog containing DiaForcedSource records. `None` is returned if
            ``read_forced_sources_months`` configuration parameter is set to 0.

        Raises
        ------
        NotImplementedError
            May be raised by some implementations if ``object_ids`` is `None`.

        Notes
        -----
        This method returns DiaForcedSource catalog for a region with
        additional filtering based on DiaObject IDs. Only a subset of
        DiaForcedSource history is returned limited by
        ``read_forced_sources_months`` config parameter, w.r.t.
        ``visit_time``. If ``object_ids`` is empty then an empty catalog is
        always returned with the correct schema (columns/types). If
        ``object_ids`` is `None` then no filtering is performed and some of the
        returned records may be outside the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def store(self,
              visit_time: dafBase.DateTime,
              objects: pandas.DataFrame,
              sources: Optional[pandas.DataFrame] = None,
              forced_sources: Optional[pandas.DataFrame] = None) -> None:
        """Store all three types of catalogs in the database.

        Parameters
        ----------
        visit_time : `lsst.daf.base.DateTime`
            Time of the visit.
        objects : `pandas.DataFrame`
            Catalog with DiaObject records.
        sources : `pandas.DataFrame`, optional
            Catalog with DiaSource records.
        forced_sources : `pandas.DataFrame`, optional
            Catalog with DiaForcedSource records.

        Notes
        -----
        This method takes DataFrame catalogs, their schema must be
        compatible with the schema of APDB table:

          - column names must correspond to database table columns
          - types and units of the columns must match database definitions,
            no unit conversion is performed presently
          - columns that have default values in database schema can be
            omitted from catalog
          - this method knows how to fill interval-related columns of DiaObject
            (validityStart, validityEnd) they do not need to appear in a
            catalog
          - source catalogs have ``diaObjectId`` column associating sources
            with objects
        """
        raise NotImplementedError()

    @abstractmethod
    def dailyJob(self) -> None:
        """Implement daily activities like cleanup/vacuum.

        What should be done during daily activities is determined by
        specific implementation.
        """
        raise NotImplementedError()

    @abstractmethod
    def countUnassociatedObjects(self) -> int:
        """Return the number of DiaObjects that have only one DiaSource
        associated with them.

        Used as part of ap_verify metrics.

        Returns
        -------
        count : `int`
            Number of DiaObjects with exactly one associated DiaSource.

        Notes
        -----
        This method can be very inefficient or slow in some implementations.
        """
        raise NotImplementedError()

    @classmethod
    def makeField(cls, doc: str) -> ConfigurableField:
        """Make a `~lsst.pex.config.ConfigurableField` for Apdb.

        Parameters
        ----------
        doc : `str`
            Help text for the field.

        Returns
        -------
        configurableField : `lsst.pex.config.ConfigurableField`
            A `~lsst.pex.config.ConfigurableField` for Apdb.
        """
        # ``cls`` is passed as the target, so calling this on a subclass
        # produces a field targeting that subclass.
        return ConfigurableField(doc=doc, target=cls)