Coverage for python/lsst/dax/apdb/apdb.py : 80%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of dax_apdb.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ["ApdbConfig", "Apdb"]
26from abc import ABC, abstractmethod
27import os
28import pandas
29from typing import Iterable, Optional
31import lsst.daf.base as dafBase
32from lsst.pex.config import Config, ConfigurableField, Field
33from lsst.sphgeom import Region
34from .apdbSchema import ApdbTables, TableDef
def _data_file_name(basename: str) -> str:
    """Build the path name of a data file shipped with the dax_apdb package.

    Parameters
    ----------
    basename : `str`
        Name of the file inside the package ``data`` directory.

    Returns
    -------
    path : `str`
        ``basename`` joined onto the ``data`` directory located under
        ``${DAX_APDB_DIR}``. The ``${DAX_APDB_DIR}`` placeholder is kept
        verbatim in the result; it is not expanded here.
    """
    data_dir = os.path.join("${DAX_APDB_DIR}", "data")
    return os.path.join(data_dir, basename)
class ApdbConfig(Config):
    """Configuration options shared by every Apdb implementation.
    """

    # How far back in time the source catalogs are read.
    read_sources_months = Field(
        doc="Number of months of history to read from DiaSource",
        dtype=int,
        default=12,
    )
    read_forced_sources_months = Field(
        doc="Number of months of history to read from DiaForcedSource",
        dtype=int,
        default=12,
    )

    # Schema description files; defaults point into the package data
    # directory.
    schema_file = Field(
        doc="Location of (YAML) configuration file with standard schema",
        dtype=str,
        default=_data_file_name("apdb-schema.yaml"),
    )
    extra_schema_file = Field(
        doc=(
            "Location of (YAML) configuration file with extra schema, "
            "definitions in this file are merged with the definitions in "
            "'schema_file', extending or replacing parts of the schema."
        ),
        dtype=str,
        default=_data_file_name("apdb-schema-extra.yaml"),
    )
class Apdb(ABC):
    """Abstract interface that every APDB implementation has to provide.
    """

    ConfigClass = ApdbConfig

    @abstractmethod
    def tableDef(self, table: ApdbTables) -> Optional[TableDef]:
        """Look up the schema definition of one APDB table.

        Parameters
        ----------
        table : `ApdbTables`
            One of the known APDB tables.

        Returns
        -------
        tableSchema : `TableDef` or `None`
            Description of the table schema, or `None` when this
            implementation does not define the requested table.
        """
        raise NotImplementedError()

    @abstractmethod
    def makeSchema(self, drop: bool = False) -> None:
        """Create the whole database schema, or re-create it.

        Parameters
        ----------
        drop : `bool`
            If True then all tables are dropped before new ones are created.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaObjects(self, region: Region) -> pandas.DataFrame:
        """Return catalog of DiaObject instances from a given region.

        Only the last version of each DiaObject is returned. The returned
        catalog may include records that lie outside the specified region;
        the client is expected to ignore such records or to clean up the
        catalog before further use.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DiaObjects.

        Returns
        -------
        catalog : `pandas.DataFrame`
            DiaObject records for an area that may be a superset of the
            specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaSources(
        self,
        region: Region,
        object_ids: Optional[Iterable[int]],
        visit_time: dafBase.DateTime,
    ) -> Optional[pandas.DataFrame]:
        """Return catalog of DiaSource instances from a given region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DiaSources.
        object_ids : iterable [ `int` ], optional
            DiaObject IDs used to further constrain the set of returned
            sources. With `None` the returned sources are not constrained.
            With an empty list an empty catalog with a correct schema is
            returned.
        visit_time : `lsst.daf.base.DateTime`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            Catalog containing DiaSource records. `None` is returned if
            ``read_sources_months`` configuration parameter is set to 0.

        Notes
        -----
        The DiaSource catalog for the region is additionally filtered by
        DiaObject IDs. Only the part of DiaSource history limited by the
        ``read_sources_months`` config parameter, counted with respect to
        ``visit_time``, is returned. An empty ``object_ids`` always yields an
        empty catalog with the correct schema (columns/types). When
        ``object_ids`` is `None` no filtering is performed and some of the
        returned records may be outside the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaForcedSources(
        self,
        region: Region,
        object_ids: Optional[Iterable[int]],
        visit_time: dafBase.DateTime,
    ) -> Optional[pandas.DataFrame]:
        """Return catalog of DiaForcedSource instances from a given region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DiaForcedSources.
        object_ids : iterable [ `int` ], optional
            DiaObject IDs used to further constrain the set of returned
            sources. With an empty list an empty catalog with a correct
            schema is returned. With `None` the returned sources are not
            constrained; some implementations may not support the latter
            case.
        visit_time : `lsst.daf.base.DateTime`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            Catalog containing DiaForcedSource records. `None` is returned
            if ``read_forced_sources_months`` configuration parameter is set
            to 0.

        Raises
        ------
        NotImplementedError
            May be raised by some implementations if ``object_ids`` is `None`.

        Notes
        -----
        The DiaForcedSource catalog for the region is additionally filtered
        by DiaObject IDs. Only the part of DiaForcedSource history limited by
        the ``read_forced_sources_months`` config parameter, counted with
        respect to ``visit_time``, is returned. An empty ``object_ids``
        always yields an empty catalog with the correct schema
        (columns/types). When ``object_ids`` is `None` no filtering is
        performed and some of the returned records may be outside the
        specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def store(
        self,
        visit_time: dafBase.DateTime,
        objects: pandas.DataFrame,
        sources: Optional[pandas.DataFrame] = None,
        forced_sources: Optional[pandas.DataFrame] = None,
    ) -> None:
        """Store all three types of catalogs in the database.

        Parameters
        ----------
        visit_time : `lsst.daf.base.DateTime`
            Time of the visit.
        objects : `pandas.DataFrame`
            Catalog with DiaObject records.
        sources : `pandas.DataFrame`, optional
            Catalog with DiaSource records.
        forced_sources : `pandas.DataFrame`, optional
            Catalog with DiaForcedSource records.

        Notes
        -----
        This method takes DataFrame catalogs whose schema must be compatible
        with the schema of the corresponding APDB table:

          - column names must correspond to database table columns
          - types and units of the columns must match database definitions,
            no unit conversion is performed presently
          - columns that have default values in database schema can be
            omitted from catalog
          - this method knows how to fill interval-related columns of
            DiaObject (validityStart, validityEnd), they do not need to
            appear in a catalog
          - source catalogs have ``diaObjectId`` column associating sources
            with objects
        """
        raise NotImplementedError()

    @abstractmethod
    def dailyJob(self) -> None:
        """Run daily activities such as cleanup/vacuum.

        What is actually done during daily activities is determined by the
        specific implementation.
        """
        raise NotImplementedError()

    @abstractmethod
    def countUnassociatedObjects(self) -> int:
        """Return the number of DiaObjects that have only one DiaSource
        associated with them.

        Used as part of ap_verify metrics.

        Returns
        -------
        count : `int`
            Number of DiaObjects with exactly one associated DiaSource.

        Notes
        -----
        This method can be very inefficient or slow in some implementations.
        """
        raise NotImplementedError()

    @classmethod
    def makeField(cls, doc: str) -> ConfigurableField:
        """Make a `~lsst.pex.config.ConfigurableField` for Apdb.

        Parameters
        ----------
        doc : `str`
            Help text for the field.

        Returns
        -------
        configurableField : `lsst.pex.config.ConfigurableField`
            A `~lsst.pex.config.ConfigurableField` whose target is the class
            this method is called on.
        """
        return ConfigurableField(target=cls, doc=doc)