Coverage for python/lsst/dax/apdb/apdb.py: 78%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of dax_apdb.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ["ApdbConfig", "Apdb"]
26from abc import ABC, abstractmethod
27import os
28import pandas
29from typing import Iterable, Mapping, Optional
31import lsst.daf.base as dafBase
32from lsst.pex.config import Config, ConfigurableField, Field
33from lsst.sphgeom import Region
34from .apdbSchema import ApdbTables, TableDef
def _data_file_name(basename: str) -> str:
    """Build the path name of a data file in the dax_apdb package.

    Parameters
    ----------
    basename : `str`
        Name of the file inside the package ``data`` directory.

    Returns
    -------
    path : `str`
        Path made of the literal ``${DAX_APDB_DIR}`` placeholder, the
        ``data`` directory and ``basename``. The placeholder is returned
        as-is, this function does not expand it.
    """
    components = ("${DAX_APDB_DIR}", "data", basename)
    return os.path.join(*components)
class ApdbConfig(Config):
    """Configuration options shared by all Apdb implementations.
    """

    # Length of the DiaSource history window, in months.
    read_sources_months = Field(
        doc="Number of months of history to read from DiaSource",
        dtype=int,
        default=12,
    )

    # Length of the DiaForcedSource history window, in months.
    read_forced_sources_months = Field(
        doc="Number of months of history to read from DiaForcedSource",
        dtype=int,
        default=12,
    )

    # Standard schema; the default is built by _data_file_name().
    schema_file = Field(
        doc="Location of (YAML) configuration file with standard schema",
        dtype=str,
        default=_data_file_name("apdb-schema.yaml"),
    )

    # Optional additions/overrides merged on top of ``schema_file``.
    extra_schema_file = Field(
        doc=(
            "Location of (YAML) configuration file with extra schema, "
            "definitions in this file are merged with the definitions in "
            "'schema_file', extending or replacing parts of the schema."
        ),
        dtype=str,
        default=_data_file_name("apdb-schema-extra.yaml"),
    )
class Apdb(ABC):
    """Abstract base class defining the interface to APDB.
    """

    ConfigClass = ApdbConfig

    @abstractmethod
    def tableDef(self, table: ApdbTables) -> Optional[TableDef]:
        """Look up the schema definition of one APDB table.

        Parameters
        ----------
        table : `ApdbTables`
            One of the known APDB tables.

        Returns
        -------
        tableSchema : `TableDef` or `None`
            Description of the table schema, or `None` when the table is
            not defined by this implementation.
        """
        raise NotImplementedError()

    @abstractmethod
    def makeSchema(self, drop: bool = False) -> None:
        """Create the whole database schema, or re-create it.

        Parameters
        ----------
        drop : `bool`
            When `True`, all tables are dropped before new ones are
            created.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaObjects(self, region: Region) -> pandas.DataFrame:
        """Fetch the catalog of DiaObject instances for a given region.

        Only the last version of each DiaObject is included. The returned
        catalog may contain records located outside the requested region;
        the client is responsible for ignoring those records or for
        cleaning up the catalog before further use.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DiaObjects.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog with DiaObject records for an area that may be a
            superset of the requested region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaSources(
        self,
        region: Region,
        object_ids: Optional[Iterable[int]],
        visit_time: dafBase.DateTime,
    ) -> Optional[pandas.DataFrame]:
        """Fetch the catalog of DiaSource instances for a given region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DiaSources.
        object_ids : iterable [ `int` ], optional
            DiaObject IDs used to further constrain the set of returned
            sources. `None` means that the returned sources are not
            constrained; an empty list results in an empty catalog with a
            correct schema.
        visit_time : `lsst.daf.base.DateTime`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            Catalog with DiaSource records, or `None` when the
            ``read_sources_months`` configuration parameter is set to 0.

        Notes
        -----
        The DiaSource catalog for the region is additionally filtered by
        DiaObject IDs. Only a part of the DiaSource history is returned,
        limited by the ``read_sources_months`` config parameter relative
        to ``visit_time``. An empty ``object_ids`` always produces an empty
        catalog with the correct schema (columns/types). When
        ``object_ids`` is `None` no filtering is performed and some of the
        returned records may be outside the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaForcedSources(
        self,
        region: Region,
        object_ids: Optional[Iterable[int]],
        visit_time: dafBase.DateTime,
    ) -> Optional[pandas.DataFrame]:
        """Fetch the catalog of DiaForcedSource instances for a given
        region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DiaForcedSources.
        object_ids : iterable [ `int` ], optional
            DiaObject IDs used to further constrain the set of returned
            sources. An empty list results in an empty catalog with a
            correct schema. `None` means that the returned sources are not
            constrained; some implementations may not support the latter
            case.
        visit_time : `lsst.daf.base.DateTime`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            Catalog with DiaForcedSource records, or `None` when the
            ``read_forced_sources_months`` configuration parameter is set
            to 0.

        Raises
        ------
        NotImplementedError
            May be raised by some implementations if ``object_ids`` is
            `None`.

        Notes
        -----
        The DiaForcedSource catalog for the region is additionally
        filtered by DiaObject IDs. Only a part of the DiaForcedSource
        history is returned, limited by the ``read_forced_sources_months``
        config parameter relative to ``visit_time``. An empty
        ``object_ids`` always produces an empty catalog with the correct
        schema (columns/types). When ``object_ids`` is `None` no filtering
        is performed and some of the returned records may be outside the
        specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaObjectsHistory(
        self,
        start_time: dafBase.DateTime,
        end_time: Optional[dafBase.DateTime] = None,
        region: Optional[Region] = None,
    ) -> pandas.DataFrame:
        """Fetch the catalog of DiaObject instances for a given time
        period, including the history of each DiaObject.

        Parameters
        ----------
        start_time : `dafBase.DateTime`
            Start of the DiaObject history search. A DiaObject record is
            selected when its ``validityStart`` lies in the interval
            between ``start_time`` (inclusive) and ``end_time``
            (exclusive).
        end_time : `dafBase.DateTime`, optional
            Upper time limit of the DiaObject history search; when not
            specified the upper limit is unrestricted.
        region : `lsst.sphgeom.Region`, optional
            Region to search for DiaObjects; when not specified the whole
            sky is searched. When a region is specified some returned
            records may fall outside of it.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog with DiaObject records.

        Notes
        -----
        This part of the API may not be very stable and can change before
        the implementation finalizes.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaSourcesHistory(
        self,
        start_time: dafBase.DateTime,
        end_time: Optional[dafBase.DateTime] = None,
        region: Optional[Region] = None,
    ) -> pandas.DataFrame:
        """Fetch the catalog of DiaSource instances for a given time
        period.

        Parameters
        ----------
        start_time : `dafBase.DateTime`
            Start of the DiaSource history search. A DiaSource record is
            selected when its ``midPointTai`` lies in the interval between
            ``start_time`` (inclusive) and ``end_time`` (exclusive).
        end_time : `dafBase.DateTime`, optional
            Upper time limit of the DiaSource history search; when not
            specified the upper limit is unrestricted.
        region : `lsst.sphgeom.Region`, optional
            Region to search for DiaSources; when not specified the whole
            sky is searched. When a region is specified some returned
            records may fall outside of it.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog with DiaSource records.

        Notes
        -----
        This part of the API may not be very stable and can change before
        the implementation finalizes.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaForcedSourcesHistory(
        self,
        start_time: dafBase.DateTime,
        end_time: Optional[dafBase.DateTime] = None,
        region: Optional[Region] = None,
    ) -> pandas.DataFrame:
        """Fetch the catalog of DiaForcedSource instances for a given time
        period.

        Parameters
        ----------
        start_time : `dafBase.DateTime`
            Start of the DiaForcedSource history search. A DiaForcedSource
            record is selected when its ``midPointTai`` lies in the
            interval between ``start_time`` (inclusive) and ``end_time``
            (exclusive).
        end_time : `dafBase.DateTime`, optional
            Upper time limit of the DiaForcedSource history search; when
            not specified the upper limit is unrestricted.
        region : `lsst.sphgeom.Region`, optional
            Region to search for DiaForcedSources; when not specified the
            whole sky is searched. When a region is specified some
            returned records may fall outside of it.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog with DiaForcedSource records.

        Notes
        -----
        This part of the API may not be very stable and can change before
        the implementation finalizes.
        """
        raise NotImplementedError()

    @abstractmethod
    def getSSObjects(self) -> pandas.DataFrame:
        """Fetch the catalog of SSObject instances.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog with SSObject records; all existing records are
            returned.
        """
        raise NotImplementedError()

    @abstractmethod
    def store(
        self,
        visit_time: dafBase.DateTime,
        objects: pandas.DataFrame,
        sources: Optional[pandas.DataFrame] = None,
        forced_sources: Optional[pandas.DataFrame] = None,
    ) -> None:
        """Save all three types of catalogs to the database.

        Parameters
        ----------
        visit_time : `lsst.daf.base.DateTime`
            Time of the visit.
        objects : `pandas.DataFrame`
            Catalog with DiaObject records.
        sources : `pandas.DataFrame`, optional
            Catalog with DiaSource records.
        forced_sources : `pandas.DataFrame`, optional
            Catalog with DiaForcedSource records.

        Notes
        -----
        This method takes DataFrame catalogs whose schema must be
        compatible with the schema of the APDB tables:

          - column names must correspond to database table columns
          - types and units of the columns must match database
            definitions, no unit conversion is performed presently
          - columns that have default values in the database schema can
            be omitted from a catalog
          - interval-related columns of DiaObject (validityStart,
            validityEnd) are filled by this method and do not need to
            appear in a catalog
          - source catalogs have a ``diaObjectId`` column associating
            sources with objects
        """
        raise NotImplementedError()

    @abstractmethod
    def storeSSObjects(self, objects: pandas.DataFrame) -> None:
        """Save a catalog of SSObjects, updating existing records.

        Parameters
        ----------
        objects : `pandas.DataFrame`
            Catalog with SSObject records.

        Notes
        -----
        When SSObjects with matching IDs are already present in the
        database, their records are updated with the information from the
        provided records.
        """
        raise NotImplementedError()

    @abstractmethod
    def reassignDiaSources(self, idMap: Mapping[int, int]) -> None:
        """Attach DiaSources to SSObjects, detaching them from DiaObjects.

        Parameters
        ----------
        idMap : `Mapping`
            Maps DiaSource IDs to their new SSObject IDs.
        """
        raise NotImplementedError()

    @abstractmethod
    def dailyJob(self) -> None:
        """Run daily activities such as cleanup/vacuum.

        The exact set of daily activities is determined by the specific
        implementation.
        """
        raise NotImplementedError()

    @abstractmethod
    def countUnassociatedObjects(self) -> int:
        """Count the DiaObjects that have only one DiaSource associated
        with them.

        Used as part of ap_verify metrics.

        Returns
        -------
        count : `int`
            Number of DiaObjects with exactly one associated DiaSource.

        Notes
        -----
        This method can be very inefficient or slow in some
        implementations.
        """
        raise NotImplementedError()

    @classmethod
    def makeField(cls, doc: str) -> ConfigurableField:
        """Build a `~lsst.pex.config.ConfigurableField` for Apdb.

        Parameters
        ----------
        doc : `str`
            Help text for the field.

        Returns
        -------
        configurableField : `lsst.pex.config.ConfigurableField`
            A `~lsst.pex.config.ConfigurableField` whose target is the
            class this method is called on.
        """
        configurable_field = ConfigurableField(target=cls, doc=doc)
        return configurable_field