Coverage for python/lsst/dax/apdb/apdb.py: 78%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
# This file is part of dax_apdb.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ["ApdbConfig", "Apdb"]
26from abc import ABC, abstractmethod
27import os
28import pandas
29from typing import Iterable, Mapping, Optional
31import lsst.daf.base as dafBase
32from lsst.pex.config import Config, ConfigurableField, Field
33from lsst.sphgeom import Region
34from .apdbSchema import ApdbTables, TableDef
def _data_file_name(basename: str) -> str:
    """Return path name of a data file in dax_apdb package.

    Parameters
    ----------
    basename : `str`
        Name of the file inside the package ``data`` directory.

    Returns
    -------
    path : `str`
        Path of the form ``${DAX_APDB_DIR}/data/<basename>``.

    Notes
    -----
    The ``${DAX_APDB_DIR}`` placeholder is returned literally, this function
    does not expand it.
    """
    # NOTE(review): presumably the consumer of this path expands the
    # environment variable when the file is opened -- confirm.
    return os.path.join("${DAX_APDB_DIR}", "data", basename)
class ApdbConfig(Config):
    """Part of Apdb configuration common to all implementations.

    Notes
    -----
    The two ``read_*_months`` fields limit how much history is read from the
    source tables, the two ``*schema_file`` fields locate the YAML files
    describing the schema.
    """

    # History window, in months, for DiaSource reads.
    read_sources_months = Field(
        dtype=int,
        doc="Number of months of history to read from DiaSource",
        default=12
    )
    # History window, in months, for DiaForcedSource reads.
    read_forced_sources_months = Field(
        dtype=int,
        doc="Number of months of history to read from DiaForcedSource",
        default=12
    )
    # Default points at the schema file in the package ``data`` directory.
    schema_file = Field(
        dtype=str,
        doc="Location of (YAML) configuration file with standard schema",
        default=_data_file_name("apdb-schema.yaml")
    )
    # Optional additions/overrides merged on top of ``schema_file``.
    extra_schema_file = Field(
        dtype=str,
        doc="Location of (YAML) configuration file with extra schema, "
            "definitions in this file are merged with the definitions in "
            "'schema_file', extending or replacing parts of the schema.",
        default=_data_file_name("apdb-schema-extra.yaml")
    )
class Apdb(ABC):
    """Abstract interface for APDB.

    Notes
    -----
    Every method except `makeField` is abstract, concrete implementations
    have to override all of them.
    """

    # Configuration class associated with this interface.
    # NOTE(review): presumably consumed by the lsst.pex.config
    # ConfigurableField machinery via ``makeField`` -- confirm.
    ConfigClass = ApdbConfig

    @abstractmethod
    def tableDef(self, table: ApdbTables) -> Optional[TableDef]:
        """Return table schema definition for a given table.

        Parameters
        ----------
        table : `ApdbTables`
            One of the known APDB tables.

        Returns
        -------
        tableSchema : `TableDef` or `None`
            Table schema description, `None` is returned if table is not
            defined by this implementation.
        """
        raise NotImplementedError()

    @abstractmethod
    def makeSchema(self, drop: bool = False) -> None:
        """Create or re-create whole database schema.

        Parameters
        ----------
        drop : `bool`
            If True then drop all tables before creating new ones.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaObjects(self, region: Region) -> pandas.DataFrame:
        """Return catalog of DiaObject instances from a given region.

        This method returns only the last version of each DiaObject. Some
        records in a returned catalog may be outside the specified region, it
        is up to a client to ignore those records or cleanup the catalog before
        further use.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DiaObjects.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog containing DiaObject records for a region that may be a
            superset of the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaSources(self, region: Region,
                      object_ids: Optional[Iterable[int]],
                      visit_time: dafBase.DateTime) -> Optional[pandas.DataFrame]:
        """Return catalog of DiaSource instances from a given region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DiaSources.
        object_ids : iterable [ `int` ], optional
            List of DiaObject IDs to further constrain the set of returned
            sources. If `None` then returned sources are not constrained. If
            list is empty then empty catalog is returned with a correct
            schema.
        visit_time : `lsst.daf.base.DateTime`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            Catalog containing DiaSource records. `None` is returned if
            ``read_sources_months`` configuration parameter is set to 0.

        Notes
        -----
        This method returns DiaSource catalog for a region with additional
        filtering based on DiaObject IDs. Only a subset of DiaSource history
        is returned limited by ``read_sources_months`` config parameter, w.r.t.
        ``visit_time``. If ``object_ids`` is empty then an empty catalog is
        always returned with the correct schema (columns/types). If
        ``object_ids`` is `None` then no filtering is performed and some of the
        returned records may be outside the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaForcedSources(self, region: Region,
                            object_ids: Optional[Iterable[int]],
                            visit_time: dafBase.DateTime) -> Optional[pandas.DataFrame]:
        """Return catalog of DiaForcedSource instances from a given region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DiaForcedSources.
        object_ids : iterable [ `int` ], optional
            List of DiaObject IDs to further constrain the set of returned
            sources. If list is empty then empty catalog is returned with a
            correct schema. If `None` then returned sources are not
            constrained. Some implementations may not support latter case.
        visit_time : `lsst.daf.base.DateTime`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            Catalog containing DiaForcedSource records. `None` is returned if
            ``read_forced_sources_months`` configuration parameter is set to 0.

        Raises
        ------
        NotImplementedError
            May be raised by some implementations if ``object_ids`` is `None`.

        Notes
        -----
        This method returns DiaForcedSource catalog for a region with
        additional filtering based on DiaObject IDs. Only a subset of
        DiaForcedSource history is returned limited by
        ``read_forced_sources_months`` config parameter, w.r.t.
        ``visit_time``. If ``object_ids`` is empty then an empty catalog is
        always returned with the correct schema (columns/types). If
        ``object_ids`` is `None` then no filtering is performed and some of the
        returned records may be outside the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaObjectsHistory(self,
                             start_time: dafBase.DateTime,
                             end_time: dafBase.DateTime,
                             region: Optional[Region] = None) -> pandas.DataFrame:
        """Return catalog of DiaObject instances from a given time period
        including the history of each DiaObject.

        Parameters
        ----------
        start_time : `lsst.daf.base.DateTime`
            Starting time for DiaObject history search. DiaObject record is
            selected when its ``validityStart`` falls into an interval
            between ``start_time`` (inclusive) and ``end_time`` (exclusive).
        end_time : `lsst.daf.base.DateTime`
            Upper limit on time for DiaObject history search.
        region : `lsst.sphgeom.Region`, optional
            Region to search for DiaObjects, if not specified then whole sky
            is searched. If region is specified then some returned records may
            fall outside of this region.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog containing DiaObject records.

        Notes
        -----
        This part of API may not be very stable and can change before the
        implementation finalizes.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaSourcesHistory(self,
                             start_time: dafBase.DateTime,
                             end_time: dafBase.DateTime,
                             region: Optional[Region] = None) -> pandas.DataFrame:
        """Return catalog of DiaSource instances from a given time period.

        Parameters
        ----------
        start_time : `lsst.daf.base.DateTime`
            Starting time for DiaSource history search. DiaSource record is
            selected when its ``midPointTai`` falls into an interval between
            ``start_time`` (inclusive) and ``end_time`` (exclusive).
        end_time : `lsst.daf.base.DateTime`
            Upper limit on time for DiaSource history search.
        region : `lsst.sphgeom.Region`, optional
            Region to search for DiaSources, if not specified then whole sky
            is searched. If region is specified then some returned records may
            fall outside of this region.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog containing DiaSource records.

        Notes
        -----
        This part of API may not be very stable and can change before the
        implementation finalizes.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaForcedSourcesHistory(self,
                                   start_time: dafBase.DateTime,
                                   end_time: dafBase.DateTime,
                                   region: Optional[Region] = None) -> pandas.DataFrame:
        """Return catalog of DiaForcedSource instances from a given time
        period.

        Parameters
        ----------
        start_time : `lsst.daf.base.DateTime`
            Starting time for DiaForcedSource history search. DiaForcedSource
            record is selected when its ``midPointTai`` falls into an interval
            between ``start_time`` (inclusive) and ``end_time`` (exclusive).
        end_time : `lsst.daf.base.DateTime`
            Upper limit on time for DiaForcedSource history search.
        region : `lsst.sphgeom.Region`, optional
            Region to search for DiaForcedSources, if not specified then whole
            sky is searched. If region is specified then some returned records
            may fall outside of this region.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog containing DiaForcedSource records.

        Notes
        -----
        This part of API may not be very stable and can change before the
        implementation finalizes. Some implementations may not support region
        filtering, they will return records from the whole sky.
        """
        raise NotImplementedError()

    @abstractmethod
    def getSSObjects(self) -> pandas.DataFrame:
        """Return catalog of SSObject instances.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog containing SSObject records, all existing records are
            returned.
        """
        raise NotImplementedError()

    @abstractmethod
    def store(self,
              visit_time: dafBase.DateTime,
              objects: pandas.DataFrame,
              sources: Optional[pandas.DataFrame] = None,
              forced_sources: Optional[pandas.DataFrame] = None) -> None:
        """Store all three types of catalogs in the database.

        Parameters
        ----------
        visit_time : `lsst.daf.base.DateTime`
            Time of the visit.
        objects : `pandas.DataFrame`
            Catalog with DiaObject records.
        sources : `pandas.DataFrame`, optional
            Catalog with DiaSource records.
        forced_sources : `pandas.DataFrame`, optional
            Catalog with DiaForcedSource records.

        Notes
        -----
        This method takes DataFrame catalogs, their schema must be
        compatible with the schema of APDB table:

          - column names must correspond to database table columns
          - types and units of the columns must match database definitions,
            no unit conversion is performed presently
          - columns that have default values in database schema can be
            omitted from catalog
          - this method knows how to fill interval-related columns of DiaObject
            (validityStart, validityEnd) they do not need to appear in a
            catalog
          - source catalogs have ``diaObjectId`` column associating sources
            with objects
        """
        raise NotImplementedError()

    @abstractmethod
    def storeSSObjects(self, objects: pandas.DataFrame) -> None:
        """Store or update SSObject catalog.

        Parameters
        ----------
        objects : `pandas.DataFrame`
            Catalog with SSObject records.

        Notes
        -----
        If SSObjects with matching IDs already exist in the database, their
        records will be updated with the information from provided records.
        """
        raise NotImplementedError()

    @abstractmethod
    def reassignDiaSources(self, idMap: Mapping[int, int]) -> None:
        """Associate DiaSources with SSObjects, dis-associating them
        from DiaObjects.

        Parameters
        ----------
        idMap : `Mapping`
            Maps DiaSource IDs to their new SSObject IDs.

        Raises
        ------
        ValueError
            Raised if DiaSource ID does not exist in the database.
        """
        raise NotImplementedError()

    @abstractmethod
    def dailyJob(self) -> None:
        """Implement daily activities like cleanup/vacuum.

        What should be done during daily activities is determined by
        specific implementation.
        """
        raise NotImplementedError()

    @abstractmethod
    def countUnassociatedObjects(self) -> int:
        """Return the number of DiaObjects that have only one DiaSource
        associated with them.

        Used as part of ap_verify metrics.

        Returns
        -------
        count : `int`
            Number of DiaObjects with exactly one associated DiaSource.

        Notes
        -----
        This method can be very inefficient or slow in some implementations.
        """
        raise NotImplementedError()

    @classmethod
    def makeField(cls, doc: str) -> ConfigurableField:
        """Make a `~lsst.pex.config.ConfigurableField` for Apdb.

        Parameters
        ----------
        doc : `str`
            Help text for the field.

        Returns
        -------
        configurableField : `lsst.pex.config.ConfigurableField`
            A `~lsst.pex.config.ConfigurableField` for Apdb.
        """
        # ``cls`` rather than ``Apdb`` so that a subclass calling this gets a
        # field targeting itself.
        return ConfigurableField(doc=doc, target=cls)