Coverage for python/lsst/dax/apdb/apdb.py: 78%
65 statements
« prev ^ index » next coverage.py v6.4.1, created at 2022-06-25 08:48 +0000
« prev ^ index » next coverage.py v6.4.1, created at 2022-06-25 08:48 +0000
1# This file is part of dax_apdb.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ["ApdbConfig", "Apdb"]
26from abc import ABC, abstractmethod
27import os
28import pandas
29from typing import Iterable, Mapping, Optional
31import lsst.daf.base as dafBase
32from lsst.pex.config import Config, ConfigurableField, Field
33from lsst.sphgeom import Region
34from .apdbSchema import ApdbTables, TableDef
def _data_file_name(basename: str) -> str:
    """Return path name of a data file in sdm_schemas package.

    Parameters
    ----------
    basename : `str`
        Name of the file inside the ``yml`` sub-directory.

    Returns
    -------
    path : `str`
        ``basename`` joined onto ``${SDM_SCHEMAS_DIR}/yml``.

    Notes
    -----
    The ``${SDM_SCHEMAS_DIR}`` placeholder is returned literally, it is not
    expanded here (presumably the consumer of the path expands it -- to be
    confirmed against the code that reads ``schema_file``).
    """
    return os.path.join("${SDM_SCHEMAS_DIR}", "yml", basename)
class ApdbConfig(Config):
    """Part of Apdb configuration common to all implementations.

    Every attribute is a `~lsst.pex.config.Field`; the ``doc`` string of
    each field is its authoritative description.
    """

    # History depth (in months) for DiaSource reads.
    read_sources_months = Field(
        dtype=int,
        doc="Number of months of history to read from DiaSource",
        default=12
    )
    # History depth (in months) for DiaForcedSource reads.
    read_forced_sources_months = Field(
        dtype=int,
        doc="Number of months of history to read from DiaForcedSource",
        default=12
    )
    # Default points at "apdb.yaml" under "${SDM_SCHEMAS_DIR}/yml"; the
    # placeholder is stored unexpanded in the default value.
    schema_file = Field(
        dtype=str,
        doc="Location of (YAML) configuration file with standard schema",
        default=_data_file_name("apdb.yaml")
    )
    schema_name = Field(
        dtype=str,
        doc="Name of the schema in YAML configuration file.",
        default="ApdbSchema"
    )
    # Deprecated field: per its deprecation message the value is not used.
    extra_schema_file = Field(
        dtype=str,
        doc="Location of (YAML) configuration file with extra schema, "
            "definitions in this file are merged with the definitions in "
            "'schema_file', extending or replacing parts of the schema.",
        default=None,
        optional=True,
        deprecated="This field is deprecated, its value is not used."
    )
class Apdb(ABC):
    """Abstract interface for APDB.

    All data access methods are abstract and have to be provided by a
    concrete implementation; only `makeField` is implemented here.
    """

    # Configuration class associated with this interface.
    ConfigClass = ApdbConfig

    @abstractmethod
    def tableDef(self, table: ApdbTables) -> Optional[TableDef]:
        """Return table schema definition for a given table.

        Parameters
        ----------
        table : `ApdbTables`
            One of the known APDB tables.

        Returns
        -------
        tableSchema : `TableDef` or `None`
            Table schema description, `None` is returned if table is not
            defined by this implementation.
        """
        raise NotImplementedError()

    @abstractmethod
    def makeSchema(self, drop: bool = False) -> None:
        """Create or re-create whole database schema.

        Parameters
        ----------
        drop : `bool`
            If True then drop all tables before creating new ones.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaObjects(self, region: Region) -> pandas.DataFrame:
        """Return catalog of DiaObject instances from a given region.

        This method returns only the last version of each DiaObject. Some
        records in a returned catalog may be outside the specified region, it
        is up to a client to ignore those records or cleanup the catalog
        before further use.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DiaObjects.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog containing DiaObject records for a region that may be a
            superset of the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaSources(self, region: Region,
                      object_ids: Optional[Iterable[int]],
                      visit_time: dafBase.DateTime) -> Optional[pandas.DataFrame]:
        """Return catalog of DiaSource instances from a given region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DiaSources.
        object_ids : iterable [ `int` ], optional
            List of DiaObject IDs to further constrain the set of returned
            sources. If `None` then returned sources are not constrained. If
            list is empty then empty catalog is returned with a correct
            schema.
        visit_time : `lsst.daf.base.DateTime`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            Catalog containing DiaSource records. `None` is returned if
            ``read_sources_months`` configuration parameter is set to 0.

        Notes
        -----
        This method returns DiaSource catalog for a region with additional
        filtering based on DiaObject IDs. Only a subset of DiaSource history
        is returned limited by ``read_sources_months`` config parameter,
        w.r.t. ``visit_time``. If ``object_ids`` is empty then an empty
        catalog is always returned with the correct schema (columns/types).
        If ``object_ids`` is `None` then no filtering is performed and some
        of the returned records may be outside the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaForcedSources(self, region: Region,
                            object_ids: Optional[Iterable[int]],
                            visit_time: dafBase.DateTime) -> Optional[pandas.DataFrame]:
        """Return catalog of DiaForcedSource instances from a given region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DiaForcedSources.
        object_ids : iterable [ `int` ], optional
            List of DiaObject IDs to further constrain the set of returned
            sources. If list is empty then empty catalog is returned with a
            correct schema. If `None` then returned sources are not
            constrained. Some implementations may not support latter case.
        visit_time : `lsst.daf.base.DateTime`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            Catalog containing DiaForcedSource records. `None` is returned if
            ``read_forced_sources_months`` configuration parameter is set to
            0.

        Raises
        ------
        NotImplementedError
            May be raised by some implementations if ``object_ids`` is `None`.

        Notes
        -----
        This method returns DiaForcedSource catalog for a region with
        additional filtering based on DiaObject IDs. Only a subset of
        DiaForcedSource history is returned limited by
        ``read_forced_sources_months`` config parameter, w.r.t.
        ``visit_time``. If ``object_ids`` is empty then an empty catalog is
        always returned with the correct schema (columns/types). If
        ``object_ids`` is `None` then no filtering is performed and some of
        the returned records may be outside the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaObjectsHistory(self,
                             start_time: dafBase.DateTime,
                             end_time: dafBase.DateTime,
                             region: Optional[Region] = None) -> pandas.DataFrame:
        """Return catalog of DiaObject instances from a given time period
        including the history of each DiaObject.

        Parameters
        ----------
        start_time : `lsst.daf.base.DateTime`
            Starting time for DiaObject history search. DiaObject record is
            selected when its ``validityStart`` falls into an interval
            between ``start_time`` (inclusive) and ``end_time`` (exclusive).
        end_time : `lsst.daf.base.DateTime`
            Upper limit on time for DiaObject history search.
        region : `lsst.sphgeom.Region`, optional
            Region to search for DiaObjects, if not specified then whole sky
            is searched. If region is specified then some returned records
            may fall outside of this region.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog containing DiaObject records.

        Notes
        -----
        This part of API may not be very stable and can change before the
        implementation finalizes.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaSourcesHistory(self,
                             start_time: dafBase.DateTime,
                             end_time: dafBase.DateTime,
                             region: Optional[Region] = None) -> pandas.DataFrame:
        """Return catalog of DiaSource instances from a given time period.

        Parameters
        ----------
        start_time : `lsst.daf.base.DateTime`
            Starting time for DiaSource history search. DiaSource record is
            selected when its ``midPointTai`` falls into an interval between
            ``start_time`` (inclusive) and ``end_time`` (exclusive).
        end_time : `lsst.daf.base.DateTime`
            Upper limit on time for DiaSource history search.
        region : `lsst.sphgeom.Region`, optional
            Region to search for DiaSources, if not specified then whole sky
            is searched. If region is specified then some returned records
            may fall outside of this region.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog containing DiaSource records.

        Notes
        -----
        This part of API may not be very stable and can change before the
        implementation finalizes.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaForcedSourcesHistory(self,
                                   start_time: dafBase.DateTime,
                                   end_time: dafBase.DateTime,
                                   region: Optional[Region] = None) -> pandas.DataFrame:
        """Return catalog of DiaForcedSource instances from a given time
        period.

        Parameters
        ----------
        start_time : `lsst.daf.base.DateTime`
            Starting time for DiaForcedSource history search. DiaForcedSource
            record is selected when its ``midPointTai`` falls into an
            interval between ``start_time`` (inclusive) and ``end_time``
            (exclusive).
        end_time : `lsst.daf.base.DateTime`
            Upper limit on time for DiaForcedSource history search.
        region : `lsst.sphgeom.Region`, optional
            Region to search for DiaForcedSources, if not specified then
            whole sky is searched. If region is specified then some returned
            records may fall outside of this region.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog containing DiaForcedSource records.

        Notes
        -----
        This part of API may not be very stable and can change before the
        implementation finalizes. Some implementations may not support region
        filtering, they will return records from the whole sky.
        """
        raise NotImplementedError()

    @abstractmethod
    def getSSObjects(self) -> pandas.DataFrame:
        """Return catalog of SSObject instances.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog containing SSObject records, all existing records are
            returned.
        """
        raise NotImplementedError()

    @abstractmethod
    def store(self,
              visit_time: dafBase.DateTime,
              objects: pandas.DataFrame,
              sources: Optional[pandas.DataFrame] = None,
              forced_sources: Optional[pandas.DataFrame] = None) -> None:
        """Store all three types of catalogs in the database.

        Parameters
        ----------
        visit_time : `lsst.daf.base.DateTime`
            Time of the visit.
        objects : `pandas.DataFrame`
            Catalog with DiaObject records.
        sources : `pandas.DataFrame`, optional
            Catalog with DiaSource records.
        forced_sources : `pandas.DataFrame`, optional
            Catalog with DiaForcedSource records.

        Notes
        -----
        This method takes DataFrame catalogs, their schema must be
        compatible with the schema of APDB table:

        - column names must correspond to database table columns
        - types and units of the columns must match database definitions,
          no unit conversion is performed presently
        - columns that have default values in database schema can be
          omitted from catalog
        - this method knows how to fill interval-related columns of
          DiaObject (validityStart, validityEnd) they do not need to appear
          in a catalog
        - source catalogs have ``diaObjectId`` column associating sources
          with objects
        """
        raise NotImplementedError()

    @abstractmethod
    def storeSSObjects(self, objects: pandas.DataFrame) -> None:
        """Store or update SSObject catalog.

        Parameters
        ----------
        objects : `pandas.DataFrame`
            Catalog with SSObject records.

        Notes
        -----
        If SSObjects with matching IDs already exist in the database, their
        records will be updated with the information from provided records.
        """
        raise NotImplementedError()

    @abstractmethod
    def reassignDiaSources(self, idMap: Mapping[int, int]) -> None:
        """Associate DiaSources with SSObjects, dis-associating them
        from DiaObjects.

        Parameters
        ----------
        idMap : `Mapping`
            Maps DiaSource IDs to their new SSObject IDs.

        Raises
        ------
        ValueError
            Raised if DiaSource ID does not exist in the database.
        """
        raise NotImplementedError()

    @abstractmethod
    def dailyJob(self) -> None:
        """Implement daily activities like cleanup/vacuum.

        What should be done during daily activities is determined by
        specific implementation.
        """
        raise NotImplementedError()

    @abstractmethod
    def countUnassociatedObjects(self) -> int:
        """Return the number of DiaObjects that have only one DiaSource
        associated with them.

        Used as part of ap_verify metrics.

        Returns
        -------
        count : `int`
            Number of DiaObjects with exactly one associated DiaSource.

        Notes
        -----
        This method can be very inefficient or slow in some implementations.
        """
        raise NotImplementedError()

    @classmethod
    def makeField(cls, doc: str) -> ConfigurableField:
        """Make a `~lsst.pex.config.ConfigurableField` for Apdb.

        Parameters
        ----------
        doc : `str`
            Help text for the field.

        Returns
        -------
        configurableField : `lsst.pex.config.ConfigurableField`
            A `~lsst.pex.config.ConfigurableField` for Apdb.
        """
        # The target is the class this is called on, so a subclass yields a
        # field targeting that subclass.
        return ConfigurableField(doc=doc, target=cls)