Coverage for python/lsst/dax/apdb/apdb.py: 78%
89 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-03-09 03:28 -0800
« prev ^ index » next coverage.py v6.5.0, created at 2023-03-09 03:28 -0800
1# This file is part of dax_apdb.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ["ApdbConfig", "Apdb", "ApdbInsertId", "ApdbTableData"]
26import os
27from abc import ABC, abstractmethod
28from collections.abc import Iterable, Mapping
29from dataclasses import dataclass
30from typing import Optional
31from uuid import UUID, uuid4
33import lsst.daf.base as dafBase
34import pandas
35from felis.simple import Table
36from lsst.pex.config import Config, ConfigurableField, Field
37from lsst.sphgeom import Region
39from .apdbSchema import ApdbTables
def _data_file_name(basename: str) -> str:
    """Return path name of a data file in sdm_schemas package.

    Parameters
    ----------
    basename : `str`
        Name of the file located in the ``yml`` sub-directory.

    Returns
    -------
    path : `str`
        Path made of the literal ``${SDM_SCHEMAS_DIR}`` placeholder, the
        ``yml`` directory and ``basename``. The placeholder is returned
        as-is, this function does not expand it.
    """
    return os.path.join("${SDM_SCHEMAS_DIR}", "yml", basename)
class ApdbConfig(Config):
    """Part of Apdb configuration common to all implementations."""

    # Length (in months) of the DiaSource history window that is read.
    read_sources_months = Field[int](doc="Number of months of history to read from DiaSource", default=12)
    # Length (in months) of the DiaForcedSource history window that is read.
    read_forced_sources_months = Field[int](
        doc="Number of months of history to read from DiaForcedSource", default=12
    )
    # Default points at "apdb.yaml" under the "${SDM_SCHEMAS_DIR}/yml"
    # location produced by `_data_file_name`.
    schema_file = Field[str](
        doc="Location of (YAML) configuration file with standard schema", default=_data_file_name("apdb.yaml")
    )
    schema_name = Field[str](doc="Name of the schema in YAML configuration file.", default="ApdbSchema")
    # Deprecated: kept as a field, but its value is not used.
    extra_schema_file = Field[str](
        doc="Location of (YAML) configuration file with extra schema, "
        "definitions in this file are merged with the definitions in "
        "'schema_file', extending or replacing parts of the schema.",
        default=None,
        optional=True,
        deprecated="This field is deprecated, its value is not used.",
    )
    # Controls creation and filling of the extra tables needed by the
    # getHistory family of methods.
    use_insert_id = Field[bool](
        doc=(
            "If True (default), make and fill additional tables used for getHistory methods. "
            "Databases created with earlier versions of APDB may not have these tables, "
            "and corresponding methods will not work for them."
        ),
        default=True,
    )
class ApdbTableData(ABC):
    """Abstract class for representing table data."""

    @abstractmethod
    def column_names(self) -> list[str]:
        """Return ordered sequence of column names in the table.

        Returns
        -------
        names : `list` [`str`]
            Column names.
        """
        raise NotImplementedError()

    @abstractmethod
    def rows(self) -> Iterable[tuple]:
        """Return table rows, each row is a tuple of values.

        Returns
        -------
        rows : `iterable` [`tuple`]
            Iterable of tuples.
        """
        raise NotImplementedError()
@dataclass(frozen=True)
class ApdbInsertId:
    """Class used to identify single insert operation.

    Instances of this class are used to identify the units of transfer from
    APDB to PPDB. Usually single `ApdbInsertId` corresponds to a single call to
    `store` method.
    """

    # Unique identifier of the insert operation.
    id: UUID

    @classmethod
    def new_insert_id(cls) -> ApdbInsertId:
        """Generate new unique insert identifier.

        Returns
        -------
        insert_id : `ApdbInsertId`
            New instance whose ``id`` is produced by `uuid.uuid4`.
        """
        return ApdbInsertId(id=uuid4())
class Apdb(ABC):
    """Abstract interface for APDB."""

    # Configuration class associated with this interface.
    ConfigClass = ApdbConfig

    @abstractmethod
    def tableDef(self, table: ApdbTables) -> Optional[Table]:
        """Return table schema definition for a given table.

        Parameters
        ----------
        table : `ApdbTables`
            One of the known APDB tables.

        Returns
        -------
        tableSchema : `felis.simple.Table` or `None`
            Table schema description, `None` is returned if table is not
            defined by this implementation.
        """
        raise NotImplementedError()

    @abstractmethod
    def makeSchema(self, drop: bool = False) -> None:
        """Create or re-create whole database schema.

        Parameters
        ----------
        drop : `bool`
            If True then drop all tables before creating new ones.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaObjects(self, region: Region) -> pandas.DataFrame:
        """Return catalog of DiaObject instances from a given region.

        This method returns only the last version of each DiaObject. Some
        records in a returned catalog may be outside the specified region, it
        is up to a client to ignore those records or cleanup the catalog before
        further use.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DIAObjects.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog containing DiaObject records for a region that may be a
            superset of the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaSources(
        self, region: Region, object_ids: Optional[Iterable[int]], visit_time: dafBase.DateTime
    ) -> Optional[pandas.DataFrame]:
        """Return catalog of DiaSource instances from a given region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DIASources.
        object_ids : iterable [ `int` ], optional
            List of DiaObject IDs to further constrain the set of returned
            sources. If `None` then returned sources are not constrained. If
            list is empty then empty catalog is returned with a correct
            schema.
        visit_time : `lsst.daf.base.DateTime`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            Catalog containing DiaSource records. `None` is returned if
            ``read_sources_months`` configuration parameter is set to 0.

        Notes
        -----
        This method returns DiaSource catalog for a region with additional
        filtering based on DiaObject IDs. Only a subset of DiaSource history
        is returned limited by ``read_sources_months`` config parameter, w.r.t.
        ``visit_time``. If ``object_ids`` is empty then an empty catalog is
        always returned with the correct schema (columns/types). If
        ``object_ids`` is `None` then no filtering is performed and some of the
        returned records may be outside the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaForcedSources(
        self, region: Region, object_ids: Optional[Iterable[int]], visit_time: dafBase.DateTime
    ) -> Optional[pandas.DataFrame]:
        """Return catalog of DiaForcedSource instances from a given region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DiaForcedSources.
        object_ids : iterable [ `int` ], optional
            List of DiaObject IDs to further constrain the set of returned
            sources. If list is empty then empty catalog is returned with a
            correct schema. If `None` then returned sources are not
            constrained. Some implementations may not support latter case.
        visit_time : `lsst.daf.base.DateTime`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            Catalog containing DiaForcedSource records. `None` is returned if
            ``read_forced_sources_months`` configuration parameter is set to 0.

        Raises
        ------
        NotImplementedError
            May be raised by some implementations if ``object_ids`` is `None`.

        Notes
        -----
        This method returns DiaForcedSource catalog for a region with
        additional filtering based on DiaObject IDs. Only a subset of
        DiaForcedSource history is returned limited by
        ``read_forced_sources_months`` config parameter, w.r.t.
        ``visit_time``. If ``object_ids`` is empty then an empty catalog is
        always returned with the correct schema (columns/types). If
        ``object_ids`` is `None` then no filtering is performed and some of the
        returned records may be outside the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getInsertIds(self) -> list[ApdbInsertId] | None:
        """Return collection of insert identifiers known to the database.

        Returns
        -------
        ids : `list` [`ApdbInsertId`] or `None`
            List of identifiers, they may be time-ordered if database supports
            ordering. `None` is returned if database is not configured to store
            insert identifiers.
        """
        raise NotImplementedError()

    @abstractmethod
    def deleteInsertIds(self, ids: Iterable[ApdbInsertId]) -> None:
        """Remove insert identifiers from the database.

        Parameters
        ----------
        ids : `iterable` [`ApdbInsertId`]
            Insert identifiers, can include items returned from `getInsertIds`.

        Notes
        -----
        This method causes Apdb to forget about specified identifiers. If there
        are any auxiliary data associated with the identifiers, it is also
        removed from database (but data in regular tables is not removed).
        This method should be called after successful transfer of data from
        APDB to PPDB to free space used by history.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaObjectsHistory(self, ids: Iterable[ApdbInsertId]) -> ApdbTableData:
        """Return catalog of DiaObject instances from a given time period
        including the history of each DiaObject.

        Parameters
        ----------
        ids : `iterable` [`ApdbInsertId`]
            Insert identifiers, can include items returned from `getInsertIds`.

        Returns
        -------
        data : `ApdbTableData`
            Catalog containing DiaObject records. In addition to all regular
            columns it will contain ``insert_id`` column.

        Notes
        -----
        This part of API may not be very stable and can change before the
        implementation finalizes.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaSourcesHistory(self, ids: Iterable[ApdbInsertId]) -> ApdbTableData:
        """Return catalog of DiaSource instances from a given time period.

        Parameters
        ----------
        ids : `iterable` [`ApdbInsertId`]
            Insert identifiers, can include items returned from `getInsertIds`.

        Returns
        -------
        data : `ApdbTableData`
            Catalog containing DiaSource records. In addition to all regular
            columns it will contain ``insert_id`` column.

        Notes
        -----
        This part of API may not be very stable and can change before the
        implementation finalizes.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaForcedSourcesHistory(self, ids: Iterable[ApdbInsertId]) -> ApdbTableData:
        """Return catalog of DiaForcedSource instances from a given time
        period.

        Parameters
        ----------
        ids : `iterable` [`ApdbInsertId`]
            Insert identifiers, can include items returned from `getInsertIds`.

        Returns
        -------
        data : `ApdbTableData`
            Catalog containing DiaForcedSource records. In addition to all
            regular columns it will contain ``insert_id`` column.

        Notes
        -----
        This part of API may not be very stable and can change before the
        implementation finalizes.
        """
        raise NotImplementedError()

    @abstractmethod
    def getSSObjects(self) -> pandas.DataFrame:
        """Return catalog of SSObject instances.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog containing SSObject records, all existing records are
            returned.
        """
        raise NotImplementedError()

    @abstractmethod
    def store(
        self,
        visit_time: dafBase.DateTime,
        objects: pandas.DataFrame,
        sources: Optional[pandas.DataFrame] = None,
        forced_sources: Optional[pandas.DataFrame] = None,
    ) -> None:
        """Store all three types of catalogs in the database.

        Parameters
        ----------
        visit_time : `lsst.daf.base.DateTime`
            Time of the visit.
        objects : `pandas.DataFrame`
            Catalog with DiaObject records.
        sources : `pandas.DataFrame`, optional
            Catalog with DiaSource records.
        forced_sources : `pandas.DataFrame`, optional
            Catalog with DiaForcedSource records.

        Notes
        -----
        This method takes DataFrame catalogs, their schema must be
        compatible with the schema of APDB table:

          - column names must correspond to database table columns
          - types and units of the columns must match database definitions,
            no unit conversion is performed presently
          - columns that have default values in database schema can be
            omitted from catalog
          - this method knows how to fill interval-related columns of DiaObject
            (validityStart, validityEnd) they do not need to appear in a
            catalog
          - source catalogs have ``diaObjectId`` column associating sources
            with objects
        """
        raise NotImplementedError()

    @abstractmethod
    def storeSSObjects(self, objects: pandas.DataFrame) -> None:
        """Store or update SSObject catalog.

        Parameters
        ----------
        objects : `pandas.DataFrame`
            Catalog with SSObject records.

        Notes
        -----
        If SSObjects with matching IDs already exist in the database, their
        records will be updated with the information from provided records.
        """
        raise NotImplementedError()

    @abstractmethod
    def reassignDiaSources(self, idMap: Mapping[int, int]) -> None:
        """Associate DiaSources with SSObjects, dis-associating them
        from DiaObjects.

        Parameters
        ----------
        idMap : `Mapping`
            Maps DiaSource IDs to their new SSObject IDs.

        Raises
        ------
        ValueError
            Raised if DiaSource ID does not exist in the database.
        """
        raise NotImplementedError()

    @abstractmethod
    def dailyJob(self) -> None:
        """Implement daily activities like cleanup/vacuum.

        What should be done during daily activities is determined by
        specific implementation.
        """
        raise NotImplementedError()

    @abstractmethod
    def countUnassociatedObjects(self) -> int:
        """Return the number of DiaObjects that have only one DiaSource
        associated with them.

        Used as part of ap_verify metrics.

        Returns
        -------
        count : `int`
            Number of DiaObjects with exactly one associated DiaSource.

        Notes
        -----
        This method can be very inefficient or slow in some implementations.
        """
        raise NotImplementedError()

    @classmethod
    def makeField(cls, doc: str) -> ConfigurableField:
        """Make a `~lsst.pex.config.ConfigurableField` for Apdb.

        Parameters
        ----------
        doc : `str`
            Help text for the field.

        Returns
        -------
        configurableField : `lsst.pex.config.ConfigurableField`
            A `~lsst.pex.config.ConfigurableField` for Apdb.
        """
        # The class this is called on becomes the field target.
        return ConfigurableField(doc=doc, target=cls)