Coverage for python/lsst/dax/apdb/apdb.py: 85%
89 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-12 09:46 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-12 09:46 +0000
1# This file is part of dax_apdb.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ["ApdbConfig", "Apdb", "ApdbInsertId", "ApdbTableData"]
26import os
27from abc import ABC, abstractmethod
28from collections.abc import Iterable, Mapping
29from dataclasses import dataclass
30from typing import Optional
31from uuid import UUID, uuid4
33import lsst.daf.base as dafBase
34import pandas
35from felis.simple import Table
36from lsst.pex.config import Config, ConfigurableField, Field
37from lsst.sphgeom import Region
39from .apdbSchema import ApdbTables
def _data_file_name(basename: str) -> str:
    """Build the path name of a data file in the sdm_schemas package.

    Parameters
    ----------
    basename : `str`
        Name of the file, used as the last component of the path.

    Returns
    -------
    path : `str`
        ``basename`` joined onto the ``yml`` sub-directory of
        ``${SDM_SCHEMAS_DIR}``. The variable reference is returned
        literally, it is not expanded here.
    """
    components = ("${SDM_SCHEMAS_DIR}", "yml", basename)
    return os.path.join(*components)
class ApdbConfig(Config):
    """Configuration options shared by all Apdb implementations."""

    read_sources_months = Field[int](
        default=12,
        doc="Number of months of history to read from DiaSource",
    )
    read_forced_sources_months = Field[int](
        default=12,
        doc="Number of months of history to read from DiaForcedSource",
    )
    # The default points at the standard schema file in the sdm_schemas
    # package.
    schema_file = Field[str](
        default=_data_file_name("apdb.yaml"),
        doc="Location of (YAML) configuration file with standard schema",
    )
    schema_name = Field[str](
        default="ApdbSchema",
        doc="Name of the schema in YAML configuration file.",
    )
    # Deprecated: per its own deprecation message the value is not used.
    extra_schema_file = Field[str](
        default=None,
        optional=True,
        deprecated="This field is deprecated, its value is not used.",
        doc=(
            "Location of (YAML) configuration file with extra schema, "
            "definitions in this file are merged with the definitions in "
            "'schema_file', extending or replacing parts of the schema."
        ),
    )
    use_insert_id = Field[bool](
        default=False,
        doc=(
            "If True, make and fill additional tables used for getHistory methods. "
            "Databases created with earlier versions of APDB may not have these tables, "
            "and corresponding methods will not work for them."
        ),
    )
class ApdbTableData(ABC):
    """Abstract interface for a representation of table data."""

    @abstractmethod
    def column_names(self) -> list[str]:
        """Return the names of the table columns, in order.

        Returns
        -------
        names : `list` [`str`]
            Column names.
        """
        raise NotImplementedError

    @abstractmethod
    def rows(self) -> Iterable[tuple]:
        """Return the rows of the table, one tuple of values per row.

        Returns
        -------
        rows : `iterable` [`tuple`]
            Iterable of tuples.
        """
        raise NotImplementedError
@dataclass(frozen=True)
class ApdbInsertId:
    """Class used to identify a single insert operation.

    Instances of this class are used to identify the units of transfer from
    APDB to PPDB. Usually a single `ApdbInsertId` corresponds to a single
    call to the `store` method.
    """

    # Unique value that distinguishes this insert operation.
    id: UUID

    @classmethod
    def new_insert_id(cls) -> ApdbInsertId:
        """Generate a new unique insert identifier.

        Returns
        -------
        insert_id : `ApdbInsertId`
            New instance of the class this method is called on, with ``id``
            set to a freshly generated `uuid.uuid4` value.
        """
        # Build through ``cls`` rather than the hard-coded class name so that
        # a subclass calling this factory gets an instance of its own type.
        return cls(id=uuid4())
class Apdb(ABC):
    """Abstract interface that every APDB implementation provides."""

    # Configuration class associated with this interface.
    ConfigClass = ApdbConfig

    @abstractmethod
    def tableDef(self, table: ApdbTables) -> Optional[Table]:
        """Return the schema definition of a given table.

        Parameters
        ----------
        table : `ApdbTables`
            One of the known APDB tables.

        Returns
        -------
        tableSchema : `felis.simple.Table` or `None`
            Description of the table schema, or `None` if the table is not
            defined by this implementation.
        """
        raise NotImplementedError

    @abstractmethod
    def makeSchema(self, drop: bool = False) -> None:
        """Create or re-create the whole database schema.

        Parameters
        ----------
        drop : `bool`
            If True then all tables are dropped before new ones are created.
        """
        raise NotImplementedError

    @abstractmethod
    def getDiaObjects(self, region: Region) -> pandas.DataFrame:
        """Return a catalog of DiaObject instances from a given region.

        Only the last version of each DiaObject is returned. Some records
        in the returned catalog may lie outside the specified region; it is
        up to the client to ignore those records or to clean up the catalog
        before further use.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DiaObjects.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog containing DiaObject records for a region that may be a
            superset of the specified region.
        """
        raise NotImplementedError

    @abstractmethod
    def getDiaSources(
        self, region: Region, object_ids: Optional[Iterable[int]], visit_time: dafBase.DateTime
    ) -> Optional[pandas.DataFrame]:
        """Return a catalog of DiaSource instances from a given region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DiaSources.
        object_ids : iterable [ `int` ], optional
            DiaObject IDs used to further constrain the set of returned
            sources. If `None` then the returned sources are not
            constrained. If empty then an empty catalog with a correct
            schema is returned.
        visit_time : `lsst.daf.base.DateTime`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            Catalog containing DiaSource records. `None` is returned if the
            ``read_sources_months`` configuration parameter is set to 0.

        Notes
        -----
        The DiaSource catalog for a region is returned with additional
        filtering based on DiaObject IDs. Only a subset of the DiaSource
        history is returned, limited by the ``read_sources_months`` config
        parameter w.r.t. ``visit_time``. If ``object_ids`` is empty then an
        empty catalog is always returned with the correct schema
        (columns/types). If ``object_ids`` is `None` then no filtering is
        performed and some of the returned records may be outside the
        specified region.
        """
        raise NotImplementedError

    @abstractmethod
    def getDiaForcedSources(
        self, region: Region, object_ids: Optional[Iterable[int]], visit_time: dafBase.DateTime
    ) -> Optional[pandas.DataFrame]:
        """Return a catalog of DiaForcedSource instances from a given region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DiaForcedSources.
        object_ids : iterable [ `int` ], optional
            DiaObject IDs used to further constrain the set of returned
            sources. If empty then an empty catalog with a correct schema is
            returned. If `None` then the returned sources are not
            constrained; some implementations may not support the latter
            case.
        visit_time : `lsst.daf.base.DateTime`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            Catalog containing DiaForcedSource records. `None` is returned
            if the ``read_forced_sources_months`` configuration parameter is
            set to 0.

        Raises
        ------
        NotImplementedError
            May be raised by some implementations if ``object_ids`` is
            `None`.

        Notes
        -----
        The DiaForcedSource catalog for a region is returned with additional
        filtering based on DiaObject IDs. Only a subset of the
        DiaForcedSource history is returned, limited by the
        ``read_forced_sources_months`` config parameter w.r.t.
        ``visit_time``. If ``object_ids`` is empty then an empty catalog is
        always returned with the correct schema (columns/types). If
        ``object_ids`` is `None` then no filtering is performed and some of
        the returned records may be outside the specified region.
        """
        raise NotImplementedError

    @abstractmethod
    def getInsertIds(self) -> list[ApdbInsertId] | None:
        """Return the collection of insert identifiers known to the database.

        Returns
        -------
        ids : `list` [`ApdbInsertId`] or `None`
            List of identifiers; they may be time-ordered if the database
            supports ordering. `None` is returned if the database is not
            configured to store insert identifiers.
        """
        raise NotImplementedError

    @abstractmethod
    def deleteInsertIds(self, ids: Iterable[ApdbInsertId]) -> None:
        """Remove insert identifiers from the database.

        Parameters
        ----------
        ids : `iterable` [`ApdbInsertId`]
            Insert identifiers, can include items returned from
            `getInsertIds`.

        Notes
        -----
        Calling this method makes Apdb forget about the specified
        identifiers. Any auxiliary data associated with the identifiers is
        removed from the database as well, but data in regular tables is not
        removed. This method should be called after a successful transfer of
        data from APDB to PPDB to free the space used by history.
        """
        raise NotImplementedError

    @abstractmethod
    def getDiaObjectsHistory(self, ids: Iterable[ApdbInsertId]) -> ApdbTableData:
        """Return a catalog of DiaObject instances from a given time period,
        including the history of each DiaObject.

        Parameters
        ----------
        ids : `iterable` [`ApdbInsertId`]
            Insert identifiers, can include items returned from
            `getInsertIds`.

        Returns
        -------
        data : `ApdbTableData`
            Catalog containing DiaObject records. In addition to all regular
            columns it will contain an ``insert_id`` column.

        Notes
        -----
        This part of the API may not be very stable and can change before
        the implementation finalizes.
        """
        raise NotImplementedError

    @abstractmethod
    def getDiaSourcesHistory(self, ids: Iterable[ApdbInsertId]) -> ApdbTableData:
        """Return a catalog of DiaSource instances from a given time period.

        Parameters
        ----------
        ids : `iterable` [`ApdbInsertId`]
            Insert identifiers, can include items returned from
            `getInsertIds`.

        Returns
        -------
        data : `ApdbTableData`
            Catalog containing DiaSource records. In addition to all regular
            columns it will contain an ``insert_id`` column.

        Notes
        -----
        This part of the API may not be very stable and can change before
        the implementation finalizes.
        """
        raise NotImplementedError

    @abstractmethod
    def getDiaForcedSourcesHistory(self, ids: Iterable[ApdbInsertId]) -> ApdbTableData:
        """Return a catalog of DiaForcedSource instances from a given time
        period.

        Parameters
        ----------
        ids : `iterable` [`ApdbInsertId`]
            Insert identifiers, can include items returned from
            `getInsertIds`.

        Returns
        -------
        data : `ApdbTableData`
            Catalog containing DiaForcedSource records. In addition to all
            regular columns it will contain an ``insert_id`` column.

        Notes
        -----
        This part of the API may not be very stable and can change before
        the implementation finalizes.
        """
        raise NotImplementedError

    @abstractmethod
    def getSSObjects(self) -> pandas.DataFrame:
        """Return a catalog of SSObject instances.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog containing SSObject records; all existing records are
            returned.
        """
        raise NotImplementedError

    @abstractmethod
    def store(
        self,
        visit_time: dafBase.DateTime,
        objects: pandas.DataFrame,
        sources: Optional[pandas.DataFrame] = None,
        forced_sources: Optional[pandas.DataFrame] = None,
    ) -> None:
        """Store all three types of catalogs in the database.

        Parameters
        ----------
        visit_time : `lsst.daf.base.DateTime`
            Time of the visit.
        objects : `pandas.DataFrame`
            Catalog with DiaObject records.
        sources : `pandas.DataFrame`, optional
            Catalog with DiaSource records.
        forced_sources : `pandas.DataFrame`, optional
            Catalog with DiaForcedSource records.

        Notes
        -----
        This method takes DataFrame catalogs whose schema must be compatible
        with the schema of the APDB tables:

        - column names must correspond to database table columns
        - types and units of the columns must match database definitions,
          no unit conversion is performed presently
        - columns that have default values in the database schema can be
          omitted from a catalog
        - this method knows how to fill interval-related columns of
          DiaObject (validityStart, validityEnd), they do not need to appear
          in a catalog
        - source catalogs have a ``diaObjectId`` column associating sources
          with objects
        """
        raise NotImplementedError

    @abstractmethod
    def storeSSObjects(self, objects: pandas.DataFrame) -> None:
        """Store or update a catalog of SSObjects.

        Parameters
        ----------
        objects : `pandas.DataFrame`
            Catalog with SSObject records.

        Notes
        -----
        If SSObjects with matching IDs already exist in the database, their
        records will be updated with the information from the provided
        records.
        """
        raise NotImplementedError

    @abstractmethod
    def reassignDiaSources(self, idMap: Mapping[int, int]) -> None:
        """Associate DiaSources with SSObjects, dis-associating them from
        DiaObjects.

        Parameters
        ----------
        idMap : `Mapping`
            Maps DiaSource IDs to their new SSObject IDs.

        Raises
        ------
        ValueError
            Raised if a DiaSource ID does not exist in the database.
        """
        raise NotImplementedError

    @abstractmethod
    def dailyJob(self) -> None:
        """Implement daily activities like cleanup/vacuum.

        What is done during daily activities is determined by the specific
        implementation.
        """
        raise NotImplementedError

    @abstractmethod
    def countUnassociatedObjects(self) -> int:
        """Return the number of DiaObjects that have only one DiaSource
        associated with them.

        Used as part of ap_verify metrics.

        Returns
        -------
        count : `int`
            Number of DiaObjects with exactly one associated DiaSource.

        Notes
        -----
        This method can be very inefficient or slow in some implementations.
        """
        raise NotImplementedError

    @classmethod
    def makeField(cls, doc: str) -> ConfigurableField:
        """Make a `~lsst.pex.config.ConfigurableField` for Apdb.

        Parameters
        ----------
        doc : `str`
            Help text for the field.

        Returns
        -------
        configurableField : `lsst.pex.config.ConfigurableField`
            A `~lsst.pex.config.ConfigurableField` for Apdb, with the class
            this method is called on as its target.
        """
        field = ConfigurableField(doc=doc, target=cls)
        return field