Coverage for python/lsst/dax/apdb/apdb.py: 85%
93 statements
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-12 10:17 +0000
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-12 10:17 +0000
1# This file is part of dax_apdb.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ["ApdbConfig", "Apdb", "ApdbInsertId", "ApdbTableData"]
26import os
27from abc import ABC, abstractmethod
28from collections.abc import Iterable, Mapping
29from dataclasses import dataclass
30from uuid import UUID, uuid4
32import lsst.daf.base as dafBase
33import pandas
34from felis.simple import Table
35from lsst.pex.config import Config, ConfigurableField, Field
36from lsst.sphgeom import Region
38from .apdbSchema import ApdbTables
def _data_file_name(basename: str) -> str:
    """Build the path name of a data file in the sdm_schemas package.

    Parameters
    ----------
    basename : `str`
        Name of the file inside the ``yml`` folder of the package.

    Returns
    -------
    path : `str`
        Path composed of the literal ``${SDM_SCHEMAS_DIR}`` placeholder, the
        ``yml`` folder and ``basename``. The placeholder is returned as is,
        it is not expanded by this function.
    """
    path_parts = ("${SDM_SCHEMAS_DIR}", "yml", basename)
    return os.path.join(*path_parts)
class ApdbConfig(Config):
    """Configuration options shared by every Apdb implementation."""

    # How much history is read back, separately for each source table.
    read_sources_months = Field[int](
        doc="Number of months of history to read from DiaSource",
        default=12,
    )
    read_forced_sources_months = Field[int](
        doc="Number of months of history to read from DiaForcedSource",
        default=12,
    )

    # Where the schema definition comes from.
    schema_file = Field[str](
        doc="Location of (YAML) configuration file with standard schema",
        default=_data_file_name("apdb.yaml"),
    )
    schema_name = Field[str](
        doc="Name of the schema in YAML configuration file.",
        default="ApdbSchema",
    )
    # Deprecated option, see its ``deprecated`` message.
    extra_schema_file = Field[str](
        doc=(
            "Location of (YAML) configuration file with extra schema, "
            "definitions in this file are merged with the definitions in "
            "'schema_file', extending or replacing parts of the schema."
        ),
        default=None,
        optional=True,
        deprecated="This field is deprecated, its value is not used.",
    )

    # Optional bookkeeping needed by the getHistory family of methods.
    use_insert_id = Field[bool](
        doc="If True, make and fill additional tables used for getHistory methods. "
        "Databases created with earlier versions of APDB may not have these tables, "
        "and corresponding methods will not work for them.",
        default=False,
    )
class ApdbTableData(ABC):
    """Abstract interface for table data: column names plus rows."""

    @abstractmethod
    def column_names(self) -> list[str]:
        """Return the names of the table columns, in order.

        Returns
        -------
        names : `list` [`str`]
            Column names.
        """
        raise NotImplementedError

    @abstractmethod
    def rows(self) -> Iterable[tuple]:
        """Return the rows of the table, one tuple of values per row.

        Returns
        -------
        rows : `iterable` [`tuple`]
            Iterable of tuples.
        """
        raise NotImplementedError
@dataclass(frozen=True)
class ApdbInsertId:
    """Identifier of a single insert operation.

    Units of transfer from APDB to PPDB are identified by instances of this
    class. Usually one `ApdbInsertId` corresponds to one call to the `store`
    method.
    """

    # Unique identifier of the insert operation.
    id: UUID
    insert_time: dafBase.DateTime
    """Time of this insert, usually corresponds to visit time
    (`dafBase.DateTime`).
    """

    @classmethod
    def new_insert_id(cls, insert_time: dafBase.DateTime) -> ApdbInsertId:
        """Make a new insert identifier with a freshly generated unique ID.

        Parameters
        ----------
        insert_time : `dafBase.DateTime`
            Time to record for the insert.

        Returns
        -------
        insert_id : `ApdbInsertId`
            New identifier whose ``id`` comes from `uuid.uuid4`.
        """
        fresh_uuid = uuid4()
        return ApdbInsertId(id=fresh_uuid, insert_time=insert_time)
class Apdb(ABC):
    """Abstract base class defining the APDB interface."""

    ConfigClass = ApdbConfig

    @abstractmethod
    def tableDef(self, table: ApdbTables) -> Table | None:
        """Look up the schema definition of one APDB table.

        Parameters
        ----------
        table : `ApdbTables`
            One of the known APDB tables.

        Returns
        -------
        tableSchema : `felis.simple.Table` or `None`
            Schema description of the table, or `None` if this implementation
            does not define the table.
        """
        raise NotImplementedError

    @abstractmethod
    def makeSchema(self, drop: bool = False) -> None:
        """Create the whole database schema, or re-create it.

        Parameters
        ----------
        drop : `bool`
            If True then all tables are dropped before new ones are created.
        """
        raise NotImplementedError

    @abstractmethod
    def getDiaObjects(self, region: Region) -> pandas.DataFrame:
        """Return catalog of DiaObject instances from a given region.

        Only the last version of each DiaObject is returned. The returned
        catalog may include records located outside the specified region;
        the client is responsible for ignoring those records or cleaning up
        the catalog before further use.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DIAObjects.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog containing DiaObject records for a region that may be a
            superset of the specified region.
        """
        raise NotImplementedError

    @abstractmethod
    def getDiaSources(
        self,
        region: Region,
        object_ids: Iterable[int] | None,
        visit_time: dafBase.DateTime,
    ) -> pandas.DataFrame | None:
        """Return catalog of DiaSource instances from a given region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DIASources.
        object_ids : iterable [ `int` ], optional
            DiaObject IDs used to further constrain the set of returned
            sources. With `None` the returned sources are not constrained.
            With an empty list an empty catalog with a correct schema is
            returned.
        visit_time : `lsst.daf.base.DateTime`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            Catalog containing DiaSource records. `None` is returned if
            ``read_sources_months`` configuration parameter is set to 0.

        Notes
        -----
        The DiaSource catalog returned for a region is additionally filtered
        by DiaObject IDs. Only the part of DiaSource history selected by the
        ``read_sources_months`` config parameter, counted w.r.t.
        ``visit_time``, is returned. An empty ``object_ids`` always yields an
        empty catalog with the correct schema (columns/types). If
        ``object_ids`` is `None` then no filtering is performed and some of
        the returned records may be outside the specified region.
        """
        raise NotImplementedError

    @abstractmethod
    def getDiaForcedSources(
        self,
        region: Region,
        object_ids: Iterable[int] | None,
        visit_time: dafBase.DateTime,
    ) -> pandas.DataFrame | None:
        """Return catalog of DiaForcedSource instances from a given region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DiaForcedSources.
        object_ids : iterable [ `int` ], optional
            DiaObject IDs used to further constrain the set of returned
            sources. With an empty list an empty catalog with a correct
            schema is returned. With `None` the returned sources are not
            constrained; some implementations may not support the latter
            case.
        visit_time : `lsst.daf.base.DateTime`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            Catalog containing DiaForcedSource records. `None` is returned if
            ``read_forced_sources_months`` configuration parameter is set to
            0.

        Raises
        ------
        NotImplementedError
            May be raised by some implementations if ``object_ids`` is `None`.

        Notes
        -----
        The DiaForcedSource catalog returned for a region is additionally
        filtered by DiaObject IDs. Only the part of DiaForcedSource history
        selected by the ``read_forced_sources_months`` config parameter,
        counted w.r.t. ``visit_time``, is returned. An empty ``object_ids``
        always yields an empty catalog with the correct schema
        (columns/types). If ``object_ids`` is `None` then no filtering is
        performed and some of the returned records may be outside the
        specified region.
        """
        raise NotImplementedError

    @abstractmethod
    def containsVisitDetector(self, visit: int, detector: int) -> bool:
        """Check whether the APDB holds data for a given visit-detector.

        Parameters
        ----------
        visit, detector : `int`
            The ID of the visit-detector to search for.

        Returns
        -------
        present : `bool`
            `True` if some DiaObject, DiaSource, or DiaForcedSource records
            exist for the specified observation, `False` otherwise.
        """
        raise NotImplementedError

    @abstractmethod
    def getInsertIds(self) -> list[ApdbInsertId] | None:
        """Return the insert identifiers known to the database.

        Returns
        -------
        ids : `list` [`ApdbInsertId`] or `None`
            List of identifiers; they may be time-ordered if the database
            supports ordering. `None` is returned if the database is not
            configured to store insert identifiers.
        """
        raise NotImplementedError

    @abstractmethod
    def deleteInsertIds(self, ids: Iterable[ApdbInsertId]) -> None:
        """Remove insert identifiers from the database.

        Parameters
        ----------
        ids : `iterable` [`ApdbInsertId`]
            Insert identifiers, can include items returned from
            `getInsertIds`.

        Notes
        -----
        After this call Apdb forgets about the specified identifiers. Any
        auxiliary data associated with the identifiers is removed from the
        database as well, but data in regular tables is not removed. This
        method should be called after successful transfer of data from APDB
        to PPDB to free space used by history.
        """
        raise NotImplementedError

    @abstractmethod
    def getDiaObjectsHistory(self, ids: Iterable[ApdbInsertId]) -> ApdbTableData:
        """Return catalog of DiaObject instances from a given time period
        including the history of each DiaObject.

        Parameters
        ----------
        ids : `iterable` [`ApdbInsertId`]
            Insert identifiers, can include items returned from
            `getInsertIds`.

        Returns
        -------
        data : `ApdbTableData`
            Catalog containing DiaObject records. It contains an
            ``insert_id`` column in addition to all regular columns.

        Notes
        -----
        This part of API may not be very stable and can change before the
        implementation finalizes.
        """
        raise NotImplementedError

    @abstractmethod
    def getDiaSourcesHistory(self, ids: Iterable[ApdbInsertId]) -> ApdbTableData:
        """Return catalog of DiaSource instances from a given time period.

        Parameters
        ----------
        ids : `iterable` [`ApdbInsertId`]
            Insert identifiers, can include items returned from
            `getInsertIds`.

        Returns
        -------
        data : `ApdbTableData`
            Catalog containing DiaSource records. It contains an
            ``insert_id`` column in addition to all regular columns.

        Notes
        -----
        This part of API may not be very stable and can change before the
        implementation finalizes.
        """
        raise NotImplementedError

    @abstractmethod
    def getDiaForcedSourcesHistory(self, ids: Iterable[ApdbInsertId]) -> ApdbTableData:
        """Return catalog of DiaForcedSource instances from a given time
        period.

        Parameters
        ----------
        ids : `iterable` [`ApdbInsertId`]
            Insert identifiers, can include items returned from
            `getInsertIds`.

        Returns
        -------
        data : `ApdbTableData`
            Catalog containing DiaForcedSource records. It contains an
            ``insert_id`` column in addition to all regular columns.

        Notes
        -----
        This part of API may not be very stable and can change before the
        implementation finalizes.
        """
        raise NotImplementedError

    @abstractmethod
    def getSSObjects(self) -> pandas.DataFrame:
        """Return catalog of SSObject instances.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog containing SSObject records; all existing records are
            returned.
        """
        raise NotImplementedError

    @abstractmethod
    def store(
        self,
        visit_time: dafBase.DateTime,
        objects: pandas.DataFrame,
        sources: pandas.DataFrame | None = None,
        forced_sources: pandas.DataFrame | None = None,
    ) -> None:
        """Store all three types of catalogs in the database.

        Parameters
        ----------
        visit_time : `lsst.daf.base.DateTime`
            Time of the visit.
        objects : `pandas.DataFrame`
            Catalog with DiaObject records.
        sources : `pandas.DataFrame`, optional
            Catalog with DiaSource records.
        forced_sources : `pandas.DataFrame`, optional
            Catalog with DiaForcedSource records.

        Notes
        -----
        This method takes DataFrame catalogs, their schema must be
        compatible with the schema of APDB table:

          - column names must correspond to database table columns
          - types and units of the columns must match database definitions,
            no unit conversion is performed presently
          - columns that have default values in database schema can be
            omitted from catalog
          - this method knows how to fill interval-related columns of
            DiaObject (validityStart, validityEnd), they do not need to
            appear in a catalog
          - source catalogs have ``diaObjectId`` column associating sources
            with objects
        """
        raise NotImplementedError

    @abstractmethod
    def storeSSObjects(self, objects: pandas.DataFrame) -> None:
        """Store or update SSObject catalog.

        Parameters
        ----------
        objects : `pandas.DataFrame`
            Catalog with SSObject records.

        Notes
        -----
        Records of SSObjects whose IDs already exist in the database are
        updated with the information from the provided records.
        """
        raise NotImplementedError

    @abstractmethod
    def reassignDiaSources(self, idMap: Mapping[int, int]) -> None:
        """Associate DiaSources with SSObjects, dis-associating them
        from DiaObjects.

        Parameters
        ----------
        idMap : `Mapping`
            Maps DiaSource IDs to their new SSObject IDs.

        Raises
        ------
        ValueError
            Raised if DiaSource ID does not exist in the database.
        """
        raise NotImplementedError

    @abstractmethod
    def dailyJob(self) -> None:
        """Implement daily activities like cleanup/vacuum.

        Each specific implementation determines what is done during daily
        activities.
        """
        raise NotImplementedError

    @abstractmethod
    def countUnassociatedObjects(self) -> int:
        """Return the number of DiaObjects that have only one DiaSource
        associated with them.

        Used as part of ap_verify metrics.

        Returns
        -------
        count : `int`
            Number of DiaObjects with exactly one associated DiaSource.

        Notes
        -----
        This method can be very inefficient or slow in some implementations.
        """
        raise NotImplementedError

    @classmethod
    def makeField(cls, doc: str) -> ConfigurableField:
        """Make a `~lsst.pex.config.ConfigurableField` for Apdb.

        Parameters
        ----------
        doc : `str`
            Help text for the field.

        Returns
        -------
        configurableField : `lsst.pex.config.ConfigurableField`
            A `~lsst.pex.config.ConfigurableField` whose target is the class
            this method is called on.
        """
        configurable_field = ConfigurableField(doc=doc, target=cls)
        return configurable_field