Coverage for python/lsst/dax/apdb/apdb.py: 83%
110 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-03-28 10:11 +0000
1# This file is part of dax_apdb.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ["ApdbConfig", "Apdb", "ApdbInsertId", "ApdbTableData"]
26import os
27from abc import ABC, abstractmethod
28from collections.abc import Iterable, Mapping
29from dataclasses import dataclass
30from typing import TYPE_CHECKING
31from uuid import UUID, uuid4
33import astropy.time
34import pandas
35from felis.simple import Table
36from lsst.pex.config import Config, ConfigurableField, Field
37from lsst.sphgeom import Region
39from .apdbSchema import ApdbTables
41if TYPE_CHECKING: 41 ↛ 42line 41 didn't jump to line 42, because the condition on line 41 was never true
42 from .apdbMetadata import ApdbMetadata
43 from .versionTuple import VersionTuple
def _data_file_name(basename: str) -> str:
    """Build the path of a data file shipped with the sdm_schemas package.

    Parameters
    ----------
    basename : `str`
        Name of the file inside the ``yml`` sub-directory.

    Returns
    -------
    path : `str`
        Path composed of the literal ``${SDM_SCHEMAS_DIR}`` placeholder, the
        ``yml`` directory and ``basename``. The placeholder is returned
        verbatim, it is not expanded here.
    """
    components = ("${SDM_SCHEMAS_DIR}", "yml", basename)
    return os.path.join(*components)
class ApdbConfig(Config):
    """Configuration options shared by every Apdb implementation."""

    # How far back in time the source tables are read.
    read_sources_months = Field[int](
        doc="Number of months of history to read from DiaSource",
        default=12,
    )
    read_forced_sources_months = Field[int](
        doc="Number of months of history to read from DiaForcedSource",
        default=12,
    )

    # Where the schema definition comes from.
    schema_file = Field[str](
        doc="Location of (YAML) configuration file with standard schema",
        default=_data_file_name("apdb.yaml"),
    )
    schema_name = Field[str](
        doc="Name of the schema in YAML configuration file.",
        default="ApdbSchema",
    )

    # Deprecated: per its own deprecation message the value is not used.
    extra_schema_file = Field[str](
        doc=(
            "Location of (YAML) configuration file with extra schema, "
            "definitions in this file are merged with the definitions in "
            "'schema_file', extending or replacing parts of the schema."
        ),
        default=None,
        optional=True,
        deprecated="This field is deprecated, its value is not used.",
    )

    # Switch for the extra tables backing the history (insert ID) methods.
    use_insert_id = Field[bool](
        doc="If True, make and fill additional tables used for getHistory methods. "
        "Databases created with earlier versions of APDB may not have these tables, "
        "and corresponding methods will not work for them.",
        default=False,
    )
class ApdbTableData(ABC):
    """Abstract interface describing the contents of a table.

    Concrete subclasses have to implement both `column_names` and `rows`.
    """

    @abstractmethod
    def column_names(self) -> list[str]:
        """Return the names of the table columns, in order.

        Returns
        -------
        names : `list` [`str`]
            Ordered column names.
        """
        raise NotImplementedError()

    @abstractmethod
    def rows(self) -> Iterable[tuple]:
        """Return the rows of the table, one tuple of values per row.

        Returns
        -------
        rows : `iterable` [`tuple`]
            Iterable producing one tuple for each row.
        """
        raise NotImplementedError()
@dataclass(frozen=True)
class ApdbInsertId:
    """Class used to identify single insert operation.

    Instances of this class are used to identify the units of transfer from
    APDB to PPDB. Usually single `ApdbInsertId` corresponds to a single call to
    `store` method.
    """

    # Unique identifier of the insert (`uuid.UUID`).
    id: UUID
    insert_time: astropy.time.Time
    """Time of this insert, usually corresponds to visit time
    (`astropy.time.Time`).
    """

    @classmethod
    def new_insert_id(cls, insert_time: astropy.time.Time) -> ApdbInsertId:
        """Generate new unique insert identifier.

        Parameters
        ----------
        insert_time : `astropy.time.Time`
            Time to record for the new insert.

        Returns
        -------
        insert_id : `ApdbInsertId`
            New instance with a freshly generated random UUID (``uuid4``) and
            the given ``insert_time``.
        """
        # Construct via ``cls`` rather than the hard-coded class name so that
        # a subclass calling this factory gets an instance of its own type.
        return cls(id=uuid4(), insert_time=insert_time)
class Apdb(ABC):
    """Abstract interface which every APDB implementation has to provide."""

    ConfigClass = ApdbConfig

    @classmethod
    @abstractmethod
    def apdbImplementationVersion(cls) -> VersionTuple:
        """Return the version number of the current APDB implementation.

        Returns
        -------
        version : `VersionTuple`
            Version of the code, as defined in the implementation class.
        """
        raise NotImplementedError()

    @abstractmethod
    def apdbSchemaVersion(self) -> VersionTuple:
        """Return the schema version number defined in the config file.

        Returns
        -------
        version : `VersionTuple`
            Version of the schema, as defined in the schema config file.
        """
        raise NotImplementedError()

    @abstractmethod
    def tableDef(self, table: ApdbTables) -> Table | None:
        """Return the schema definition of a given table.

        Parameters
        ----------
        table : `ApdbTables`
            One of the known APDB tables.

        Returns
        -------
        tableSchema : `felis.simple.Table` or `None`
            Description of the table schema, or `None` when the table is not
            defined by this implementation.
        """
        raise NotImplementedError()

    @classmethod
    def makeSchema(cls, config: ApdbConfig, *, drop: bool = False) -> None:
        """Create, or re-create, the whole database schema.

        Parameters
        ----------
        config : `ApdbConfig`
            Instance of configuration class, its type has to match the type
            of the actual implementation class of this interface.
        drop : `bool`
            If `True` then all tables are dropped before new ones are
            created.
        """
        # The factory module is imported at call time, as in the original
        # code (presumably to avoid a circular import -- TODO confirm).
        from .factory import apdb_type

        # Dispatch to the actual implementation class selected by the type
        # of the configuration object.
        apdb_type(config).makeSchema(config, drop=drop)

    @abstractmethod
    def getDiaObjects(self, region: Region) -> pandas.DataFrame:
        """Return a catalog of DiaObject instances from a given region.

        Only the last version of each DiaObject is returned. Some of the
        returned records may lie outside the specified region, it is up to
        the client to ignore those records or to clean up the catalog before
        further use.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DIAObjects.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog with DiaObject records for a region which may be a
            superset of the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaSources(
        self, region: Region, object_ids: Iterable[int] | None, visit_time: astropy.time.Time
    ) -> pandas.DataFrame | None:
        """Return a catalog of DiaSource instances from a given region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DIASources.
        object_ids : iterable [ `int` ], optional
            DiaObject IDs used to further constrain the set of returned
            sources. With `None` the returned sources are not constrained.
            With an empty list an empty catalog with a correct schema is
            returned.
        visit_time : `astropy.time.Time`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            Catalog with DiaSource records. `None` is returned when the
            ``read_sources_months`` configuration parameter is set to 0.

        Notes
        -----
        The DiaSource catalog for the region is additionally filtered by
        DiaObject IDs. Only a subset of the DiaSource history is returned,
        limited by the ``read_sources_months`` config parameter with respect
        to ``visit_time``. An empty ``object_ids`` always results in an empty
        catalog with the correct schema (columns/types). If ``object_ids`` is
        `None` then no filtering is performed and some of the returned
        records may be outside the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaForcedSources(
        self, region: Region, object_ids: Iterable[int] | None, visit_time: astropy.time.Time
    ) -> pandas.DataFrame | None:
        """Return a catalog of DiaForcedSource instances from a given region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DiaForcedSources.
        object_ids : iterable [ `int` ], optional
            DiaObject IDs used to further constrain the set of returned
            sources. With an empty list an empty catalog with a correct
            schema is returned. With `None` the returned sources are not
            constrained; some implementations may not support that case.
        visit_time : `astropy.time.Time`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            Catalog with DiaForcedSource records. `None` is returned when the
            ``read_forced_sources_months`` configuration parameter is set
            to 0.

        Raises
        ------
        NotImplementedError
            May be raised by some implementations if ``object_ids`` is `None`.

        Notes
        -----
        The DiaForcedSource catalog for the region is additionally filtered
        by DiaObject IDs. Only a subset of the history is returned, limited
        by the ``read_forced_sources_months`` config parameter with respect
        to ``visit_time``. An empty ``object_ids`` always results in an empty
        catalog with the correct schema (columns/types). If ``object_ids`` is
        `None` then no filtering is performed and some of the returned
        records may be outside the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def containsVisitDetector(self, visit: int, detector: int) -> bool:
        """Check whether the APDB has data for a given visit-detector.

        Parameters
        ----------
        visit, detector : `int`
            The ID of the visit-detector to search for.

        Returns
        -------
        present : `bool`
            `True` if some DiaObject, DiaSource, or DiaForcedSource records
            exist for the specified observation, `False` otherwise.
        """
        raise NotImplementedError()

    @abstractmethod
    def getInsertIds(self) -> list[ApdbInsertId] | None:
        """Return the insert identifiers known to the database.

        Returns
        -------
        ids : `list` [`ApdbInsertId`] or `None`
            List of identifiers, which may be time-ordered if the database
            supports ordering. `None` is returned when the database is not
            configured to store insert identifiers.
        """
        raise NotImplementedError()

    @abstractmethod
    def deleteInsertIds(self, ids: Iterable[ApdbInsertId]) -> None:
        """Remove insert identifiers from the database.

        Parameters
        ----------
        ids : `iterable` [`ApdbInsertId`]
            Insert identifiers, can include items returned from `getInsertIds`.

        Notes
        -----
        After this call Apdb forgets about the specified identifiers. Any
        auxiliary data associated with the identifiers is removed from the
        database as well, while data in the regular tables is not removed.
        The method should be called after a successful transfer of data from
        APDB to PPDB, to free the space used by history.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaObjectsHistory(self, ids: Iterable[ApdbInsertId]) -> ApdbTableData:
        """Return a catalog of DiaObject instances from a given time period,
        including the history of each DiaObject.

        Parameters
        ----------
        ids : `iterable` [`ApdbInsertId`]
            Insert identifiers, can include items returned from `getInsertIds`.

        Returns
        -------
        data : `ApdbTableData`
            Catalog with DiaObject records. It contains an ``insert_id``
            column in addition to all regular columns.

        Notes
        -----
        This part of the API may not be very stable and can change before
        the implementation finalizes.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaSourcesHistory(self, ids: Iterable[ApdbInsertId]) -> ApdbTableData:
        """Return a catalog of DiaSource instances from a given time period.

        Parameters
        ----------
        ids : `iterable` [`ApdbInsertId`]
            Insert identifiers, can include items returned from `getInsertIds`.

        Returns
        -------
        data : `ApdbTableData`
            Catalog with DiaSource records. It contains an ``insert_id``
            column in addition to all regular columns.

        Notes
        -----
        This part of the API may not be very stable and can change before
        the implementation finalizes.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaForcedSourcesHistory(self, ids: Iterable[ApdbInsertId]) -> ApdbTableData:
        """Return a catalog of DiaForcedSource instances from a given time
        period.

        Parameters
        ----------
        ids : `iterable` [`ApdbInsertId`]
            Insert identifiers, can include items returned from `getInsertIds`.

        Returns
        -------
        data : `ApdbTableData`
            Catalog with DiaForcedSource records. It contains an
            ``insert_id`` column in addition to all regular columns.

        Notes
        -----
        This part of the API may not be very stable and can change before
        the implementation finalizes.
        """
        raise NotImplementedError()

    @abstractmethod
    def getSSObjects(self) -> pandas.DataFrame:
        """Return a catalog of SSObject instances.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog with SSObject records, all existing records are
            returned.
        """
        raise NotImplementedError()

    @abstractmethod
    def store(
        self,
        visit_time: astropy.time.Time,
        objects: pandas.DataFrame,
        sources: pandas.DataFrame | None = None,
        forced_sources: pandas.DataFrame | None = None,
    ) -> None:
        """Store all three types of catalogs in the database.

        Parameters
        ----------
        visit_time : `astropy.time.Time`
            Time of the visit.
        objects : `pandas.DataFrame`
            Catalog with DiaObject records.
        sources : `pandas.DataFrame`, optional
            Catalog with DiaSource records.
        forced_sources : `pandas.DataFrame`, optional
            Catalog with DiaForcedSource records.

        Notes
        -----
        This method takes DataFrame catalogs, their schema must be
        compatible with the schema of APDB table:

        - column names must correspond to database table columns
        - types and units of the columns must match database definitions,
          no unit conversion is performed presently
        - columns that have default values in database schema can be
          omitted from catalog
        - this method knows how to fill interval-related columns of DiaObject
          (validityStart, validityEnd), they do not need to appear in a
          catalog
        - source catalogs have ``diaObjectId`` column associating sources
          with objects
        """
        raise NotImplementedError()

    @abstractmethod
    def storeSSObjects(self, objects: pandas.DataFrame) -> None:
        """Store or update the SSObject catalog.

        Parameters
        ----------
        objects : `pandas.DataFrame`
            Catalog with SSObject records.

        Notes
        -----
        When SSObjects with matching IDs already exist in the database,
        their records are updated with the information from the provided
        records.
        """
        raise NotImplementedError()

    @abstractmethod
    def reassignDiaSources(self, idMap: Mapping[int, int]) -> None:
        """Associate DiaSources with SSObjects, dis-associating them from
        DiaObjects.

        Parameters
        ----------
        idMap : `Mapping`
            Maps DiaSource IDs to their new SSObject IDs.

        Raises
        ------
        ValueError
            Raised if DiaSource ID does not exist in the database.
        """
        raise NotImplementedError()

    @abstractmethod
    def dailyJob(self) -> None:
        """Implement daily activities like cleanup/vacuum.

        What exactly is done during daily activities is determined by the
        specific implementation.
        """
        raise NotImplementedError()

    @abstractmethod
    def countUnassociatedObjects(self) -> int:
        """Return the number of DiaObjects that have only one DiaSource
        associated with them.

        Used as part of ap_verify metrics.

        Returns
        -------
        count : `int`
            Number of DiaObjects with exactly one associated DiaSource.

        Notes
        -----
        This method can be very inefficient or slow in some implementations.
        """
        raise NotImplementedError()

    @classmethod
    def makeField(cls, doc: str) -> ConfigurableField:
        """Make a `~lsst.pex.config.ConfigurableField` for Apdb.

        Parameters
        ----------
        doc : `str`
            Help text for the field.

        Returns
        -------
        configurableField : `lsst.pex.config.ConfigurableField`
            A `~lsst.pex.config.ConfigurableField` whose target is the class
            this method is called on.
        """
        configurable_field = ConfigurableField(doc=doc, target=cls)
        return configurable_field

    @property
    @abstractmethod
    def metadata(self) -> ApdbMetadata:
        """Object controlling access to APDB metadata (`ApdbMetadata`)."""
        raise NotImplementedError()