Coverage for python/lsst/dax/apdb/apdb.py: 91%
98 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-10 10:38 +0000
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-10 10:38 +0000
1# This file is part of dax_apdb.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ["ApdbConfig", "Apdb", "ApdbInsertId", "ApdbTableData"]
26import os
27from abc import ABC, abstractmethod
28from collections.abc import Iterable, Mapping
29from dataclasses import dataclass
30from typing import TYPE_CHECKING, cast
31from uuid import UUID, uuid4
33import astropy.time
34import pandas
35from felis.simple import Table
36from lsst.pex.config import Config, ConfigurableField, Field
37from lsst.resources import ResourcePath, ResourcePathExpression
38from lsst.sphgeom import Region
40from .apdbIndex import ApdbIndex
41from .apdbSchema import ApdbTables
42from .factory import make_apdb
44if TYPE_CHECKING:
45 from .apdbMetadata import ApdbMetadata
46 from .versionTuple import VersionTuple
def _data_file_name(basename: str) -> str:
    """Build the path of a data file shipped with the sdm_schemas package.

    Parameters
    ----------
    basename : `str`
        Name of the file inside the ``yml`` sub-directory.

    Returns
    -------
    path : `str`
        Path made of the literal ``${SDM_SCHEMAS_DIR}`` placeholder, the
        ``yml`` directory and ``basename``. The placeholder is returned
        unexpanded by this function.
    """
    components = ("${SDM_SCHEMAS_DIR}", "yml", basename)
    return os.path.join(*components)
class ApdbConfig(Config):
    """Configuration options shared by every Apdb implementation."""

    read_sources_months = Field[int](
        default=12,
        doc="Number of months of history to read from DiaSource",
    )
    read_forced_sources_months = Field[int](
        default=12,
        doc="Number of months of history to read from DiaForcedSource",
    )
    schema_file = Field[str](
        default=_data_file_name("apdb.yaml"),
        doc="Location of (YAML) configuration file with standard schema",
    )
    schema_name = Field[str](
        default="ApdbSchema",
        doc="Name of the schema in YAML configuration file.",
    )
    # Deprecated option; per its own deprecation message the value is unused.
    extra_schema_file = Field[str](
        default=None,
        optional=True,
        deprecated="This field is deprecated, its value is not used.",
        doc=(
            "Location of (YAML) configuration file with extra schema, "
            "definitions in this file are merged with the definitions in "
            "'schema_file', extending or replacing parts of the schema."
        ),
    )
    use_insert_id = Field[bool](
        default=False,
        doc=(
            "If True, make and fill additional tables used for getHistory methods. "
            "Databases created with earlier versions of APDB may not have these tables, "
            "and corresponding methods will not work for them."
        ),
    )
class ApdbTableData(ABC):
    """Abstract interface describing the contents of a single table.

    Concrete subclasses provide the column names and the row data.
    """

    @abstractmethod
    def column_names(self) -> list[str]:
        """Return the names of the table columns, in order.

        Returns
        -------
        names : `list` [`str`]
            Ordered column names.
        """
        raise NotImplementedError

    @abstractmethod
    def rows(self) -> Iterable[tuple]:
        """Return the rows of the table.

        Returns
        -------
        rows : `iterable` [`tuple`]
            Iterable producing one tuple of values per table row.
        """
        raise NotImplementedError
@dataclass(frozen=True)
class ApdbInsertId:
    """Class used to identify single insert operation.

    Instances of this class are used to identify the units of transfer from
    APDB to PPDB. Usually single `ApdbInsertId` corresponds to a single call to
    `store` method.
    """

    id: UUID
    """Unique identifier of this insert (`uuid.UUID`)."""

    insert_time: astropy.time.Time
    """Time of this insert, usually corresponds to visit time
    (`astropy.time.Time`).
    """

    @classmethod
    def new_insert_id(cls, insert_time: astropy.time.Time) -> ApdbInsertId:
        """Generate new unique insert identifier.

        Parameters
        ----------
        insert_time : `astropy.time.Time`
            Time of the insert, stored unchanged in the new instance.

        Returns
        -------
        insert_id : `ApdbInsertId`
            New instance of the class this method is called on, with a
            freshly generated random UUID as its ``id``.
        """
        # Construct via ``cls`` rather than a hard-coded class name so that
        # subclasses calling this factory get instances of their own type.
        return cls(id=uuid4(), insert_time=insert_time)
class Apdb(ABC):
    """Abstract interface for APDB."""

    ConfigClass = ApdbConfig

    @classmethod
    def from_config(cls, config: ApdbConfig) -> Apdb:
        """Create Apdb instance from configuration object.

        Parameters
        ----------
        config : `ApdbConfig`
            Configuration object, type of this object determines type of the
            Apdb implementation.

        Returns
        -------
        apdb : `Apdb`
            Instance of `Apdb` class.
        """
        return make_apdb(config)

    @classmethod
    def from_uri(cls, uri: ResourcePathExpression) -> Apdb:
        """Make Apdb instance from a serialized configuration.

        Parameters
        ----------
        uri : `~lsst.resources.ResourcePathExpression`
            URI or local file path pointing to a file with serialized
            configuration, or a string with a "label:" prefix. In the latter
            case, the configuration will be looked up from an APDB index file
            using the label name that follows the prefix. The APDB index file's
            location is determined by the ``DAX_APDB_INDEX_URI`` environment
            variable.

        Returns
        -------
        apdb : `Apdb`
            Instance of `Apdb` class, the type of the returned instance is
            determined by configuration.
        """
        if isinstance(uri, str) and uri.startswith("label:"):
            # Only the text after the first colon is needed, the "label" tag
            # itself has already been checked above.
            _, _, label = uri.partition(":")
            index = ApdbIndex()
            # Current format for config files is "pex_config"
            config_format = "pex_config"
            uri = index.get_apdb_uri(label, config_format)
        path = ResourcePath(uri)
        config_str = path.read().decode()
        # Assume that this is ApdbConfig, make_apdb will raise if not.
        config = cast(ApdbConfig, Config._fromPython(config_str))
        return make_apdb(config)

    @classmethod
    @abstractmethod
    def apdbImplementationVersion(cls) -> VersionTuple:
        """Return version number for current APDB implementation.

        Returns
        -------
        version : `VersionTuple`
            Version of the code defined in implementation class.
        """
        raise NotImplementedError()

    @abstractmethod
    def apdbSchemaVersion(self) -> VersionTuple:
        """Return schema version number as defined in config file.

        Returns
        -------
        version : `VersionTuple`
            Version of the schema defined in schema config file.
        """
        raise NotImplementedError()

    @abstractmethod
    def tableDef(self, table: ApdbTables) -> Table | None:
        """Return table schema definition for a given table.

        Parameters
        ----------
        table : `ApdbTables`
            One of the known APDB tables.

        Returns
        -------
        tableSchema : `felis.simple.Table` or `None`
            Table schema description, `None` is returned if table is not
            defined by this implementation.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaObjects(self, region: Region) -> pandas.DataFrame:
        """Return catalog of DiaObject instances from a given region.

        This method returns only the last version of each DiaObject. Some
        records in a returned catalog may be outside the specified region, it
        is up to a client to ignore those records or cleanup the catalog before
        further use.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DIAObjects.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog containing DiaObject records for a region that may be a
            superset of the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaSources(
        self, region: Region, object_ids: Iterable[int] | None, visit_time: astropy.time.Time
    ) -> pandas.DataFrame | None:
        """Return catalog of DiaSource instances from a given region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DIASources.
        object_ids : iterable [ `int` ], optional
            List of DiaObject IDs to further constrain the set of returned
            sources. If `None` then returned sources are not constrained. If
            list is empty then empty catalog is returned with a correct
            schema.
        visit_time : `astropy.time.Time`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            Catalog containing DiaSource records. `None` is returned if
            ``read_sources_months`` configuration parameter is set to 0.

        Notes
        -----
        This method returns DiaSource catalog for a region with additional
        filtering based on DiaObject IDs. Only a subset of DiaSource history
        is returned limited by ``read_sources_months`` config parameter, w.r.t.
        ``visit_time``. If ``object_ids`` is empty then an empty catalog is
        always returned with the correct schema (columns/types). If
        ``object_ids`` is `None` then no filtering is performed and some of the
        returned records may be outside the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaForcedSources(
        self, region: Region, object_ids: Iterable[int] | None, visit_time: astropy.time.Time
    ) -> pandas.DataFrame | None:
        """Return catalog of DiaForcedSource instances from a given region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DIASources.
        object_ids : iterable [ `int` ], optional
            List of DiaObject IDs to further constrain the set of returned
            sources. If list is empty then empty catalog is returned with a
            correct schema. If `None` then returned sources are not
            constrained. Some implementations may not support latter case.
        visit_time : `astropy.time.Time`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            Catalog containing DiaForcedSource records. `None` is returned if
            ``read_forced_sources_months`` configuration parameter is set to 0.

        Raises
        ------
        NotImplementedError
            May be raised by some implementations if ``object_ids`` is `None`.

        Notes
        -----
        This method returns DiaForcedSource catalog for a region with
        additional filtering based on DiaObject IDs. Only a subset of
        DiaForcedSource history is returned limited by
        ``read_forced_sources_months`` config parameter, w.r.t.
        ``visit_time``. If ``object_ids`` is empty then an empty catalog is
        always returned with the correct schema (columns/types). If
        ``object_ids`` is `None` then no filtering is performed and some of the
        returned records may be outside the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def containsVisitDetector(self, visit: int, detector: int) -> bool:
        """Test whether data for a given visit-detector is present in the APDB.

        Parameters
        ----------
        visit, detector : `int`
            The ID of the visit-detector to search for.

        Returns
        -------
        present : `bool`
            `True` if some DiaObject, DiaSource, or DiaForcedSource records
            exist for the specified observation, `False` otherwise.
        """
        raise NotImplementedError()

    @abstractmethod
    def getInsertIds(self) -> list[ApdbInsertId] | None:
        """Return collection of insert identifiers known to the database.

        Returns
        -------
        ids : `list` [`ApdbInsertId`] or `None`
            List of identifiers, they may be time-ordered if database supports
            ordering. `None` is returned if database is not configured to store
            insert identifiers.
        """
        raise NotImplementedError()

    @abstractmethod
    def deleteInsertIds(self, ids: Iterable[ApdbInsertId]) -> None:
        """Remove insert identifiers from the database.

        Parameters
        ----------
        ids : `iterable` [`ApdbInsertId`]
            Insert identifiers, can include items returned from `getInsertIds`.

        Notes
        -----
        This method causes Apdb to forget about specified identifiers. If there
        are any auxiliary data associated with the identifiers, it is also
        removed from database (but data in regular tables is not removed).
        This method should be called after successful transfer of data from
        APDB to PPDB to free space used by history.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaObjectsHistory(self, ids: Iterable[ApdbInsertId]) -> ApdbTableData:
        """Return catalog of DiaObject instances from a given time period
        including the history of each DiaObject.

        Parameters
        ----------
        ids : `iterable` [`ApdbInsertId`]
            Insert identifiers, can include items returned from `getInsertIds`.

        Returns
        -------
        data : `ApdbTableData`
            Catalog containing DiaObject records. In addition to all regular
            columns it will contain ``insert_id`` column.

        Notes
        -----
        This part of API may not be very stable and can change before the
        implementation finalizes.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaSourcesHistory(self, ids: Iterable[ApdbInsertId]) -> ApdbTableData:
        """Return catalog of DiaSource instances from a given time period.

        Parameters
        ----------
        ids : `iterable` [`ApdbInsertId`]
            Insert identifiers, can include items returned from `getInsertIds`.

        Returns
        -------
        data : `ApdbTableData`
            Catalog containing DiaSource records. In addition to all regular
            columns it will contain ``insert_id`` column.

        Notes
        -----
        This part of API may not be very stable and can change before the
        implementation finalizes.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaForcedSourcesHistory(self, ids: Iterable[ApdbInsertId]) -> ApdbTableData:
        """Return catalog of DiaForcedSource instances from a given time
        period.

        Parameters
        ----------
        ids : `iterable` [`ApdbInsertId`]
            Insert identifiers, can include items returned from `getInsertIds`.

        Returns
        -------
        data : `ApdbTableData`
            Catalog containing DiaForcedSource records. In addition to all
            regular columns it will contain ``insert_id`` column.

        Notes
        -----
        This part of API may not be very stable and can change before the
        implementation finalizes.
        """
        raise NotImplementedError()

    @abstractmethod
    def getSSObjects(self) -> pandas.DataFrame:
        """Return catalog of SSObject instances.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog containing SSObject records, all existing records are
            returned.
        """
        raise NotImplementedError()

    @abstractmethod
    def store(
        self,
        visit_time: astropy.time.Time,
        objects: pandas.DataFrame,
        sources: pandas.DataFrame | None = None,
        forced_sources: pandas.DataFrame | None = None,
    ) -> None:
        """Store all three types of catalogs in the database.

        Parameters
        ----------
        visit_time : `astropy.time.Time`
            Time of the visit.
        objects : `pandas.DataFrame`
            Catalog with DiaObject records.
        sources : `pandas.DataFrame`, optional
            Catalog with DiaSource records.
        forced_sources : `pandas.DataFrame`, optional
            Catalog with DiaForcedSource records.

        Notes
        -----
        This method takes DataFrame catalogs, their schema must be
        compatible with the schema of APDB table:

        - column names must correspond to database table columns
        - types and units of the columns must match database definitions,
          no unit conversion is performed presently
        - columns that have default values in database schema can be
          omitted from catalog
        - this method knows how to fill interval-related columns of DiaObject
          (validityStart, validityEnd) they do not need to appear in a
          catalog
        - source catalogs have ``diaObjectId`` column associating sources
          with objects
        """
        raise NotImplementedError()

    @abstractmethod
    def storeSSObjects(self, objects: pandas.DataFrame) -> None:
        """Store or update SSObject catalog.

        Parameters
        ----------
        objects : `pandas.DataFrame`
            Catalog with SSObject records.

        Notes
        -----
        If SSObjects with matching IDs already exist in the database, their
        records will be updated with the information from provided records.
        """
        raise NotImplementedError()

    @abstractmethod
    def reassignDiaSources(self, idMap: Mapping[int, int]) -> None:
        """Associate DiaSources with SSObjects, dis-associating them
        from DiaObjects.

        Parameters
        ----------
        idMap : `Mapping`
            Maps DiaSource IDs to their new SSObject IDs.

        Raises
        ------
        ValueError
            Raised if DiaSource ID does not exist in the database.
        """
        raise NotImplementedError()

    @abstractmethod
    def dailyJob(self) -> None:
        """Implement daily activities like cleanup/vacuum.

        What should be done during daily activities is determined by
        specific implementation.
        """
        raise NotImplementedError()

    @abstractmethod
    def countUnassociatedObjects(self) -> int:
        """Return the number of DiaObjects that have only one DiaSource
        associated with them.

        Used as part of ap_verify metrics.

        Returns
        -------
        count : `int`
            Number of DiaObjects with exactly one associated DiaSource.

        Notes
        -----
        This method can be very inefficient or slow in some implementations.
        """
        raise NotImplementedError()

    @classmethod
    def makeField(cls, doc: str) -> ConfigurableField:
        """Make a `~lsst.pex.config.ConfigurableField` for Apdb.

        Parameters
        ----------
        doc : `str`
            Help text for the field.

        Returns
        -------
        configurableField : `lsst.pex.config.ConfigurableField`
            A `~lsst.pex.config.ConfigurableField` for Apdb.
        """
        return ConfigurableField(doc=doc, target=cls)

    @property
    @abstractmethod
    def metadata(self) -> ApdbMetadata:
        """Object controlling access to APDB metadata (`ApdbMetadata`)."""
        raise NotImplementedError()