Coverage for python/lsst/dax/apdb/apdb.py: 89%
74 statements
« prev ^ index » next coverage.py v7.5.0, created at 2024-04-27 03:01 -0700
« prev ^ index » next coverage.py v7.5.0, created at 2024-04-27 03:01 -0700
1# This file is part of dax_apdb.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ["ApdbConfig", "Apdb"]
26import os
27from abc import ABC, abstractmethod
28from collections.abc import Iterable, Mapping
29from typing import TYPE_CHECKING, cast
31import astropy.time
32import pandas
33from lsst.pex.config import Config, ConfigurableField, Field
34from lsst.resources import ResourcePath, ResourcePathExpression
35from lsst.sphgeom import Region
37from .apdbIndex import ApdbIndex
38from .apdbSchema import ApdbTables
39from .factory import make_apdb
40from .schema_model import Table
42if TYPE_CHECKING:
43 from .apdbMetadata import ApdbMetadata
44 from .versionTuple import VersionTuple
def _data_file_name(basename: str) -> str:
    """Build the path name of a data file in the sdm_schemas package.

    Parameters
    ----------
    basename : `str`
        Name of the file, placed under the ``yml`` folder.

    Returns
    -------
    path : `str`
        Path that starts with the literal ``${SDM_SCHEMAS_DIR}`` placeholder.
        The placeholder is returned as-is, this function does not expand it.
    """
    components = ("${SDM_SCHEMAS_DIR}", "yml", basename)
    return os.path.join(*components)
class ApdbConfig(Config):
    """Configuration options shared by every Apdb implementation."""

    # How much history to read back; both default to 12 months.
    read_sources_months = Field[int](
        default=12,
        doc="Number of months of history to read from DiaSource",
    )
    read_forced_sources_months = Field[int](
        default=12,
        doc="Number of months of history to read from DiaForcedSource",
    )

    # Schema location and the name of the schema inside that file.
    schema_file = Field[str](
        default=_data_file_name("apdb.yaml"),
        doc="Location of (YAML) configuration file with standard schema",
    )
    schema_name = Field[str](
        default="ApdbSchema",
        doc="Name of the schema in YAML configuration file.",
    )

    # Deprecated field; per its own deprecation message the value is unused.
    extra_schema_file = Field[str](
        default=None,
        optional=True,
        doc=(
            "Location of (YAML) configuration file with extra schema, "
            "definitions in this file are merged with the definitions in "
            "'schema_file', extending or replacing parts of the schema."
        ),
        deprecated="This field is deprecated, its value is not used.",
    )

    # Replication-related options.
    use_insert_id = Field[bool](
        default=False,
        doc=(
            "If True, make and fill additional tables used for replication. "
            "Databases created with earlier versions of APDB may not have these tables, "
            "and corresponding methods will not work for them."
        ),
    )
    replica_chunk_seconds = Field[int](
        default=600,
        doc="Time extent for replica chunks, new chunks are created every specified number of seconds.",
    )
class Apdb(ABC):
    """Abstract interface for APDB."""

    ConfigClass = ApdbConfig

    @classmethod
    def from_config(cls, config: ApdbConfig) -> Apdb:
        """Create Apdb instance from configuration object.

        Parameters
        ----------
        config : `ApdbConfig`
            Configuration object, type of this object determines type of the
            Apdb implementation.

        Returns
        -------
        apdb : `Apdb`
            Instance of `Apdb` class.
        """
        return make_apdb(config)

    @classmethod
    def from_uri(cls, uri: ResourcePathExpression) -> Apdb:
        """Make Apdb instance from a serialized configuration.

        Parameters
        ----------
        uri : `~lsst.resources.ResourcePathExpression`
            URI or local file path pointing to a file with serialized
            configuration, or a string with a "label:" prefix. In the latter
            case, the configuration will be looked up from an APDB index file
            using the label name that follows the prefix. The APDB index file's
            location is determined by the ``DAX_APDB_INDEX_URI`` environment
            variable.

        Returns
        -------
        apdb : `Apdb`
            Instance of `Apdb` class, the type of the returned instance is
            determined by configuration.
        """
        if isinstance(uri, str) and uri.startswith("label:"):
            # Everything after the first ":" is the label; the prefix itself
            # is not needed once it has been recognized.
            _, _, label = uri.partition(":")
            index = ApdbIndex()
            # Current format for config files is "pex_config"
            config_format = "pex_config"
            uri = index.get_apdb_uri(label, config_format)
        path = ResourcePath(uri)
        config_str = path.read().decode()
        # NOTE(review): ``_fromPython`` is a private pex_config method that
        # presumably evaluates the serialized text as Python -- confirm that
        # configuration URIs only ever come from trusted locations.
        # Assume that this is ApdbConfig, make_apdb will raise if not.
        config = cast(ApdbConfig, Config._fromPython(config_str))
        return make_apdb(config)

    @classmethod
    @abstractmethod
    def apdbImplementationVersion(cls) -> VersionTuple:
        """Return version number for current APDB implementation.

        Returns
        -------
        version : `VersionTuple`
            Version of the code defined in implementation class.
        """
        raise NotImplementedError()

    @abstractmethod
    def apdbSchemaVersion(self) -> VersionTuple:
        """Return schema version number as defined in config file.

        Returns
        -------
        version : `VersionTuple`
            Version of the schema defined in schema config file.
        """
        raise NotImplementedError()

    @abstractmethod
    def tableDef(self, table: ApdbTables) -> Table | None:
        """Return table schema definition for a given table.

        Parameters
        ----------
        table : `ApdbTables`
            One of the known APDB tables.

        Returns
        -------
        tableSchema : `.schema_model.Table` or `None`
            Table schema description, `None` is returned if table is not
            defined by this implementation.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaObjects(self, region: Region) -> pandas.DataFrame:
        """Return catalog of DiaObject instances from a given region.

        This method returns only the last version of each DiaObject. Some
        records in a returned catalog may be outside the specified region, it
        is up to a client to ignore those records or cleanup the catalog before
        further use.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DIAObjects.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog containing DiaObject records for a region that may be a
            superset of the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaSources(
        self, region: Region, object_ids: Iterable[int] | None, visit_time: astropy.time.Time
    ) -> pandas.DataFrame | None:
        """Return catalog of DiaSource instances from a given region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DIASources.
        object_ids : iterable [ `int` ] or `None`
            List of DiaObject IDs to further constrain the set of returned
            sources. If `None` then returned sources are not constrained. If
            list is empty then empty catalog is returned with a correct
            schema.
        visit_time : `astropy.time.Time`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            Catalog containing DiaSource records. `None` is returned if
            ``read_sources_months`` configuration parameter is set to 0.

        Notes
        -----
        This method returns DiaSource catalog for a region with additional
        filtering based on DiaObject IDs. Only a subset of DiaSource history
        is returned limited by ``read_sources_months`` config parameter, w.r.t.
        ``visit_time``. If ``object_ids`` is empty then an empty catalog is
        always returned with the correct schema (columns/types). If
        ``object_ids`` is `None` then no filtering is performed and some of the
        returned records may be outside the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaForcedSources(
        self, region: Region, object_ids: Iterable[int] | None, visit_time: astropy.time.Time
    ) -> pandas.DataFrame | None:
        """Return catalog of DiaForcedSource instances from a given region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DiaForcedSources.
        object_ids : iterable [ `int` ] or `None`
            List of DiaObject IDs to further constrain the set of returned
            sources. If list is empty then empty catalog is returned with a
            correct schema. If `None` then returned sources are not
            constrained. Some implementations may not support latter case.
        visit_time : `astropy.time.Time`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            Catalog containing DiaForcedSource records. `None` is returned if
            ``read_forced_sources_months`` configuration parameter is set to 0.

        Raises
        ------
        NotImplementedError
            May be raised by some implementations if ``object_ids`` is `None`.

        Notes
        -----
        This method returns DiaForcedSource catalog for a region with
        additional filtering based on DiaObject IDs. Only a subset of
        DiaForcedSource history is returned limited by
        ``read_forced_sources_months`` config parameter, w.r.t.
        ``visit_time``. If ``object_ids`` is empty then an empty catalog is
        always returned with the correct schema (columns/types). If
        ``object_ids`` is `None` then no filtering is performed and some of the
        returned records may be outside the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def containsVisitDetector(self, visit: int, detector: int) -> bool:
        """Test whether data for a given visit-detector is present in the APDB.

        Parameters
        ----------
        visit, detector : `int`
            The ID of the visit-detector to search for.

        Returns
        -------
        present : `bool`
            `True` if some DiaObject, DiaSource, or DiaForcedSource records
            exist for the specified observation, `False` otherwise.
        """
        raise NotImplementedError()

    @abstractmethod
    def getSSObjects(self) -> pandas.DataFrame:
        """Return catalog of SSObject instances.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog containing SSObject records, all existing records are
            returned.
        """
        raise NotImplementedError()

    @abstractmethod
    def store(
        self,
        visit_time: astropy.time.Time,
        objects: pandas.DataFrame,
        sources: pandas.DataFrame | None = None,
        forced_sources: pandas.DataFrame | None = None,
    ) -> None:
        """Store all three types of catalogs in the database.

        Parameters
        ----------
        visit_time : `astropy.time.Time`
            Time of the visit.
        objects : `pandas.DataFrame`
            Catalog with DiaObject records.
        sources : `pandas.DataFrame`, optional
            Catalog with DiaSource records.
        forced_sources : `pandas.DataFrame`, optional
            Catalog with DiaForcedSource records.

        Notes
        -----
        This method takes DataFrame catalogs, their schema must be
        compatible with the schema of APDB table:

          - column names must correspond to database table columns
          - types and units of the columns must match database definitions,
            no unit conversion is performed presently
          - columns that have default values in database schema can be
            omitted from catalog
          - this method knows how to fill interval-related columns of DiaObject
            (validityStart, validityEnd) they do not need to appear in a
            catalog
          - source catalogs have ``diaObjectId`` column associating sources
            with objects
        """
        raise NotImplementedError()

    @abstractmethod
    def storeSSObjects(self, objects: pandas.DataFrame) -> None:
        """Store or update SSObject catalog.

        Parameters
        ----------
        objects : `pandas.DataFrame`
            Catalog with SSObject records.

        Notes
        -----
        If SSObjects with matching IDs already exist in the database, their
        records will be updated with the information from provided records.
        """
        raise NotImplementedError()

    @abstractmethod
    def reassignDiaSources(self, idMap: Mapping[int, int]) -> None:
        """Associate DiaSources with SSObjects, dis-associating them
        from DiaObjects.

        Parameters
        ----------
        idMap : `Mapping`
            Maps DiaSource IDs to their new SSObject IDs.

        Raises
        ------
        ValueError
            Raised if DiaSource ID does not exist in the database.
        """
        raise NotImplementedError()

    @abstractmethod
    def dailyJob(self) -> None:
        """Implement daily activities like cleanup/vacuum.

        What should be done during daily activities is determined by
        specific implementation.
        """
        raise NotImplementedError()

    @abstractmethod
    def countUnassociatedObjects(self) -> int:
        """Return the number of DiaObjects that have only one DiaSource
        associated with them.

        Used as part of ap_verify metrics.

        Returns
        -------
        count : `int`
            Number of DiaObjects with exactly one associated DiaSource.

        Notes
        -----
        This method can be very inefficient or slow in some implementations.
        """
        raise NotImplementedError()

    @classmethod
    def makeField(cls, doc: str) -> ConfigurableField:
        """Make a `~lsst.pex.config.ConfigurableField` for Apdb.

        Parameters
        ----------
        doc : `str`
            Help text for the field.

        Returns
        -------
        configurableField : `lsst.pex.config.ConfigurableField`
            A `~lsst.pex.config.ConfigurableField` for Apdb, with the class
            this method is called on as its target.
        """
        return ConfigurableField(doc=doc, target=cls)

    @property
    @abstractmethod
    def metadata(self) -> ApdbMetadata:
        """Object controlling access to APDB metadata (`ApdbMetadata`)."""
        raise NotImplementedError()