Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

# This file is part of dax_apdb.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

21 

22from __future__ import annotations 

23 

24__all__ = ["ApdbConfig", "Apdb"] 

25 

26from abc import ABC, abstractmethod 

27import os 

28import pandas 

29from typing import Iterable, Optional 

30 

31import lsst.daf.base as dafBase 

32from lsst.pex.config import Config, ConfigurableField, Field 

33from lsst.sphgeom import Region 

34 

35 

36def _data_file_name(basename: str) -> str: 

37 """Return path name of a data file in dax_apdb package. 

38 """ 

39 return os.path.join("${DAX_APDB_DIR}", "data", basename) 

40 

41 

class ApdbConfig(Config):
    """Configuration options shared by every Apdb implementation.
    """

    # How far back in time the DiaSource history is read.
    read_sources_months = Field(
        doc="Number of months of history to read from DiaSource",
        dtype=int,
        default=12,
    )

    # How far back in time the DiaForcedSource history is read.
    read_forced_sources_months = Field(
        doc="Number of months of history to read from DiaForcedSource",
        dtype=int,
        default=12,
    )

    # Default points at a file in the package "data" directory.
    schema_file = Field(
        doc="Location of (YAML) configuration file with standard schema",
        dtype=str,
        default=_data_file_name("apdb-schema.yaml"),
    )

    # Default points at a file in the package "data" directory.
    extra_schema_file = Field(
        doc="Location of (YAML) configuration file with extra schema",
        dtype=str,
        default=_data_file_name("apdb-schema-extra.yaml"),
    )

57 

58 

class Apdb(ABC):
    """Abstract base class defining the interface of APDB.

    Concrete implementations have to override every abstract method
    declared here.
    """

    # Configuration class associated with this interface.
    ConfigClass = ApdbConfig

    @abstractmethod
    def makeSchema(self, drop: bool = False) -> None:
        """Create or re-create the complete database schema.

        Parameters
        ----------
        drop : `bool`
            If `True` then all tables are dropped before new ones are
            created.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaObjects(self, region: Region) -> pandas.DataFrame:
        """Return catalog of DiaObject instances from a given region.

        Only the latest version of each DiaObject is returned. The returned
        catalog may include records located outside of the specified
        region; it is the responsibility of a client to ignore those
        records or to clean up the catalog before further use.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DiaObjects.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog with DiaObject records, covering an area that may be a
            superset of the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaSources(
        self,
        region: Region,
        object_ids: Optional[Iterable[int]],
        visit_time: dafBase.DateTime,
    ) -> Optional[pandas.DataFrame]:
        """Return catalog of DiaSource instances from a given region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DiaSources.
        object_ids : iterable [ `int` ], optional
            DiaObject IDs used to further constrain the set of returned
            sources. With `None` the returned sources are not constrained.
            With an empty list an empty catalog with a correct schema is
            returned.
        visit_time : `lsst.daf.base.DateTime`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            Catalog with DiaSource records. `None` is returned when the
            ``read_sources_months`` configuration parameter is set to 0.

        Notes
        -----
        The DiaSource catalog is selected by region and additionally
        filtered by DiaObject IDs. The history that is returned is limited
        by the ``read_sources_months`` config parameter, counted w.r.t.
        ``visit_time``. An empty ``object_ids`` always produces an empty
        catalog that has the correct schema (columns/types). When
        ``object_ids`` is `None` no filtering is performed and some of the
        returned records may be outside the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaForcedSources(
        self,
        region: Region,
        object_ids: Optional[Iterable[int]],
        visit_time: dafBase.DateTime,
    ) -> Optional[pandas.DataFrame]:
        """Return catalog of DiaForcedSource instances from a given region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DiaForcedSources.
        object_ids : iterable [ `int` ], optional
            DiaObject IDs used to further constrain the set of returned
            sources. With an empty list an empty catalog with a correct
            schema is returned. With `None` the returned sources are not
            constrained; some implementations may not support the latter
            case.
        visit_time : `lsst.daf.base.DateTime`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            Catalog with DiaForcedSource records. `None` is returned when
            the ``read_forced_sources_months`` configuration parameter is
            set to 0.

        Raises
        ------
        NotImplementedError
            May be raised by some implementations if ``object_ids`` is
            `None`.

        Notes
        -----
        The DiaForcedSource catalog is selected by region and additionally
        filtered by DiaObject IDs. The history that is returned is limited
        by the ``read_forced_sources_months`` config parameter, counted
        w.r.t. ``visit_time``. An empty ``object_ids`` always produces an
        empty catalog that has the correct schema (columns/types). When
        ``object_ids`` is `None` no filtering is performed and some of the
        returned records may be outside the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def store(
        self,
        visit_time: dafBase.DateTime,
        objects: pandas.DataFrame,
        sources: Optional[pandas.DataFrame] = None,
        forced_sources: Optional[pandas.DataFrame] = None,
    ) -> None:
        """Store all three types of catalogs in the database.

        Parameters
        ----------
        visit_time : `lsst.daf.base.DateTime`
            Time of the visit.
        objects : `pandas.DataFrame`
            Catalog with DiaObject records.
        sources : `pandas.DataFrame`, optional
            Catalog with DiaSource records.
        forced_sources : `pandas.DataFrame`, optional
            Catalog with DiaForcedSource records.

        Notes
        -----
        This method takes DataFrame catalogs whose schema must be
        compatible with the schema of the APDB tables:

        - column names must correspond to database table columns
        - types and units of the columns must match database definitions,
          no unit conversion is performed presently
        - columns that have default values in database schema can be
          omitted from catalog
        - this method knows how to fill interval-related columns of
          DiaObject (validityStart, validityEnd), they do not need to
          appear in a catalog
        - source catalogs have ``diaObjectId`` column associating sources
          with objects
        """
        raise NotImplementedError()

    @abstractmethod
    def dailyJob(self) -> None:
        """Run daily activities such as cleanup/vacuum.

        What exactly is done during daily activities is determined by the
        specific implementation.
        """
        raise NotImplementedError()

    @abstractmethod
    def countUnassociatedObjects(self) -> int:
        """Return the number of DiaObjects that have only one DiaSource
        associated with them.

        Used as part of ap_verify metrics.

        Returns
        -------
        count : `int`
            Number of DiaObjects with exactly one associated DiaSource.

        Notes
        -----
        This method can be very inefficient or slow in some
        implementations.
        """
        raise NotImplementedError()

    @classmethod
    def makeField(cls, doc: str) -> ConfigurableField:
        """Make a `~lsst.pex.config.ConfigurableField` for Apdb.

        Parameters
        ----------
        doc : `str`
            Help text for the field.

        Returns
        -------
        configurableField : `lsst.pex.config.ConfigurableField`
            A `~lsst.pex.config.ConfigurableField` whose target is the
            class this method is called on.
        """
        configurable_field = ConfigurableField(target=cls, doc=doc)
        return configurable_field