Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

# This file is part of dax_apdb.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

21 

22from __future__ import annotations 

23 

24__all__ = ["ApdbConfig", "Apdb"] 

25 

26from abc import ABC, abstractmethod 

27import os 

28import pandas 

29from typing import Iterable, Optional 

30 

31import lsst.daf.base as dafBase 

32from lsst.pex.config import Config, ConfigurableField, Field 

33from lsst.sphgeom import Region 

34from .apdbSchema import ApdbTables, TableDef 

35 

36 

def _data_file_name(basename: str) -> str:
    """Return path name of a data file in dax_apdb package.

    Parameters
    ----------
    basename : `str`
        Name of the file inside the package ``data`` directory.

    Returns
    -------
    path : `str`
        Path built from the literal prefix ``${DAX_APDB_DIR}``, the ``data``
        directory and ``basename``. The environment variable reference is
        returned as-is, it is not expanded by this function.
    """
    return os.path.join("${DAX_APDB_DIR}", "data", basename)

41 

42 

class ApdbConfig(Config):
    """Part of Apdb configuration common to all implementations.

    Defines how much DiaSource/DiaForcedSource history is read and where
    the YAML schema definition files are located.
    """

    read_sources_months = Field(
        dtype=int,
        doc="Number of months of history to read from DiaSource",
        default=12,
    )
    read_forced_sources_months = Field(
        dtype=int,
        doc="Number of months of history to read from DiaForcedSource",
        default=12,
    )
    # Default schema locations are given relative to ${DAX_APDB_DIR}/data.
    schema_file = Field(
        dtype=str,
        doc="Location of (YAML) configuration file with standard schema",
        default=_data_file_name("apdb-schema.yaml"),
    )
    extra_schema_file = Field(
        dtype=str,
        doc="Location of (YAML) configuration file with extra schema, "
            "definitions in this file are merged with the definitions in "
            "'schema_file', extending or replacing parts of the schema.",
        default=_data_file_name("apdb-schema-extra.yaml"),
    )

68 

69 

class Apdb(ABC):
    """Abstract interface for APDB.

    Concrete implementations have to provide all methods decorated with
    `abc.abstractmethod`; `makeField` is the only concrete method.
    """

    # Configuration class associated with this interface.
    ConfigClass = ApdbConfig

    @abstractmethod
    def tableDef(self, table: ApdbTables) -> Optional[TableDef]:
        """Return table schema definition for a given table.

        Parameters
        ----------
        table : `ApdbTables`
            One of the known APDB tables.

        Returns
        -------
        tableSchema : `TableDef` or `None`
            Table schema description, `None` is returned if table is not
            defined by this implementation.
        """
        raise NotImplementedError()

    @abstractmethod
    def makeSchema(self, drop: bool = False) -> None:
        """Create or re-create whole database schema.

        Parameters
        ----------
        drop : `bool`
            If True then drop all tables before creating new ones.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaObjects(self, region: Region) -> pandas.DataFrame:
        """Return catalog of DiaObject instances from a given region.

        This method returns only the last version of each DiaObject. Some
        records in a returned catalog may be outside the specified region, it
        is up to a client to ignore those records or cleanup the catalog
        before further use.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DIAObjects.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog containing DiaObject records for a region that may be a
            superset of the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaSources(
        self,
        region: Region,
        object_ids: Optional[Iterable[int]],
        visit_time: dafBase.DateTime,
    ) -> Optional[pandas.DataFrame]:
        """Return catalog of DiaSource instances from a given region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DIASources.
        object_ids : iterable [ `int` ], optional
            List of DiaObject IDs to further constrain the set of returned
            sources. If `None` then returned sources are not constrained. If
            list is empty then empty catalog is returned with a correct
            schema.
        visit_time : `lsst.daf.base.DateTime`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            Catalog containing DiaSource records. `None` is returned if
            ``read_sources_months`` configuration parameter is set to 0.

        Notes
        -----
        This method returns DiaSource catalog for a region with additional
        filtering based on DiaObject IDs. Only a subset of DiaSource history
        is returned limited by ``read_sources_months`` config parameter,
        w.r.t. ``visit_time``. If ``object_ids`` is empty then an empty
        catalog is always returned with the correct schema (columns/types).
        If ``object_ids`` is `None` then no filtering is performed and some
        of the returned records may be outside the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaForcedSources(
        self,
        region: Region,
        object_ids: Optional[Iterable[int]],
        visit_time: dafBase.DateTime,
    ) -> Optional[pandas.DataFrame]:
        """Return catalog of DiaForcedSource instances from a given region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DIAForcedSources.
        object_ids : iterable [ `int` ], optional
            List of DiaObject IDs to further constrain the set of returned
            sources. If list is empty then empty catalog is returned with a
            correct schema. If `None` then returned sources are not
            constrained. Some implementations may not support latter case.
        visit_time : `lsst.daf.base.DateTime`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            Catalog containing DiaForcedSource records. `None` is returned if
            ``read_forced_sources_months`` configuration parameter is set to
            0.

        Raises
        ------
        NotImplementedError
            May be raised by some implementations if ``object_ids`` is `None`.

        Notes
        -----
        This method returns DiaForcedSource catalog for a region with
        additional filtering based on DiaObject IDs. Only a subset of
        DiaForcedSource history is returned limited by
        ``read_forced_sources_months`` config parameter, w.r.t.
        ``visit_time``. If ``object_ids`` is empty then an empty catalog is
        always returned with the correct schema (columns/types). If
        ``object_ids`` is `None` then no filtering is performed and some of
        the returned records may be outside the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def store(
        self,
        visit_time: dafBase.DateTime,
        objects: pandas.DataFrame,
        sources: Optional[pandas.DataFrame] = None,
        forced_sources: Optional[pandas.DataFrame] = None,
    ) -> None:
        """Store all three types of catalogs in the database.

        Parameters
        ----------
        visit_time : `lsst.daf.base.DateTime`
            Time of the visit.
        objects : `pandas.DataFrame`
            Catalog with DiaObject records.
        sources : `pandas.DataFrame`, optional
            Catalog with DiaSource records.
        forced_sources : `pandas.DataFrame`, optional
            Catalog with DiaForcedSource records.

        Notes
        -----
        This method takes DataFrame catalogs, their schema must be
        compatible with the schema of APDB table:

          - column names must correspond to database table columns
          - types and units of the columns must match database definitions,
            no unit conversion is performed presently
          - columns that have default values in database schema can be
            omitted from catalog
          - this method knows how to fill interval-related columns of
            DiaObject (validityStart, validityEnd) they do not need to appear
            in a catalog
          - source catalogs have ``diaObjectId`` column associating sources
            with objects
        """
        raise NotImplementedError()

    @abstractmethod
    def dailyJob(self) -> None:
        """Implement daily activities like cleanup/vacuum.

        What should be done during daily activities is determined by
        specific implementation.
        """
        raise NotImplementedError()

    @abstractmethod
    def countUnassociatedObjects(self) -> int:
        """Return the number of DiaObjects that have only one DiaSource
        associated with them.

        Used as part of ap_verify metrics.

        Returns
        -------
        count : `int`
            Number of DiaObjects with exactly one associated DiaSource.

        Notes
        -----
        This method can be very inefficient or slow in some implementations.
        """
        raise NotImplementedError()

    @classmethod
    def makeField(cls, doc: str) -> ConfigurableField:
        """Make a `~lsst.pex.config.ConfigurableField` for Apdb.

        Parameters
        ----------
        doc : `str`
            Help text for the field.

        Returns
        -------
        configurableField : `lsst.pex.config.ConfigurableField`
            A `~lsst.pex.config.ConfigurableField` for Apdb, its target is
            the class on which this method is called.
        """
        return ConfigurableField(doc=doc, target=cls)