Coverage for python/lsst/dax/apdb/apdb.py: 78%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

64 statements  

1# This file is part of dax_apdb. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ["ApdbConfig", "Apdb"] 

25 

26from abc import ABC, abstractmethod 

27import os 

28import pandas 

29from typing import Iterable, Mapping, Optional 

30 

31import lsst.daf.base as dafBase 

32from lsst.pex.config import Config, ConfigurableField, Field 

33from lsst.sphgeom import Region 

34from .apdbSchema import ApdbTables, TableDef 

35 

36 

def _data_file_name(basename: str) -> str:
    """Return path name of a data file in dax_apdb package.

    Parameters
    ----------
    basename : `str`
        Name of the file inside the package ``data`` directory.

    Returns
    -------
    path : `str`
        Path built by joining the literal placeholder ``${DAX_APDB_DIR}``,
        ``data`` and ``basename``.

    Notes
    -----
    The ``${DAX_APDB_DIR}`` placeholder is returned verbatim; it is not
    expanded by this function.
    """
    # The environment variable reference is kept unexpanded on purpose here;
    # presumably the consumer of the path expands it later (TODO confirm).
    return os.path.join("${DAX_APDB_DIR}", "data", basename)

41 

42 

class ApdbConfig(Config):
    """Part of Apdb configuration common to all implementations.

    Defines how many months of DiaSource and DiaForcedSource history are
    read, and where the standard and extra YAML schema files are located.
    Default schema locations are produced by `_data_file_name`.
    """

    # History window for DiaSource reads, in months.
    read_sources_months = Field(
        dtype=int,
        doc="Number of months of history to read from DiaSource",
        default=12,
    )
    # History window for DiaForcedSource reads, in months.
    read_forced_sources_months = Field(
        dtype=int,
        doc="Number of months of history to read from DiaForcedSource",
        default=12,
    )
    # Standard schema definition file.
    schema_file = Field(
        dtype=str,
        doc="Location of (YAML) configuration file with standard schema",
        default=_data_file_name("apdb-schema.yaml"),
    )
    # Optional additions/overrides merged on top of ``schema_file``.
    extra_schema_file = Field(
        dtype=str,
        doc="Location of (YAML) configuration file with extra schema, "
            "definitions in this file are merged with the definitions in "
            "'schema_file', extending or replacing parts of the schema.",
        default=_data_file_name("apdb-schema-extra.yaml"),
    )

68 

69 

class Apdb(ABC):
    """Abstract interface for APDB.

    Every method decorated with `abc.abstractmethod` must be overridden by a
    concrete implementation; the bodies defined here only raise
    `NotImplementedError`.
    """

    # Configuration class associated with this interface.
    ConfigClass = ApdbConfig

    @abstractmethod
    def tableDef(self, table: ApdbTables) -> Optional[TableDef]:
        """Return table schema definition for a given table.

        Parameters
        ----------
        table : `ApdbTables`
            One of the known APDB tables.

        Returns
        -------
        tableSchema : `TableDef` or `None`
            Table schema description, `None` is returned if table is not
            defined by this implementation.
        """
        raise NotImplementedError()

    @abstractmethod
    def makeSchema(self, drop: bool = False) -> None:
        """Create or re-create whole database schema.

        Parameters
        ----------
        drop : `bool`
            If True then drop all tables before creating new ones.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaObjects(self, region: Region) -> pandas.DataFrame:
        """Return catalog of DiaObject instances from a given region.

        This method returns only the last version of each DiaObject. Some
        records in a returned catalog may be outside the specified region, it
        is up to a client to ignore those records or cleanup the catalog
        before further use.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DiaObjects.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog containing DiaObject records for a region that may be a
            superset of the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaSources(
        self,
        region: Region,
        object_ids: Optional[Iterable[int]],
        visit_time: dafBase.DateTime,
    ) -> Optional[pandas.DataFrame]:
        """Return catalog of DiaSource instances from a given region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DiaSources.
        object_ids : iterable [ `int` ], optional
            List of DiaObject IDs to further constrain the set of returned
            sources. If `None` then returned sources are not constrained. If
            list is empty then empty catalog is returned with a correct
            schema.
        visit_time : `lsst.daf.base.DateTime`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            Catalog containing DiaSource records. `None` is returned if
            ``read_sources_months`` configuration parameter is set to 0.

        Notes
        -----
        This method returns DiaSource catalog for a region with additional
        filtering based on DiaObject IDs. Only a subset of DiaSource history
        is returned limited by ``read_sources_months`` config parameter,
        w.r.t. ``visit_time``. If ``object_ids`` is empty then an empty
        catalog is always returned with the correct schema (columns/types).
        If ``object_ids`` is `None` then no filtering is performed and some
        of the returned records may be outside the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaForcedSources(
        self,
        region: Region,
        object_ids: Optional[Iterable[int]],
        visit_time: dafBase.DateTime,
    ) -> Optional[pandas.DataFrame]:
        """Return catalog of DiaForcedSource instances from a given region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DiaForcedSources.
        object_ids : iterable [ `int` ], optional
            List of DiaObject IDs to further constrain the set of returned
            sources. If list is empty then empty catalog is returned with a
            correct schema. If `None` then returned sources are not
            constrained. Some implementations may not support latter case.
        visit_time : `lsst.daf.base.DateTime`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            Catalog containing DiaForcedSource records. `None` is returned if
            ``read_forced_sources_months`` configuration parameter is set to
            0.

        Raises
        ------
        NotImplementedError
            May be raised by some implementations if ``object_ids`` is
            `None`.

        Notes
        -----
        This method returns DiaForcedSource catalog for a region with
        additional filtering based on DiaObject IDs. Only a subset of
        DiaForcedSource history is returned limited by
        ``read_forced_sources_months`` config parameter, w.r.t.
        ``visit_time``. If ``object_ids`` is empty then an empty catalog is
        always returned with the correct schema (columns/types). If
        ``object_ids`` is `None` then no filtering is performed and some of
        the returned records may be outside the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaObjectsHistory(
        self,
        start_time: dafBase.DateTime,
        end_time: Optional[dafBase.DateTime] = None,
        region: Optional[Region] = None,
    ) -> pandas.DataFrame:
        """Return catalog of DiaObject instances from a given time period
        including the history of each DiaObject.

        Parameters
        ----------
        start_time : `lsst.daf.base.DateTime`
            Starting time for DiaObject history search. DiaObject record is
            selected when its ``validityStart`` falls into an interval
            between ``start_time`` (inclusive) and ``end_time`` (exclusive).
        end_time : `lsst.daf.base.DateTime`, optional
            Upper limit on time for DiaObject history search, if not
            specified then there is no restriction on upper limit.
        region : `lsst.sphgeom.Region`, optional
            Region to search for DiaObjects, if not specified then whole sky
            is searched. If region is specified then some returned records
            may fall outside of this region.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog containing DiaObject records.

        Notes
        -----
        This part of API may not be very stable and can change before the
        implementation finalizes.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaSourcesHistory(
        self,
        start_time: dafBase.DateTime,
        end_time: Optional[dafBase.DateTime] = None,
        region: Optional[Region] = None,
    ) -> pandas.DataFrame:
        """Return catalog of DiaSource instances from a given time period.

        Parameters
        ----------
        start_time : `lsst.daf.base.DateTime`
            Starting time for DiaSource history search. DiaSource record is
            selected when its ``midPointTai`` falls into an interval between
            ``start_time`` (inclusive) and ``end_time`` (exclusive).
        end_time : `lsst.daf.base.DateTime`, optional
            Upper limit on time for DiaSource history search, if not
            specified then there is no restriction on upper limit.
        region : `lsst.sphgeom.Region`, optional
            Region to search for DiaSources, if not specified then whole sky
            is searched. If region is specified then some returned records
            may fall outside of this region.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog containing DiaSource records.

        Notes
        -----
        This part of API may not be very stable and can change before the
        implementation finalizes.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaForcedSourcesHistory(
        self,
        start_time: dafBase.DateTime,
        end_time: Optional[dafBase.DateTime] = None,
        region: Optional[Region] = None,
    ) -> pandas.DataFrame:
        """Return catalog of DiaForcedSource instances from a given time
        period.

        Parameters
        ----------
        start_time : `lsst.daf.base.DateTime`
            Starting time for DiaForcedSource history search. DiaForcedSource
            record is selected when its ``midPointTai`` falls into an
            interval between ``start_time`` (inclusive) and ``end_time``
            (exclusive).
        end_time : `lsst.daf.base.DateTime`, optional
            Upper limit on time for DiaForcedSource history search, if not
            specified then there is no restriction on upper limit.
        region : `lsst.sphgeom.Region`, optional
            Region to search for DiaForcedSources, if not specified then
            whole sky is searched. If region is specified then some returned
            records may fall outside of this region.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog containing DiaForcedSource records.

        Notes
        -----
        This part of API may not be very stable and can change before the
        implementation finalizes.
        """
        raise NotImplementedError()

    @abstractmethod
    def getSSObjects(self) -> pandas.DataFrame:
        """Return catalog of SSObject instances.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog containing SSObject records, all existing records are
            returned.
        """
        raise NotImplementedError()

    @abstractmethod
    def store(
        self,
        visit_time: dafBase.DateTime,
        objects: pandas.DataFrame,
        sources: Optional[pandas.DataFrame] = None,
        forced_sources: Optional[pandas.DataFrame] = None,
    ) -> None:
        """Store all three types of catalogs in the database.

        Parameters
        ----------
        visit_time : `lsst.daf.base.DateTime`
            Time of the visit.
        objects : `pandas.DataFrame`
            Catalog with DiaObject records.
        sources : `pandas.DataFrame`, optional
            Catalog with DiaSource records.
        forced_sources : `pandas.DataFrame`, optional
            Catalog with DiaForcedSource records.

        Notes
        -----
        This method takes DataFrame catalogs, their schema must be
        compatible with the schema of APDB table:

          - column names must correspond to database table columns
          - types and units of the columns must match database definitions,
            no unit conversion is performed presently
          - columns that have default values in database schema can be
            omitted from catalog
          - this method knows how to fill interval-related columns of
            DiaObject (validityStart, validityEnd) they do not need to
            appear in a catalog
          - source catalogs have ``diaObjectId`` column associating sources
            with objects
        """
        raise NotImplementedError()

    @abstractmethod
    def storeSSObjects(self, objects: pandas.DataFrame) -> None:
        """Store or update SSObject catalog.

        Parameters
        ----------
        objects : `pandas.DataFrame`
            Catalog with SSObject records.

        Notes
        -----
        If SSObjects with matching IDs already exist in the database, their
        records will be updated with the information from provided records.
        """
        raise NotImplementedError()

    @abstractmethod
    def reassignDiaSources(self, idMap: Mapping[int, int]) -> None:
        """Associate DiaSources with SSObjects, dis-associating them
        from DiaObjects.

        Parameters
        ----------
        idMap : `Mapping`
            Maps DiaSource IDs to their new SSObject IDs.
        """
        raise NotImplementedError()

    @abstractmethod
    def dailyJob(self) -> None:
        """Implement daily activities like cleanup/vacuum.

        What should be done during daily activities is determined by
        specific implementation.
        """
        raise NotImplementedError()

    @abstractmethod
    def countUnassociatedObjects(self) -> int:
        """Return the number of DiaObjects that have only one DiaSource
        associated with them.

        Used as part of ap_verify metrics.

        Returns
        -------
        count : `int`
            Number of DiaObjects with exactly one associated DiaSource.

        Notes
        -----
        This method can be very inefficient or slow in some implementations.
        """
        raise NotImplementedError()

    @classmethod
    def makeField(cls, doc: str) -> ConfigurableField:
        """Make a `~lsst.pex.config.ConfigurableField` for Apdb.

        Parameters
        ----------
        doc : `str`
            Help text for the field.

        Returns
        -------
        configurableField : `lsst.pex.config.ConfigurableField`
            A `~lsst.pex.config.ConfigurableField` for Apdb, constructed
            with ``target`` set to the class this method is called on.
        """
        return ConfigurableField(doc=doc, target=cls)