Coverage for python/lsst/dax/apdb/apdb.py: 78%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

64 statements  

1# This file is part of dax_apdb. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ["ApdbConfig", "Apdb"] 

25 

26from abc import ABC, abstractmethod 

27import os 

28import pandas 

29from typing import Iterable, Mapping, Optional 

30 

31import lsst.daf.base as dafBase 

32from lsst.pex.config import Config, ConfigurableField, Field 

33from lsst.sphgeom import Region 

34from .apdbSchema import ApdbTables, TableDef 

35 

36 

def _data_file_name(basename: str) -> str:
    """Build the path of a file in the ``data`` directory of dax_apdb.

    Parameters
    ----------
    basename : `str`
        Name of the file inside the ``data`` directory.

    Returns
    -------
    path : `str`
        Path that starts with the literal ``${DAX_APDB_DIR}`` placeholder.
        The placeholder is returned as is, it is not expanded here.
    """
    package_root = "${DAX_APDB_DIR}"
    path_parts = (package_root, "data", basename)
    return os.path.join(*path_parts)

41 

42 

class ApdbConfig(Config):
    """Configuration options shared by every Apdb implementation.
    """

    # History depth used when reading DiaSource records.
    read_sources_months = Field(
        doc="Number of months of history to read from DiaSource",
        dtype=int,
        default=12,
    )

    # History depth used when reading DiaForcedSource records.
    read_forced_sources_months = Field(
        doc="Number of months of history to read from DiaForcedSource",
        dtype=int,
        default=12,
    )

    # Standard schema, by default a file in the package "data" directory.
    schema_file = Field(
        doc="Location of (YAML) configuration file with standard schema",
        dtype=str,
        default=_data_file_name("apdb-schema.yaml"),
    )

    # Optional additions/overrides applied on top of ``schema_file``.
    extra_schema_file = Field(
        doc=(
            "Location of (YAML) configuration file with extra schema, "
            "definitions in this file are merged with the definitions in "
            "'schema_file', extending or replacing parts of the schema."
        ),
        dtype=str,
        default=_data_file_name("apdb-schema-extra.yaml"),
    )

68 

69 

class Apdb(ABC):
    """Abstract base class defining the APDB interface.

    Every method except `makeField` is abstract and has to be provided by a
    concrete implementation.
    """

    # Configuration class associated with this interface.
    ConfigClass = ApdbConfig

    @abstractmethod
    def tableDef(self, table: ApdbTables) -> Optional[TableDef]:
        """Look up the schema definition of a single table.

        Parameters
        ----------
        table : `ApdbTables`
            One of the known APDB tables.

        Returns
        -------
        tableSchema : `TableDef` or `None`
            Description of the table schema, or `None` when the table is not
            defined by this implementation.
        """
        raise NotImplementedError

    @abstractmethod
    def makeSchema(self, drop: bool = False) -> None:
        """Create the complete database schema, optionally re-creating it.

        Parameters
        ----------
        drop : `bool`
            If True then all tables are dropped before new ones are created.
        """
        raise NotImplementedError

    @abstractmethod
    def getDiaObjects(self, region: Region) -> pandas.DataFrame:
        """Return catalog of DiaObject instances from a given region.

        Only the last version of each DiaObject is returned. The returned
        catalog may include records located outside of the specified region,
        it is the responsibility of a client to ignore such records or to
        clean up the catalog before further use.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DiaObjects.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog of DiaObject records covering a region that may be a
            superset of the specified region.
        """
        raise NotImplementedError

    @abstractmethod
    def getDiaSources(
        self,
        region: Region,
        object_ids: Optional[Iterable[int]],
        visit_time: dafBase.DateTime,
    ) -> Optional[pandas.DataFrame]:
        """Return catalog of DiaSource instances from a given region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DiaSources.
        object_ids : iterable [ `int` ], optional
            DiaObject IDs used to further constrain the set of returned
            sources. With `None` the returned sources are not constrained.
            With an empty list an empty catalog with a correct schema is
            returned.
        visit_time : `lsst.daf.base.DateTime`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            Catalog of DiaSource records. `None` is returned when the
            ``read_sources_months`` configuration parameter is set to 0.

        Notes
        -----
        The DiaSource catalog for a region is additionally filtered by
        DiaObject IDs. The returned DiaSource history is limited to the
        period given by the ``read_sources_months`` config parameter, counted
        w.r.t. ``visit_time``. An empty ``object_ids`` always results in an
        empty catalog with the correct schema (columns/types). When
        ``object_ids`` is `None` no filtering is done and some of the
        returned records may be outside the specified region.
        """
        raise NotImplementedError

    @abstractmethod
    def getDiaForcedSources(
        self,
        region: Region,
        object_ids: Optional[Iterable[int]],
        visit_time: dafBase.DateTime,
    ) -> Optional[pandas.DataFrame]:
        """Return catalog of DiaForcedSource instances from a given region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DiaForcedSources.
        object_ids : iterable [ `int` ], optional
            DiaObject IDs used to further constrain the set of returned
            sources. With an empty list an empty catalog with a correct
            schema is returned. With `None` the returned sources are not
            constrained; some implementations may not support the latter
            case.
        visit_time : `lsst.daf.base.DateTime`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            Catalog of DiaForcedSource records. `None` is returned when the
            ``read_forced_sources_months`` configuration parameter is set
            to 0.

        Raises
        ------
        NotImplementedError
            May be raised by some implementations if ``object_ids`` is `None`.

        Notes
        -----
        The DiaForcedSource catalog for a region is additionally filtered by
        DiaObject IDs. The returned DiaForcedSource history is limited to the
        period given by the ``read_forced_sources_months`` config parameter,
        counted w.r.t. ``visit_time``. An empty ``object_ids`` always results
        in an empty catalog with the correct schema (columns/types). When
        ``object_ids`` is `None` no filtering is done and some of the
        returned records may be outside the specified region.
        """
        raise NotImplementedError

    @abstractmethod
    def getDiaObjectsHistory(
        self,
        start_time: dafBase.DateTime,
        end_time: dafBase.DateTime,
        region: Optional[Region] = None,
    ) -> pandas.DataFrame:
        """Return catalog of DiaObject instances from a given time period,
        including the history of each DiaObject.

        Parameters
        ----------
        start_time : `dafBase.DateTime`
            Lower limit on time for DiaObject history search. A DiaObject
            record is selected when its ``validityStart`` is in the interval
            from ``start_time`` (inclusive) to ``end_time`` (exclusive).
        end_time : `dafBase.DateTime`
            Upper limit on time for DiaObject history search.
        region : `lsst.sphgeom.Region`, optional
            Region to search for DiaObjects, the whole sky is searched when
            it is not specified. When it is specified some of the returned
            records may still fall outside of this region.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog of DiaObject records.

        Notes
        -----
        This part of the API may not be very stable and can change before
        the implementation finalizes.
        """
        raise NotImplementedError

    @abstractmethod
    def getDiaSourcesHistory(
        self,
        start_time: dafBase.DateTime,
        end_time: dafBase.DateTime,
        region: Optional[Region] = None,
    ) -> pandas.DataFrame:
        """Return catalog of DiaSource instances from a given time period.

        Parameters
        ----------
        start_time : `dafBase.DateTime`
            Lower limit on time for DiaSource history search. A DiaSource
            record is selected when its ``midPointTai`` is in the interval
            from ``start_time`` (inclusive) to ``end_time`` (exclusive).
        end_time : `dafBase.DateTime`
            Upper limit on time for DiaSource history search.
        region : `lsst.sphgeom.Region`, optional
            Region to search for DiaSources, the whole sky is searched when
            it is not specified. When it is specified some of the returned
            records may still fall outside of this region.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog of DiaSource records.

        Notes
        -----
        This part of the API may not be very stable and can change before
        the implementation finalizes.
        """
        raise NotImplementedError

    @abstractmethod
    def getDiaForcedSourcesHistory(
        self,
        start_time: dafBase.DateTime,
        end_time: dafBase.DateTime,
        region: Optional[Region] = None,
    ) -> pandas.DataFrame:
        """Return catalog of DiaForcedSource instances from a given time
        period.

        Parameters
        ----------
        start_time : `dafBase.DateTime`
            Lower limit on time for DiaForcedSource history search. A
            DiaForcedSource record is selected when its ``midPointTai`` is in
            the interval from ``start_time`` (inclusive) to ``end_time``
            (exclusive).
        end_time : `dafBase.DateTime`
            Upper limit on time for DiaForcedSource history search.
        region : `lsst.sphgeom.Region`, optional
            Region to search for DiaForcedSources, the whole sky is searched
            when it is not specified. When it is specified some of the
            returned records may still fall outside of this region.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog of DiaForcedSource records.

        Notes
        -----
        This part of the API may not be very stable and can change before
        the implementation finalizes. Some implementations may not support
        region filtering, they will return records from the whole sky.
        """
        raise NotImplementedError

    @abstractmethod
    def getSSObjects(self) -> pandas.DataFrame:
        """Return catalog of SSObject instances.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog of SSObject records, every existing record is included.
        """
        raise NotImplementedError

    @abstractmethod
    def store(
        self,
        visit_time: dafBase.DateTime,
        objects: pandas.DataFrame,
        sources: Optional[pandas.DataFrame] = None,
        forced_sources: Optional[pandas.DataFrame] = None,
    ) -> None:
        """Store all three types of catalogs in the database.

        Parameters
        ----------
        visit_time : `lsst.daf.base.DateTime`
            Time of the visit.
        objects : `pandas.DataFrame`
            Catalog with DiaObject records.
        sources : `pandas.DataFrame`, optional
            Catalog with DiaSource records.
        forced_sources : `pandas.DataFrame`, optional
            Catalog with DiaForcedSource records.

        Notes
        -----
        This method takes DataFrame catalogs whose schema must be compatible
        with the schema of the corresponding APDB table:

          - column names must correspond to database table columns
          - types and units of the columns must match database definitions,
            no unit conversion is performed presently
          - columns that have default values in the database schema can be
            omitted from a catalog
          - interval-related columns of DiaObject (validityStart,
            validityEnd) are filled by this method, they do not need to
            appear in a catalog
          - source catalogs have a ``diaObjectId`` column associating
            sources with objects
        """
        raise NotImplementedError

    @abstractmethod
    def storeSSObjects(self, objects: pandas.DataFrame) -> None:
        """Store or update SSObject catalog.

        Parameters
        ----------
        objects : `pandas.DataFrame`
            Catalog with SSObject records.

        Notes
        -----
        Records of SSObjects whose IDs already exist in the database are
        updated with the information from the provided records.
        """
        raise NotImplementedError

    @abstractmethod
    def reassignDiaSources(self, idMap: Mapping[int, int]) -> None:
        """Associate DiaSources with SSObjects, dis-associating them from
        DiaObjects.

        Parameters
        ----------
        idMap : `Mapping`
            Maps DiaSource IDs to their new SSObject IDs.

        Raises
        ------
        ValueError
            Raised if a DiaSource ID does not exist in the database.
        """
        raise NotImplementedError

    @abstractmethod
    def dailyJob(self) -> None:
        """Perform daily activities like cleanup/vacuum.

        The exact set of daily activities is determined by a specific
        implementation.
        """
        raise NotImplementedError

    @abstractmethod
    def countUnassociatedObjects(self) -> int:
        """Count DiaObjects that have only one DiaSource associated with
        them.

        Used as part of ap_verify metrics.

        Returns
        -------
        count : `int`
            Number of DiaObjects with exactly one associated DiaSource.

        Notes
        -----
        This method can be very inefficient or slow in some implementations.
        """
        raise NotImplementedError

    @classmethod
    def makeField(cls, doc: str) -> ConfigurableField:
        """Make a `~lsst.pex.config.ConfigurableField` for Apdb.

        Parameters
        ----------
        doc : `str`
            Help text for the field.

        Returns
        -------
        configurableField : `lsst.pex.config.ConfigurableField`
            A `~lsst.pex.config.ConfigurableField` whose target is the class
            this method was called on.
        """
        configurable_field = ConfigurableField(target=cls, doc=doc)
        return configurable_field