Coverage for python/lsst/dax/apdb/apdb.py: 78%

65 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2022-09-03 08:42 +0000

1# This file is part of dax_apdb. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ["ApdbConfig", "Apdb"] 

25 

26from abc import ABC, abstractmethod 

27import os 

28import pandas 

29from typing import Iterable, Mapping, Optional 

30 

31import lsst.daf.base as dafBase 

32from lsst.pex.config import Config, ConfigurableField, Field 

33from lsst.sphgeom import Region 

34from .apdbSchema import ApdbTables, TableDef 

35 

36 

def _data_file_name(basename: str) -> str:
    """Build the path name of a data file in the sdm_schemas package.

    Parameters
    ----------
    basename : `str`
        File name, placed under the ``yml`` sub-directory.

    Returns
    -------
    path : `str`
        Path rooted at the literal ``${SDM_SCHEMAS_DIR}`` placeholder; the
        placeholder is returned as-is, this function does not expand it.
    """
    components = ("${SDM_SCHEMAS_DIR}", "yml", basename)
    return os.path.join(*components)

41 

42 

class ApdbConfig(Config):
    """Configuration options shared by every Apdb implementation."""

    # Length of the DiaSource history window, in months.
    read_sources_months = Field(
        doc="Number of months of history to read from DiaSource",
        dtype=int,
        default=12,
    )

    # Length of the DiaForcedSource history window, in months.
    read_forced_sources_months = Field(
        doc="Number of months of history to read from DiaForcedSource",
        dtype=int,
        default=12,
    )

    # Default points at "apdb.yaml" via the module-level path helper.
    schema_file = Field(
        doc="Location of (YAML) configuration file with standard schema",
        dtype=str,
        default=_data_file_name("apdb.yaml"),
    )

    schema_name = Field(
        doc="Name of the schema in YAML configuration file.",
        dtype=str,
        default="ApdbSchema",
    )

    # Deprecated option; per its deprecation message the value is not used.
    extra_schema_file = Field(
        doc=(
            "Location of (YAML) configuration file with extra schema, definitions "
            "in this file are merged with the definitions in 'schema_file', "
            "extending or replacing parts of the schema."
        ),
        dtype=str,
        default=None,
        optional=True,
        deprecated="This field is deprecated, its value is not used.",
    )

75 

76 

class Apdb(ABC):
    """Abstract base class defining the APDB interface.

    Every method except `makeField` is abstract and has to be provided by a
    concrete implementation.
    """

    # Configuration class associated with this interface.
    ConfigClass = ApdbConfig

    @abstractmethod
    def tableDef(self, table: ApdbTables) -> Optional[TableDef]:
        """Look up the schema definition of one APDB table.

        Parameters
        ----------
        table : `ApdbTables`
            Identifier of one of the known APDB tables.

        Returns
        -------
        tableSchema : `TableDef` or `None`
            Description of the table schema; `None` when this implementation
            does not define the requested table.
        """
        raise NotImplementedError

    @abstractmethod
    def makeSchema(self, drop: bool = False) -> None:
        """Create the complete database schema, or re-create it.

        Parameters
        ----------
        drop : `bool`
            When True, all tables are dropped before new ones are created.
        """
        raise NotImplementedError

    @abstractmethod
    def getDiaObjects(self, region: Region) -> pandas.DataFrame:
        """Return a catalog of DiaObject instances for a region.

        Only the latest version of each DiaObject is returned. The returned
        catalog may include records lying outside the requested region;
        clients are expected to ignore such records or to clean up the
        catalog before further use.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DiaObjects.

        Returns
        -------
        catalog : `pandas.DataFrame`
            DiaObject records covering an area that may be a superset of
            the specified region.
        """
        raise NotImplementedError

    @abstractmethod
    def getDiaSources(
        self,
        region: Region,
        object_ids: Optional[Iterable[int]],
        visit_time: dafBase.DateTime,
    ) -> Optional[pandas.DataFrame]:
        """Return a catalog of DiaSource instances for a region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DiaSources.
        object_ids : iterable [ `int` ], optional
            DiaObject IDs used to further restrict the returned sources.
            `None` means no such restriction is applied. An empty list
            yields an empty catalog with a correct schema.
        visit_time : `lsst.daf.base.DateTime`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            DiaSource records. `None` is returned when the
            ``read_sources_months`` configuration parameter is set to 0.

        Notes
        -----
        The DiaSource catalog for the region is additionally filtered by
        DiaObject IDs. Only part of the DiaSource history is returned, as
        limited by the ``read_sources_months`` config parameter relative to
        ``visit_time``. An empty ``object_ids`` always produces an empty
        catalog with the correct schema (columns/types). With ``object_ids``
        set to `None` no filtering is done, and some returned records may
        be outside the specified region.
        """
        raise NotImplementedError

    @abstractmethod
    def getDiaForcedSources(
        self,
        region: Region,
        object_ids: Optional[Iterable[int]],
        visit_time: dafBase.DateTime,
    ) -> Optional[pandas.DataFrame]:
        """Return a catalog of DiaForcedSource instances for a region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DiaForcedSources.
        object_ids : iterable [ `int` ], optional
            DiaObject IDs used to further restrict the returned sources.
            An empty list yields an empty catalog with a correct schema.
            `None` means no such restriction is applied; some
            implementations may not support that case.
        visit_time : `lsst.daf.base.DateTime`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            DiaForcedSource records. `None` is returned when the
            ``read_forced_sources_months`` configuration parameter is set
            to 0.

        Raises
        ------
        NotImplementedError
            May be raised by some implementations if ``object_ids`` is
            `None`.

        Notes
        -----
        The DiaForcedSource catalog for the region is additionally filtered
        by DiaObject IDs. Only part of the DiaForcedSource history is
        returned, as limited by the ``read_forced_sources_months`` config
        parameter relative to ``visit_time``. An empty ``object_ids`` always
        produces an empty catalog with the correct schema (columns/types).
        With ``object_ids`` set to `None` no filtering is done, and some
        returned records may be outside the specified region.
        """
        raise NotImplementedError

    @abstractmethod
    def getDiaObjectsHistory(
        self,
        start_time: dafBase.DateTime,
        end_time: dafBase.DateTime,
        region: Optional[Region] = None,
    ) -> pandas.DataFrame:
        """Return a catalog of DiaObject instances for a time period,
        including the history of each DiaObject.

        Parameters
        ----------
        start_time : `dafBase.DateTime`
            Start of the DiaObject history search. A DiaObject record is
            selected when its ``validityStart`` lies in the interval from
            ``start_time`` (inclusive) to ``end_time`` (exclusive).
        end_time : `dafBase.DateTime`
            Upper time limit of the DiaObject history search.
        region : `lsst.sphgeom.Region`, optional
            Region to search for DiaObjects; the whole sky is searched when
            it is not specified. When a region is given, some returned
            records may still fall outside of it.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog containing DiaObject records.

        Notes
        -----
        This part of the API may not be very stable and can change before
        the implementation is finalized.
        """
        raise NotImplementedError

    @abstractmethod
    def getDiaSourcesHistory(
        self,
        start_time: dafBase.DateTime,
        end_time: dafBase.DateTime,
        region: Optional[Region] = None,
    ) -> pandas.DataFrame:
        """Return a catalog of DiaSource instances for a time period.

        Parameters
        ----------
        start_time : `dafBase.DateTime`
            Start of the DiaSource history search. A DiaSource record is
            selected when its ``midPointTai`` lies in the interval from
            ``start_time`` (inclusive) to ``end_time`` (exclusive).
        end_time : `dafBase.DateTime`
            Upper time limit of the DiaSource history search.
        region : `lsst.sphgeom.Region`, optional
            Region to search for DiaSources; the whole sky is searched when
            it is not specified. When a region is given, some returned
            records may still fall outside of it.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog containing DiaSource records.

        Notes
        -----
        This part of the API may not be very stable and can change before
        the implementation is finalized.
        """
        raise NotImplementedError

    @abstractmethod
    def getDiaForcedSourcesHistory(
        self,
        start_time: dafBase.DateTime,
        end_time: dafBase.DateTime,
        region: Optional[Region] = None,
    ) -> pandas.DataFrame:
        """Return a catalog of DiaForcedSource instances for a time
        period.

        Parameters
        ----------
        start_time : `dafBase.DateTime`
            Start of the DiaForcedSource history search. A DiaForcedSource
            record is selected when its ``midPointTai`` lies in the
            interval from ``start_time`` (inclusive) to ``end_time``
            (exclusive).
        end_time : `dafBase.DateTime`
            Upper time limit of the DiaForcedSource history search.
        region : `lsst.sphgeom.Region`, optional
            Region to search for DiaForcedSources; the whole sky is
            searched when it is not specified. When a region is given, some
            returned records may still fall outside of it.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog containing DiaForcedSource records.

        Notes
        -----
        This part of the API may not be very stable and can change before
        the implementation is finalized. Some implementations may not
        support region filtering; they return records from the whole sky.
        """
        raise NotImplementedError

    @abstractmethod
    def getSSObjects(self) -> pandas.DataFrame:
        """Return a catalog of SSObject instances.

        Returns
        -------
        catalog : `pandas.DataFrame`
            SSObject records; every existing record is returned.
        """
        raise NotImplementedError

    @abstractmethod
    def store(
        self,
        visit_time: dafBase.DateTime,
        objects: pandas.DataFrame,
        sources: Optional[pandas.DataFrame] = None,
        forced_sources: Optional[pandas.DataFrame] = None,
    ) -> None:
        """Store all three types of catalogs in the database.

        Parameters
        ----------
        visit_time : `lsst.daf.base.DateTime`
            Time of the visit.
        objects : `pandas.DataFrame`
            Catalog with DiaObject records.
        sources : `pandas.DataFrame`, optional
            Catalog with DiaSource records.
        forced_sources : `pandas.DataFrame`, optional
            Catalog with DiaForcedSource records.

        Notes
        -----
        This method takes DataFrame catalogs whose schema must be
        compatible with the schema of the APDB tables:

        - column names must correspond to database table columns
        - types and units of the columns must match database definitions,
          no unit conversion is performed presently
        - columns that have default values in the database schema can be
          omitted from a catalog
        - this method knows how to fill the interval-related columns of
          DiaObject (validityStart, validityEnd), so they do not need to
          appear in a catalog
        - source catalogs have a ``diaObjectId`` column associating sources
          with objects
        """
        raise NotImplementedError

    @abstractmethod
    def storeSSObjects(self, objects: pandas.DataFrame) -> None:
        """Store or update the SSObject catalog.

        Parameters
        ----------
        objects : `pandas.DataFrame`
            Catalog with SSObject records.

        Notes
        -----
        When SSObjects with matching IDs already exist in the database,
        their records are updated with the information from the provided
        records.
        """
        raise NotImplementedError

    @abstractmethod
    def reassignDiaSources(self, idMap: Mapping[int, int]) -> None:
        """Associate DiaSources with SSObjects, dis-associating them
        from DiaObjects.

        Parameters
        ----------
        idMap : `Mapping`
            Maps DiaSource IDs to their new SSObject IDs.

        Raises
        ------
        ValueError
            Raised if a DiaSource ID does not exist in the database.
        """
        raise NotImplementedError

    @abstractmethod
    def dailyJob(self) -> None:
        """Perform daily activities such as cleanup/vacuum.

        What exactly is done during daily activities is decided by the
        specific implementation.
        """
        raise NotImplementedError

    @abstractmethod
    def countUnassociatedObjects(self) -> int:
        """Count the DiaObjects that have only one DiaSource associated
        with them.

        Used as part of ap_verify metrics.

        Returns
        -------
        count : `int`
            Number of DiaObjects with exactly one associated DiaSource.

        Notes
        -----
        This method can be very inefficient or slow in some implementations.
        """
        raise NotImplementedError

    @classmethod
    def makeField(cls, doc: str) -> ConfigurableField:
        """Create a `~lsst.pex.config.ConfigurableField` for Apdb.

        Parameters
        ----------
        doc : `str`
            Help text for the field.

        Returns
        -------
        configurableField : `lsst.pex.config.ConfigurableField`
            A `~lsst.pex.config.ConfigurableField` whose target is the
            class this method is called on.
        """
        return ConfigurableField(target=cls, doc=doc)