Coverage for python/lsst/dax/apdb/apdb.py: 77%

66 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2022-10-22 01:55 -0700

1# This file is part of dax_apdb. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ["ApdbConfig", "Apdb"] 

25 

26import os 

27from abc import ABC, abstractmethod 

28from typing import Iterable, Mapping, Optional 

29 

30import lsst.daf.base as dafBase 

31import pandas 

32from felis.simple import Table 

33from lsst.pex.config import Config, ConfigurableField, Field 

34from lsst.sphgeom import Region 

35 

36from .apdbSchema import ApdbTables 

37 

38 

def _data_file_name(basename: str) -> str:
    """Return path name of a data file in sdm_schemas package.

    Parameters
    ----------
    basename : `str`
        Name of the file inside the ``yml`` sub-directory of the package.

    Returns
    -------
    path : `str`
        Path built from the literal ``${SDM_SCHEMAS_DIR}`` placeholder, the
        ``yml`` directory and ``basename``.

    Notes
    -----
    The ``${SDM_SCHEMAS_DIR}`` reference is returned verbatim; this function
    does not expand it. Presumably the consumer of the path expands the
    environment variable -- confirm against the code that reads
    ``ApdbConfig.schema_file``.
    """
    return os.path.join("${SDM_SCHEMAS_DIR}", "yml", basename)

43 

44 

class ApdbConfig(Config):
    """Part of Apdb configuration common to all implementations.

    Defines how much DiaSource / DiaForcedSource history is read and where
    the YAML schema definition is located.
    """

    # Setting this to 0 disables reading of DiaSource history (see the
    # ``Apdb.getDiaSources`` documentation).
    read_sources_months = Field[int](
        doc="Number of months of history to read from DiaSource",
        default=12
    )
    # Setting this to 0 disables reading of DiaForcedSource history (see the
    # ``Apdb.getDiaForcedSources`` documentation).
    read_forced_sources_months = Field[int](
        doc="Number of months of history to read from DiaForcedSource",
        default=12
    )
    # Default path contains an unexpanded ``${SDM_SCHEMAS_DIR}`` reference.
    schema_file = Field[str](
        doc="Location of (YAML) configuration file with standard schema",
        default=_data_file_name("apdb.yaml")
    )
    schema_name = Field[str](
        doc="Name of the schema in YAML configuration file.",
        default="ApdbSchema"
    )
    # Deprecated and unused; kept so that existing configurations still load.
    extra_schema_file = Field[str](
        doc="Location of (YAML) configuration file with extra schema, "
            "definitions in this file are merged with the definitions in "
            "'schema_file', extending or replacing parts of the schema.",
        default=None,
        optional=True,
        deprecated="This field is deprecated, its value is not used."
    )

72 

73 

class Apdb(ABC):
    """Abstract interface for APDB.

    Every method decorated with `abc.abstractmethod` has to be provided by a
    concrete implementation. ``ConfigClass`` names the configuration class
    associated with this interface.
    """

    ConfigClass = ApdbConfig

    @abstractmethod
    def tableDef(self, table: ApdbTables) -> Optional[Table]:
        """Return table schema definition for a given table.

        Parameters
        ----------
        table : `ApdbTables`
            One of the known APDB tables.

        Returns
        -------
        tableSchema : `felis.simple.Table` or `None`
            Table schema description, `None` is returned if table is not
            defined by this implementation.
        """
        raise NotImplementedError()

    @abstractmethod
    def makeSchema(self, drop: bool = False) -> None:
        """Create or re-create whole database schema.

        Parameters
        ----------
        drop : `bool`
            If True then drop all tables before creating new ones.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaObjects(self, region: Region) -> pandas.DataFrame:
        """Return catalog of DiaObject instances from a given region.

        This method returns only the last version of each DiaObject. Some
        records in a returned catalog may be outside the specified region, it
        is up to a client to ignore those records or cleanup the catalog
        before further use.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DiaObjects.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog containing DiaObject records for a region that may be a
            superset of the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaSources(self, region: Region,
                      object_ids: Optional[Iterable[int]],
                      visit_time: dafBase.DateTime) -> Optional[pandas.DataFrame]:
        """Return catalog of DiaSource instances from a given region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DiaSources.
        object_ids : iterable [ `int` ], optional
            List of DiaObject IDs to further constrain the set of returned
            sources. If `None` then returned sources are not constrained. If
            list is empty then empty catalog is returned with a correct
            schema.
        visit_time : `lsst.daf.base.DateTime`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            Catalog containing DiaSource records. `None` is returned if
            ``read_sources_months`` configuration parameter is set to 0.

        Notes
        -----
        This method returns DiaSource catalog for a region with additional
        filtering based on DiaObject IDs. Only a subset of DiaSource history
        is returned limited by ``read_sources_months`` config parameter,
        w.r.t. ``visit_time``. If ``object_ids`` is empty then an empty
        catalog is always returned with the correct schema (columns/types).
        If ``object_ids`` is `None` then no filtering is performed and some
        of the returned records may be outside the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaForcedSources(self, region: Region,
                            object_ids: Optional[Iterable[int]],
                            visit_time: dafBase.DateTime) -> Optional[pandas.DataFrame]:
        """Return catalog of DiaForcedSource instances from a given region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DiaForcedSources.
        object_ids : iterable [ `int` ], optional
            List of DiaObject IDs to further constrain the set of returned
            sources. If list is empty then empty catalog is returned with a
            correct schema. If `None` then returned sources are not
            constrained. Some implementations may not support latter case.
        visit_time : `lsst.daf.base.DateTime`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            Catalog containing DiaForcedSource records. `None` is returned if
            ``read_forced_sources_months`` configuration parameter is set to
            0.

        Raises
        ------
        NotImplementedError
            May be raised by some implementations if ``object_ids`` is `None`.

        Notes
        -----
        This method returns DiaForcedSource catalog for a region with
        additional filtering based on DiaObject IDs. Only a subset of
        DiaForcedSource history is returned limited by
        ``read_forced_sources_months`` config parameter, w.r.t.
        ``visit_time``. If ``object_ids`` is empty then an empty catalog is
        always returned with the correct schema (columns/types). If
        ``object_ids`` is `None` then no filtering is performed and some of
        the returned records may be outside the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaObjectsHistory(self,
                             start_time: dafBase.DateTime,
                             end_time: dafBase.DateTime,
                             region: Optional[Region] = None) -> pandas.DataFrame:
        """Return catalog of DiaObject instances from a given time period
        including the history of each DiaObject.

        Parameters
        ----------
        start_time : `lsst.daf.base.DateTime`
            Starting time for DiaObject history search. DiaObject record is
            selected when its ``validityStart`` falls into an interval
            between ``start_time`` (inclusive) and ``end_time`` (exclusive).
        end_time : `lsst.daf.base.DateTime`
            Upper limit on time for DiaObject history search.
        region : `lsst.sphgeom.Region`, optional
            Region to search for DiaObjects, if not specified then whole sky
            is searched. If region is specified then some returned records
            may fall outside of this region.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog containing DiaObject records.

        Notes
        -----
        This part of API may not be very stable and can change before the
        implementation finalizes.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaSourcesHistory(self,
                             start_time: dafBase.DateTime,
                             end_time: dafBase.DateTime,
                             region: Optional[Region] = None) -> pandas.DataFrame:
        """Return catalog of DiaSource instances from a given time period.

        Parameters
        ----------
        start_time : `lsst.daf.base.DateTime`
            Starting time for DiaSource history search. DiaSource record is
            selected when its ``midPointTai`` falls into an interval between
            ``start_time`` (inclusive) and ``end_time`` (exclusive).
        end_time : `lsst.daf.base.DateTime`
            Upper limit on time for DiaSource history search.
        region : `lsst.sphgeom.Region`, optional
            Region to search for DiaSources, if not specified then whole sky
            is searched. If region is specified then some returned records
            may fall outside of this region.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog containing DiaSource records.

        Notes
        -----
        This part of API may not be very stable and can change before the
        implementation finalizes.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaForcedSourcesHistory(self,
                                   start_time: dafBase.DateTime,
                                   end_time: dafBase.DateTime,
                                   region: Optional[Region] = None) -> pandas.DataFrame:
        """Return catalog of DiaForcedSource instances from a given time
        period.

        Parameters
        ----------
        start_time : `lsst.daf.base.DateTime`
            Starting time for DiaForcedSource history search. DiaForcedSource
            record is selected when its ``midPointTai`` falls into an
            interval between ``start_time`` (inclusive) and ``end_time``
            (exclusive).
        end_time : `lsst.daf.base.DateTime`
            Upper limit on time for DiaForcedSource history search.
        region : `lsst.sphgeom.Region`, optional
            Region to search for DiaForcedSources, if not specified then
            whole sky is searched. If region is specified then some returned
            records may fall outside of this region.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog containing DiaForcedSource records.

        Notes
        -----
        This part of API may not be very stable and can change before the
        implementation finalizes. Some implementations may not support region
        filtering, they will return records from the whole sky.
        """
        raise NotImplementedError()

    @abstractmethod
    def getSSObjects(self) -> pandas.DataFrame:
        """Return catalog of SSObject instances.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog containing SSObject records, all existing records are
            returned.
        """
        raise NotImplementedError()

    @abstractmethod
    def store(self,
              visit_time: dafBase.DateTime,
              objects: pandas.DataFrame,
              sources: Optional[pandas.DataFrame] = None,
              forced_sources: Optional[pandas.DataFrame] = None) -> None:
        """Store all three types of catalogs in the database.

        Parameters
        ----------
        visit_time : `lsst.daf.base.DateTime`
            Time of the visit.
        objects : `pandas.DataFrame`
            Catalog with DiaObject records.
        sources : `pandas.DataFrame`, optional
            Catalog with DiaSource records.
        forced_sources : `pandas.DataFrame`, optional
            Catalog with DiaForcedSource records.

        Notes
        -----
        This method takes DataFrame catalogs, their schema must be
        compatible with the schema of APDB table:

          - column names must correspond to database table columns
          - types and units of the columns must match database definitions,
            no unit conversion is performed presently
          - columns that have default values in database schema can be
            omitted from catalog
          - this method knows how to fill interval-related columns of
            DiaObject (validityStart, validityEnd) they do not need to appear
            in a catalog
          - source catalogs have ``diaObjectId`` column associating sources
            with objects
        """
        raise NotImplementedError()

    @abstractmethod
    def storeSSObjects(self, objects: pandas.DataFrame) -> None:
        """Store or update SSObject catalog.

        Parameters
        ----------
        objects : `pandas.DataFrame`
            Catalog with SSObject records.

        Notes
        -----
        If SSObjects with matching IDs already exist in the database, their
        records will be updated with the information from provided records.
        """
        raise NotImplementedError()

    @abstractmethod
    def reassignDiaSources(self, idMap: Mapping[int, int]) -> None:
        """Associate DiaSources with SSObjects, dis-associating them
        from DiaObjects.

        Parameters
        ----------
        idMap : `Mapping`
            Maps DiaSource IDs to their new SSObject IDs.

        Raises
        ------
        ValueError
            Raised if DiaSource ID does not exist in the database.
        """
        raise NotImplementedError()

    @abstractmethod
    def dailyJob(self) -> None:
        """Implement daily activities like cleanup/vacuum.

        What should be done during daily activities is determined by
        specific implementation.
        """
        raise NotImplementedError()

    @abstractmethod
    def countUnassociatedObjects(self) -> int:
        """Return the number of DiaObjects that have only one DiaSource
        associated with them.

        Used as part of ap_verify metrics.

        Returns
        -------
        count : `int`
            Number of DiaObjects with exactly one associated DiaSource.

        Notes
        -----
        This method can be very inefficient or slow in some implementations.
        """
        raise NotImplementedError()

    @classmethod
    def makeField(cls, doc: str) -> ConfigurableField:
        """Make a `~lsst.pex.config.ConfigurableField` for Apdb.

        Parameters
        ----------
        doc : `str`
            Help text for the field.

        Returns
        -------
        configurableField : `lsst.pex.config.ConfigurableField`
            A `~lsst.pex.config.ConfigurableField` for Apdb, whose target is
            the class this method is called on.
        """
        return ConfigurableField(doc=doc, target=cls)