Coverage for python/lsst/dax/apdb/apdb.py: 78%

89 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-07-10 10:17 +0000

1# This file is part of dax_apdb. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ["ApdbConfig", "Apdb", "ApdbInsertId", "ApdbTableData"] 

25 

26import os 

27from abc import ABC, abstractmethod 

28from collections.abc import Iterable, Mapping 

29from dataclasses import dataclass 

30from typing import Optional 

31from uuid import UUID, uuid4 

32 

33import lsst.daf.base as dafBase 

34import pandas 

35from felis.simple import Table 

36from lsst.pex.config import Config, ConfigurableField, Field 

37from lsst.sphgeom import Region 

38 

39from .apdbSchema import ApdbTables 

40 

41 

def _data_file_name(basename: str) -> str:
    """Build the path of a data file shipped with the sdm_schemas package.

    Parameters
    ----------
    basename : `str`
        File name to append to the ``yml`` sub-directory.

    Returns
    -------
    path : `str`
        Path made of the literal ``${SDM_SCHEMAS_DIR}`` placeholder, the
        ``yml`` directory and ``basename``. The placeholder is returned
        as-is; this function does not expand it.
    """
    components = ("${SDM_SCHEMAS_DIR}", "yml", basename)
    return os.path.join(*components)

45 

46 

class ApdbConfig(Config):
    """Configuration options shared by every Apdb implementation."""

    # Length of the history window used when reading DiaSource records.
    read_sources_months = Field[int](
        doc="Number of months of history to read from DiaSource",
        default=12,
    )

    # Length of the history window used when reading DiaForcedSource records.
    read_forced_sources_months = Field[int](
        doc="Number of months of history to read from DiaForcedSource",
        default=12,
    )

    # The default path keeps the unexpanded ${SDM_SCHEMAS_DIR} placeholder
    # produced by _data_file_name().
    schema_file = Field[str](
        doc="Location of (YAML) configuration file with standard schema",
        default=_data_file_name("apdb.yaml"),
    )

    schema_name = Field[str](
        doc="Name of the schema in YAML configuration file.",
        default="ApdbSchema",
    )

    # Deprecated option, retained as a field but documented as unused.
    extra_schema_file = Field[str](
        doc="Location of (YAML) configuration file with extra schema, "
        "definitions in this file are merged with the definitions in "
        "'schema_file', extending or replacing parts of the schema.",
        default=None,
        optional=True,
        deprecated="This field is deprecated, its value is not used.",
    )

    # Switch for the extra tables that back the getHistory methods.
    use_insert_id = Field[bool](
        doc=(
            "If True (default), make and fill additional tables used for getHistory methods. "
            "Databases created with earlier versions of APDB may not have these tables, "
            "and corresponding methods will not work for them."
        ),
        default=True,
    )

74 

75 

class ApdbTableData(ABC):
    """Abstract interface describing the contents of a table.

    Concrete subclasses must provide both the column names and the rows.
    """

    @abstractmethod
    def column_names(self) -> list[str]:
        """Return the names of the table columns, in order.

        Returns
        -------
        names : `list` [`str`]
            Ordered column names.
        """
        raise NotImplementedError()

    @abstractmethod
    def rows(self) -> Iterable[tuple]:
        """Return the rows of the table.

        Returns
        -------
        rows : `iterable` [`tuple`]
            Iterable over rows, every row being a tuple of values.
        """
        raise NotImplementedError()

100 

101 

@dataclass(frozen=True)
class ApdbInsertId:
    """Identifier of a single insert operation.

    Instances of this class identify the units of transfer from APDB to
    PPDB. Usually one `ApdbInsertId` corresponds to a single call to the
    `store` method.

    The dataclass is frozen, so instances are immutable, hashable and
    compare equal when their ``id`` values are equal.
    """

    # Unique value identifying the insert operation.
    id: UUID

    @classmethod
    def new_insert_id(cls) -> ApdbInsertId:
        """Generate a new unique insert identifier.

        Returns
        -------
        insert_id : `ApdbInsertId`
            New instance of the class this method is called on, holding a
            freshly generated random (version 4) UUID.
        """
        # Build through ``cls`` rather than the hard-coded class name so that
        # a subclass calling this factory receives an instance of itself.
        return cls(id=uuid4())

117 

118 

class Apdb(ABC):
    """Abstract interface that every APDB implementation has to provide."""

    # Configuration class associated with this interface.
    ConfigClass = ApdbConfig

    @abstractmethod
    def tableDef(self, table: ApdbTables) -> Optional[Table]:
        """Return the schema definition of one table.

        Parameters
        ----------
        table : `ApdbTables`
            One of the known APDB tables.

        Returns
        -------
        tableSchema : `felis.simple.Table` or `None`
            Description of the table schema, or `None` when this
            implementation does not define the table.
        """
        raise NotImplementedError()

    @abstractmethod
    def makeSchema(self, drop: bool = False) -> None:
        """Create the whole database schema, or re-create it.

        Parameters
        ----------
        drop : `bool`
            If `True`, all tables are dropped before new ones are created.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaObjects(self, region: Region) -> pandas.DataFrame:
        """Return the catalog of DiaObject instances from a given region.

        Only the last version of each DiaObject is returned. The returned
        catalog may include records located outside the specified region;
        clients are expected to ignore those records or clean up the catalog
        before further use.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DiaObjects.

        Returns
        -------
        catalog : `pandas.DataFrame`
            DiaObject records for a region that may be a superset of the
            specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaSources(
        self, region: Region, object_ids: Optional[Iterable[int]], visit_time: dafBase.DateTime
    ) -> Optional[pandas.DataFrame]:
        """Return the catalog of DiaSource instances from a given region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DiaSources.
        object_ids : iterable [ `int` ], optional
            DiaObject IDs used to further constrain the set of returned
            sources. With `None` the returned sources are not constrained.
            With an empty list an empty catalog with a correct schema is
            returned.
        visit_time : `lsst.daf.base.DateTime`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            DiaSource records. `None` is returned when the
            ``read_sources_months`` configuration parameter is set to 0.

        Notes
        -----
        The DiaSource catalog for the region is additionally filtered by
        DiaObject IDs. Only the part of the DiaSource history limited by the
        ``read_sources_months`` config parameter, relative to ``visit_time``,
        is returned. An empty ``object_ids`` always produces an empty catalog
        with the correct schema (columns/types). When ``object_ids`` is `None`
        no filtering is performed and some of the returned records may be
        outside the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaForcedSources(
        self, region: Region, object_ids: Optional[Iterable[int]], visit_time: dafBase.DateTime
    ) -> Optional[pandas.DataFrame]:
        """Return the catalog of DiaForcedSource instances from a given
        region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DiaForcedSources.
        object_ids : iterable [ `int` ], optional
            DiaObject IDs used to further constrain the set of returned
            sources. With an empty list an empty catalog with a correct
            schema is returned. With `None` the returned sources are not
            constrained; some implementations may not support the latter
            case.
        visit_time : `lsst.daf.base.DateTime`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            DiaForcedSource records. `None` is returned when the
            ``read_forced_sources_months`` configuration parameter is set
            to 0.

        Raises
        ------
        NotImplementedError
            May be raised by some implementations if ``object_ids`` is `None`.

        Notes
        -----
        The DiaForcedSource catalog for the region is additionally filtered
        by DiaObject IDs. Only the part of the DiaForcedSource history limited
        by the ``read_forced_sources_months`` config parameter, relative to
        ``visit_time``, is returned. An empty ``object_ids`` always produces
        an empty catalog with the correct schema (columns/types). When
        ``object_ids`` is `None` no filtering is performed and some of the
        returned records may be outside the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getInsertIds(self) -> list[ApdbInsertId] | None:
        """Return the insert identifiers known to the database.

        Returns
        -------
        ids : `list` [`ApdbInsertId`] or `None`
            List of identifiers; they may be time-ordered if the database
            supports ordering. `None` is returned when the database is not
            configured to store insert identifiers.
        """
        raise NotImplementedError()

    @abstractmethod
    def deleteInsertIds(self, ids: Iterable[ApdbInsertId]) -> None:
        """Remove insert identifiers from the database.

        Parameters
        ----------
        ids : `iterable` [`ApdbInsertId`]
            Insert identifiers, can include items returned from `getInsertIds`.

        Notes
        -----
        After this call Apdb forgets about the specified identifiers. Any
        auxiliary data associated with the identifiers is removed from the
        database as well, while data in regular tables is kept. The method
        is meant to be called after a successful transfer of data from APDB
        to PPDB, to free the space used by history.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaObjectsHistory(self, ids: Iterable[ApdbInsertId]) -> ApdbTableData:
        """Return the catalog of DiaObject instances from a given time period,
        including the history of each DiaObject.

        Parameters
        ----------
        ids : `iterable` [`ApdbInsertId`]
            Insert identifiers, can include items returned from `getInsertIds`.

        Returns
        -------
        data : `ApdbTableData`
            DiaObject records. Besides all regular columns the data contains
            an ``insert_id`` column.

        Notes
        -----
        This part of the API may not be very stable and can change before the
        implementation is finalized.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaSourcesHistory(self, ids: Iterable[ApdbInsertId]) -> ApdbTableData:
        """Return the catalog of DiaSource instances from a given time period.

        Parameters
        ----------
        ids : `iterable` [`ApdbInsertId`]
            Insert identifiers, can include items returned from `getInsertIds`.

        Returns
        -------
        data : `ApdbTableData`
            DiaSource records. Besides all regular columns the data contains
            an ``insert_id`` column.

        Notes
        -----
        This part of the API may not be very stable and can change before the
        implementation is finalized.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaForcedSourcesHistory(self, ids: Iterable[ApdbInsertId]) -> ApdbTableData:
        """Return the catalog of DiaForcedSource instances from a given time
        period.

        Parameters
        ----------
        ids : `iterable` [`ApdbInsertId`]
            Insert identifiers, can include items returned from `getInsertIds`.

        Returns
        -------
        data : `ApdbTableData`
            DiaForcedSource records. Besides all regular columns the data
            contains an ``insert_id`` column.

        Notes
        -----
        This part of the API may not be very stable and can change before the
        implementation is finalized.
        """
        raise NotImplementedError()

    @abstractmethod
    def getSSObjects(self) -> pandas.DataFrame:
        """Return the catalog of SSObject instances.

        Returns
        -------
        catalog : `pandas.DataFrame`
            SSObject records; all existing records are returned.
        """
        raise NotImplementedError()

    @abstractmethod
    def store(
        self,
        visit_time: dafBase.DateTime,
        objects: pandas.DataFrame,
        sources: Optional[pandas.DataFrame] = None,
        forced_sources: Optional[pandas.DataFrame] = None,
    ) -> None:
        """Store all three types of catalogs in the database.

        Parameters
        ----------
        visit_time : `lsst.daf.base.DateTime`
            Time of the visit.
        objects : `pandas.DataFrame`
            Catalog with DiaObject records.
        sources : `pandas.DataFrame`, optional
            Catalog with DiaSource records.
        forced_sources : `pandas.DataFrame`, optional
            Catalog with DiaForcedSource records.

        Notes
        -----
        This method takes DataFrame catalogs whose schema must be compatible
        with the schema of the APDB tables:

        - column names must correspond to database table columns
        - types and units of the columns must match database definitions,
          no unit conversion is performed presently
        - columns that have default values in the database schema can be
          omitted from a catalog
        - this method knows how to fill the interval-related columns of
          DiaObject (validityStart, validityEnd), they do not need to appear
          in a catalog
        - source catalogs have a ``diaObjectId`` column associating sources
          with objects
        """
        raise NotImplementedError()

    @abstractmethod
    def storeSSObjects(self, objects: pandas.DataFrame) -> None:
        """Store or update the SSObject catalog.

        Parameters
        ----------
        objects : `pandas.DataFrame`
            Catalog with SSObject records.

        Notes
        -----
        When SSObjects with matching IDs already exist in the database, their
        records are updated with the information from the provided records.
        """
        raise NotImplementedError()

    @abstractmethod
    def reassignDiaSources(self, idMap: Mapping[int, int]) -> None:
        """Associate DiaSources with SSObjects, dis-associating them from
        DiaObjects.

        Parameters
        ----------
        idMap : `Mapping`
            Maps DiaSource IDs to their new SSObject IDs.

        Raises
        ------
        ValueError
            Raised if a DiaSource ID does not exist in the database.
        """
        raise NotImplementedError()

    @abstractmethod
    def dailyJob(self) -> None:
        """Run daily activities such as cleanup/vacuum.

        What exactly is done during daily activities is determined by the
        specific implementation.
        """
        raise NotImplementedError()

    @abstractmethod
    def countUnassociatedObjects(self) -> int:
        """Return the number of DiaObjects that have only one DiaSource
        associated with them.

        Used as part of ap_verify metrics.

        Returns
        -------
        count : `int`
            Number of DiaObjects with exactly one associated DiaSource.

        Notes
        -----
        This method can be very inefficient or slow in some implementations.
        """
        raise NotImplementedError()

    @classmethod
    def makeField(cls, doc: str) -> ConfigurableField:
        """Make a `~lsst.pex.config.ConfigurableField` for Apdb.

        Parameters
        ----------
        doc : `str`
            Help text for the field.

        Returns
        -------
        configurableField : `lsst.pex.config.ConfigurableField`
            A `~lsst.pex.config.ConfigurableField` whose target is the class
            this method is called on.
        """
        return ConfigurableField(doc=doc, target=cls)