Coverage for python/lsst/dax/apdb/apdb.py: 85%

89 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-10-12 09:46 +0000

1# This file is part of dax_apdb. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ["ApdbConfig", "Apdb", "ApdbInsertId", "ApdbTableData"] 

25 

26import os 

27from abc import ABC, abstractmethod 

28from collections.abc import Iterable, Mapping 

29from dataclasses import dataclass 

30from typing import Optional 

31from uuid import UUID, uuid4 

32 

33import lsst.daf.base as dafBase 

34import pandas 

35from felis.simple import Table 

36from lsst.pex.config import Config, ConfigurableField, Field 

37from lsst.sphgeom import Region 

38 

39from .apdbSchema import ApdbTables 

40 

41 

42def _data_file_name(basename: str) -> str: 

43 """Return path name of a data file in sdm_schemas package.""" 

44 return os.path.join("${SDM_SCHEMAS_DIR}", "yml", basename) 

45 

46 

47class ApdbConfig(Config): 

48 """Part of Apdb configuration common to all implementations.""" 

49 

50 read_sources_months = Field[int](doc="Number of months of history to read from DiaSource", default=12) 

51 read_forced_sources_months = Field[int]( 

52 doc="Number of months of history to read from DiaForcedSource", default=12 

53 ) 

54 schema_file = Field[str]( 

55 doc="Location of (YAML) configuration file with standard schema", default=_data_file_name("apdb.yaml") 

56 ) 

57 schema_name = Field[str](doc="Name of the schema in YAML configuration file.", default="ApdbSchema") 

58 extra_schema_file = Field[str]( 

59 doc="Location of (YAML) configuration file with extra schema, " 

60 "definitions in this file are merged with the definitions in " 

61 "'schema_file', extending or replacing parts of the schema.", 

62 default=None, 

63 optional=True, 

64 deprecated="This field is deprecated, its value is not used.", 

65 ) 

66 use_insert_id = Field[bool]( 

67 doc=( 

68 "If True, make and fill additional tables used for getHistory methods. " 

69 "Databases created with earlier versions of APDB may not have these tables, " 

70 "and corresponding methods will not work for them." 

71 ), 

72 default=False, 

73 ) 

74 

75 

76class ApdbTableData(ABC): 

77 """Abstract class for representing table data.""" 

78 

79 @abstractmethod 

80 def column_names(self) -> list[str]: 

81 """Return ordered sequence of column names in the table. 

82 

83 Returns 

84 ------- 

85 names : `list` [`str`] 

86 Column names. 

87 """ 

88 raise NotImplementedError() 

89 

90 @abstractmethod 

91 def rows(self) -> Iterable[tuple]: 

92 """Return table rows, each row is a tuple of values. 

93 

94 Returns 

95 ------- 

96 rows : `iterable` [`tuple`] 

97 Iterable of tuples. 

98 """ 

99 raise NotImplementedError() 

100 

101 

102@dataclass(frozen=True) 

103class ApdbInsertId: 

104 """Class used to identify single insert operation. 

105 

106 Instances of this class are used to identify the units of transfer from 

107 APDB to PPDB. Usually a single `ApdbInsertId` corresponds to a single call to 

108 the `store` method. 

109 """ 

110 

111 id: UUID 

112 

113 @classmethod 

114 def new_insert_id(cls) -> ApdbInsertId: 

115 """Generate new unique insert identifier.""" 

116 return ApdbInsertId(id=uuid4()) 

117 

118 

119class Apdb(ABC): 

120 """Abstract interface for APDB.""" 

121 

122 ConfigClass = ApdbConfig 

123 

124 @abstractmethod 

125 def tableDef(self, table: ApdbTables) -> Optional[Table]: 

126 """Return table schema definition for a given table. 

127 

128 Parameters 

129 ---------- 

130 table : `ApdbTables` 

131 One of the known APDB tables. 

132 

133 Returns 

134 ------- 

135 tableSchema : `felis.simple.Table` or `None` 

136 Table schema description, `None` is returned if table is not 

137 defined by this implementation. 

138 """ 

139 raise NotImplementedError() 

140 

141 @abstractmethod 

142 def makeSchema(self, drop: bool = False) -> None: 

143 """Create or re-create whole database schema. 

144 

145 Parameters 

146 ---------- 

147 drop : `bool` 

148 If True then drop all tables before creating new ones. 

149 """ 

150 raise NotImplementedError() 

151 

152 @abstractmethod 

153 def getDiaObjects(self, region: Region) -> pandas.DataFrame: 

154 """Return catalog of DiaObject instances from a given region. 

155 

156 This method returns only the last version of each DiaObject. Some 

157 records in a returned catalog may be outside the specified region, it 

158 is up to a client to ignore those records or clean up the catalog before 

159 further use. 

160 

161 Parameters 

162 ---------- 

163 region : `lsst.sphgeom.Region` 

164 Region to search for DIAObjects. 

165 

166 Returns 

167 ------- 

168 catalog : `pandas.DataFrame` 

169 Catalog containing DiaObject records for a region that may be a 

170 superset of the specified region. 

171 """ 

172 raise NotImplementedError() 

173 

174 @abstractmethod 

175 def getDiaSources( 

176 self, region: Region, object_ids: Optional[Iterable[int]], visit_time: dafBase.DateTime 

177 ) -> Optional[pandas.DataFrame]: 

178 """Return catalog of DiaSource instances from a given region. 

179 

180 Parameters 

181 ---------- 

182 region : `lsst.sphgeom.Region` 

183 Region to search for DIASources. 

184 object_ids : iterable [ `int` ], optional 

185 List of DiaObject IDs to further constrain the set of returned 

186 sources. If `None` then returned sources are not constrained. If 

187 list is empty then empty catalog is returned with a correct 

188 schema. 

189 visit_time : `lsst.daf.base.DateTime` 

190 Time of the current visit. 

191 

192 Returns 

193 ------- 

194 catalog : `pandas.DataFrame`, or `None` 

195 Catalog containing DiaSource records. `None` is returned if 

196 ``read_sources_months`` configuration parameter is set to 0. 

197 

198 Notes 

199 ----- 

200 This method returns DiaSource catalog for a region with additional 

201 filtering based on DiaObject IDs. Only a subset of DiaSource history 

202 is returned limited by ``read_sources_months`` config parameter, w.r.t. 

203 ``visit_time``. If ``object_ids`` is empty then an empty catalog is 

204 always returned with the correct schema (columns/types). If 

205 ``object_ids`` is `None` then no filtering is performed and some of the 

206 returned records may be outside the specified region. 

207 """ 

208 raise NotImplementedError() 

209 

210 @abstractmethod 

211 def getDiaForcedSources( 

212 self, region: Region, object_ids: Optional[Iterable[int]], visit_time: dafBase.DateTime 

213 ) -> Optional[pandas.DataFrame]: 

214 """Return catalog of DiaForcedSource instances from a given region. 

215 

216 Parameters 

217 ---------- 

218 region : `lsst.sphgeom.Region` 

219 Region to search for DIAForcedSources. 

220 object_ids : iterable [ `int` ], optional 

221 List of DiaObject IDs to further constrain the set of returned 

222 sources. If list is empty then empty catalog is returned with a 

223 correct schema. If `None` then returned sources are not 

224 constrained. Some implementations may not support the latter case. 

225 visit_time : `lsst.daf.base.DateTime` 

226 Time of the current visit. 

227 

228 Returns 

229 ------- 

230 catalog : `pandas.DataFrame`, or `None` 

231 Catalog containing DiaForcedSource records. `None` is returned if 

232 ``read_forced_sources_months`` configuration parameter is set to 0. 

233 

234 Raises 

235 ------ 

236 NotImplementedError 

237 May be raised by some implementations if ``object_ids`` is `None`. 

238 

239 Notes 

240 ----- 

241 This method returns DiaForcedSource catalog for a region with 

242 additional filtering based on DiaObject IDs. Only a subset of DiaForcedSource 

243 history is returned limited by ``read_forced_sources_months`` config 

244 parameter, w.r.t. ``visit_time``. If ``object_ids`` is empty then an 

245 empty catalog is always returned with the correct schema 

246 (columns/types). If ``object_ids`` is `None` then no filtering is 

247 performed and some of the returned records may be outside the specified 

248 region. 

249 """ 

250 raise NotImplementedError() 

251 

252 @abstractmethod 

253 def getInsertIds(self) -> list[ApdbInsertId] | None: 

254 """Return collection of insert identifiers known to the database. 

255 

256 Returns 

257 ------- 

258 ids : `list` [`ApdbInsertId`] or `None` 

259 List of identifiers, they may be time-ordered if database supports 

260 ordering. `None` is returned if database is not configured to store 

261 insert identifiers. 

262 """ 

263 raise NotImplementedError() 

264 

265 @abstractmethod 

266 def deleteInsertIds(self, ids: Iterable[ApdbInsertId]) -> None: 

267 """Remove insert identifiers from the database. 

268 

269 Parameters 

270 ---------- 

271 ids : `iterable` [`ApdbInsertId`] 

272 Insert identifiers, can include items returned from `getInsertIds`. 

273 

274 Notes 

275 ----- 

276 This method causes Apdb to forget about specified identifiers. If there 

277 is any auxiliary data associated with the identifiers, it is also 

278 removed from database (but data in regular tables is not removed). 

279 This method should be called after successful transfer of data from 

280 APDB to PPDB to free space used by history. 

281 """ 

282 raise NotImplementedError() 

283 

284 @abstractmethod 

285 def getDiaObjectsHistory(self, ids: Iterable[ApdbInsertId]) -> ApdbTableData: 

286 """Return catalog of DiaObject instances from a given time period 

287 including the history of each DiaObject. 

288 

289 Parameters 

290 ---------- 

291 ids : `iterable` [`ApdbInsertId`] 

292 Insert identifiers, can include items returned from `getInsertIds`. 

293 

294 Returns 

295 ------- 

296 data : `ApdbTableData` 

297 Catalog containing DiaObject records. In addition to all regular 

298 columns it will contain ``insert_id`` column. 

299 

300 Notes 

301 ----- 

302 This part of API may not be very stable and can change before the 

303 implementation finalizes. 

304 """ 

305 raise NotImplementedError() 

306 

307 @abstractmethod 

308 def getDiaSourcesHistory(self, ids: Iterable[ApdbInsertId]) -> ApdbTableData: 

309 """Return catalog of DiaSource instances from a given time period. 

310 

311 Parameters 

312 ---------- 

313 ids : `iterable` [`ApdbInsertId`] 

314 Insert identifiers, can include items returned from `getInsertIds`. 

315 

316 Returns 

317 ------- 

318 data : `ApdbTableData` 

319 Catalog containing DiaSource records. In addition to all regular 

320 columns it will contain ``insert_id`` column. 

321 

322 Notes 

323 ----- 

324 This part of API may not be very stable and can change before the 

325 implementation finalizes. 

326 """ 

327 raise NotImplementedError() 

328 

329 @abstractmethod 

330 def getDiaForcedSourcesHistory(self, ids: Iterable[ApdbInsertId]) -> ApdbTableData: 

331 """Return catalog of DiaForcedSource instances from a given time 

332 period. 

333 

334 Parameters 

335 ---------- 

336 ids : `iterable` [`ApdbInsertId`] 

337 Insert identifiers, can include items returned from `getInsertIds`. 

338 

339 Returns 

340 ------- 

341 data : `ApdbTableData` 

342 Catalog containing DiaForcedSource records. In addition to all 

343 regular columns it will contain ``insert_id`` column. 

344 

345 Notes 

346 ----- 

347 This part of API may not be very stable and can change before the 

348 implementation finalizes. 

349 """ 

350 raise NotImplementedError() 

351 

352 @abstractmethod 

353 def getSSObjects(self) -> pandas.DataFrame: 

354 """Return catalog of SSObject instances. 

355 

356 Returns 

357 ------- 

358 catalog : `pandas.DataFrame` 

359 Catalog containing SSObject records, all existing records are 

360 returned. 

361 """ 

362 raise NotImplementedError() 

363 

364 @abstractmethod 

365 def store( 

366 self, 

367 visit_time: dafBase.DateTime, 

368 objects: pandas.DataFrame, 

369 sources: Optional[pandas.DataFrame] = None, 

370 forced_sources: Optional[pandas.DataFrame] = None, 

371 ) -> None: 

372 """Store all three types of catalogs in the database. 

373 

374 Parameters 

375 ---------- 

376 visit_time : `lsst.daf.base.DateTime` 

377 Time of the visit. 

378 objects : `pandas.DataFrame` 

379 Catalog with DiaObject records. 

380 sources : `pandas.DataFrame`, optional 

381 Catalog with DiaSource records. 

382 forced_sources : `pandas.DataFrame`, optional 

383 Catalog with DiaForcedSource records. 

384 

385 Notes 

386 ----- 

387 This method takes DataFrame catalogs; their schema must be 

388 compatible with the schema of APDB table: 

389 

390 - column names must correspond to database table columns 

391 - types and units of the columns must match database definitions, 

392 no unit conversion is performed presently 

393 - columns that have default values in database schema can be 

394 omitted from catalog 

395 - this method knows how to fill interval-related columns of DiaObject 

396 (validityStart, validityEnd), so they do not need to appear in a 

397 catalog 

398 - source catalogs have ``diaObjectId`` column associating sources 

399 with objects 

400 """ 

401 raise NotImplementedError() 

402 

403 @abstractmethod 

404 def storeSSObjects(self, objects: pandas.DataFrame) -> None: 

405 """Store or update SSObject catalog. 

406 

407 Parameters 

408 ---------- 

409 objects : `pandas.DataFrame` 

410 Catalog with SSObject records. 

411 

412 Notes 

413 ----- 

414 If SSObjects with matching IDs already exist in the database, their 

415 records will be updated with the information from provided records. 

416 """ 

417 raise NotImplementedError() 

418 

419 @abstractmethod 

420 def reassignDiaSources(self, idMap: Mapping[int, int]) -> None: 

421 """Associate DiaSources with SSObjects, dis-associating them 

422 from DiaObjects. 

423 

424 Parameters 

425 ---------- 

426 idMap : `Mapping` 

427 Maps DiaSource IDs to their new SSObject IDs. 

428 

429 Raises 

430 ------ 

431 ValueError 

432 Raised if DiaSource ID does not exist in the database. 

433 """ 

434 raise NotImplementedError() 

435 

436 @abstractmethod 

437 def dailyJob(self) -> None: 

438 """Implement daily activities like cleanup/vacuum. 

439 

440 What should be done during daily activities is determined by 

441 specific implementation. 

442 """ 

443 raise NotImplementedError() 

444 

445 @abstractmethod 

446 def countUnassociatedObjects(self) -> int: 

447 """Return the number of DiaObjects that have only one DiaSource 

448 associated with them. 

449 

450 Used as part of ap_verify metrics. 

451 

452 Returns 

453 ------- 

454 count : `int` 

455 Number of DiaObjects with exactly one associated DiaSource. 

456 

457 Notes 

458 ----- 

459 This method can be very inefficient or slow in some implementations. 

460 """ 

461 raise NotImplementedError() 

462 

463 @classmethod 

464 def makeField(cls, doc: str) -> ConfigurableField: 

465 """Make a `~lsst.pex.config.ConfigurableField` for Apdb. 

466 

467 Parameters 

468 ---------- 

469 doc : `str` 

470 Help text for the field. 

471 

472 Returns 

473 ------- 

474 configurableField : `lsst.pex.config.ConfigurableField` 

475 A `~lsst.pex.config.ConfigurableField` for Apdb. 

476 """ 

477 return ConfigurableField(doc=doc, target=cls)