Coverage for python/lsst/dax/apdb/apdb.py: 85%

93 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-11-21 11:47 +0000

1# This file is part of dax_apdb. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ["ApdbConfig", "Apdb", "ApdbInsertId", "ApdbTableData"] 

25 

26import os 

27from abc import ABC, abstractmethod 

28from collections.abc import Iterable, Mapping 

29from dataclasses import dataclass 

30from uuid import UUID, uuid4 

31 

32import lsst.daf.base as dafBase 

33import pandas 

34from felis.simple import Table 

35from lsst.pex.config import Config, ConfigurableField, Field 

36from lsst.sphgeom import Region 

37 

38from .apdbSchema import ApdbTables 

39 

40 

def _data_file_name(basename: str) -> str:
    """Build the path of a data file located in the sdm_schemas package.

    Parameters
    ----------
    basename : `str`
        Name of the file, without any directory part.

    Returns
    -------
    path : `str`
        Path made of the ``${SDM_SCHEMAS_DIR}`` placeholder, the ``yml``
        sub-directory and ``basename``. The placeholder is returned
        verbatim, it is not expanded here.
    """
    schemas_root = "${SDM_SCHEMAS_DIR}"
    return os.path.join(schemas_root, "yml", basename)

44 

45 

class ApdbConfig(Config):
    """Configuration options shared by all Apdb implementations."""

    # Length of the history window used when reading source catalogs.
    read_sources_months = Field[int](
        doc="Number of months of history to read from DiaSource",
        default=12,
    )
    read_forced_sources_months = Field[int](
        doc="Number of months of history to read from DiaForcedSource",
        default=12,
    )

    # Schema location; the default path is produced by _data_file_name().
    schema_file = Field[str](
        doc="Location of (YAML) configuration file with standard schema",
        default=_data_file_name("apdb.yaml"),
    )
    schema_name = Field[str](
        doc="Name of the schema in YAML configuration file.",
        default="ApdbSchema",
    )

    # Deprecated option, kept only so that existing configurations still load.
    extra_schema_file = Field[str](
        doc=(
            "Location of (YAML) configuration file with extra schema, "
            "definitions in this file are merged with the definitions in "
            "'schema_file', extending or replacing parts of the schema."
        ),
        default=None,
        optional=True,
        deprecated="This field is deprecated, its value is not used.",
    )

    # Switch for the extra tables backing the getHistory family of methods.
    use_insert_id = Field[bool](
        doc="If True, make and fill additional tables used for getHistory methods. "
        "Databases created with earlier versions of APDB may not have these tables, "
        "and corresponding methods will not work for them.",
        default=False,
    )

73 

74 

class ApdbTableData(ABC):
    """Abstract interface for table contents: column names plus rows."""

    @abstractmethod
    def column_names(self) -> list[str]:
        """Return the names of the table columns, in order.

        Returns
        -------
        names : `list` [`str`]
            Column names.
        """
        raise NotImplementedError()

    @abstractmethod
    def rows(self) -> Iterable[tuple]:
        """Return the rows of the table, every row being a tuple of values.

        Returns
        -------
        rows : `iterable` [`tuple`]
            Iterable of tuples.
        """
        raise NotImplementedError()

99 

100 

@dataclass(frozen=True)
class ApdbInsertId:
    """Class used to identify single insert operation.

    Instances of this class are used to identify the units of transfer from
    APDB to PPDB. Usually single `ApdbInsertId` corresponds to a single call to
    `store` method.
    """

    id: UUID
    """Identifier of this insert (`uuid.UUID`)."""

    insert_time: dafBase.DateTime
    """Time of this insert, usually corresponds to visit time
    (`dafBase.DateTime`).
    """

    @classmethod
    def new_insert_id(cls, insert_time: dafBase.DateTime) -> ApdbInsertId:
        """Generate new unique insert identifier.

        Parameters
        ----------
        insert_time : `dafBase.DateTime`
            Time to associate with the new identifier.

        Returns
        -------
        insert_id : `ApdbInsertId`
            New instance with a random UUID (from `uuid.uuid4`) and the given
            ``insert_time``. When called on a subclass the instance is of
            that subclass.
        """
        # Construct through ``cls`` so that subclasses get instances of their
        # own type instead of the base class.
        return cls(id=uuid4(), insert_time=insert_time)

120 

121 

class Apdb(ABC):
    """Abstract base class defining the APDB interface."""

    ConfigClass = ApdbConfig

    @abstractmethod
    def tableDef(self, table: ApdbTables) -> Table | None:
        """Return the schema definition of a single table.

        Parameters
        ----------
        table : `ApdbTables`
            One of the known APDB tables.

        Returns
        -------
        tableSchema : `felis.simple.Table` or `None`
            Description of the table schema, or `None` when the table is not
            defined by this implementation.
        """
        raise NotImplementedError()

    @abstractmethod
    def makeSchema(self, drop: bool = False) -> None:
        """Create, or re-create, the complete database schema.

        Parameters
        ----------
        drop : `bool`
            If True then all tables are dropped before new ones are created.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaObjects(self, region: Region) -> pandas.DataFrame:
        """Return catalog of DiaObject instances from a given region.

        Only the latest version of each DiaObject is returned. The returned
        catalog may include records located outside the specified region;
        the client is responsible for ignoring such records or cleaning up
        the catalog before further use.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DIAObjects.

        Returns
        -------
        catalog : `pandas.DataFrame`
            DiaObject records for an area that may be a superset of the
            specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaSources(
        self, region: Region, object_ids: Iterable[int] | None, visit_time: dafBase.DateTime
    ) -> pandas.DataFrame | None:
        """Return catalog of DiaSource instances from a given region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DIASources.
        object_ids : iterable [ `int` ], optional
            DiaObject IDs used to further constrain the set of returned
            sources. With `None` the returned sources are not constrained.
            With an empty list an empty catalog with a correct schema is
            returned.
        visit_time : `lsst.daf.base.DateTime`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            Catalog containing DiaSource records. `None` is returned if
            ``read_sources_months`` configuration parameter is set to 0.

        Notes
        -----
        The DiaSource catalog for a region is additionally filtered on
        DiaObject IDs. The amount of returned DiaSource history is limited
        by the ``read_sources_months`` config parameter, counted relative to
        ``visit_time``. An empty ``object_ids`` always yields an empty
        catalog with the correct schema (columns/types). When ``object_ids``
        is `None` no filtering is done, and some of the returned records may
        be outside the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaForcedSources(
        self, region: Region, object_ids: Iterable[int] | None, visit_time: dafBase.DateTime
    ) -> pandas.DataFrame | None:
        """Return catalog of DiaForcedSource instances from a given region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DiaForcedSources.
        object_ids : iterable [ `int` ], optional
            DiaObject IDs used to further constrain the set of returned
            sources. With an empty list an empty catalog with a correct
            schema is returned. With `None` the returned sources are not
            constrained; some implementations may not support the latter
            case.
        visit_time : `lsst.daf.base.DateTime`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            Catalog containing DiaForcedSource records. `None` is returned
            if ``read_forced_sources_months`` configuration parameter is set
            to 0.

        Raises
        ------
        NotImplementedError
            May be raised by some implementations if ``object_ids`` is `None`.

        Notes
        -----
        The DiaForcedSource catalog for a region is additionally filtered on
        DiaObject IDs. The amount of returned history is limited by the
        ``read_forced_sources_months`` config parameter, counted relative to
        ``visit_time``. An empty ``object_ids`` always yields an empty
        catalog with the correct schema (columns/types). When ``object_ids``
        is `None` no filtering is done, and some of the returned records may
        be outside the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def containsVisitDetector(self, visit: int, detector: int) -> bool:
        """Check whether the APDB holds data for a given visit-detector.

        Parameters
        ----------
        visit, detector : `int`
            The ID of the visit-detector to search for.

        Returns
        -------
        present : `bool`
            `True` if some DiaObject, DiaSource, or DiaForcedSource records
            exist for the specified observation, `False` otherwise.
        """
        raise NotImplementedError()

    @abstractmethod
    def getInsertIds(self) -> list[ApdbInsertId] | None:
        """Return the insert identifiers known to the database.

        Returns
        -------
        ids : `list` [`ApdbInsertId`] or `None`
            List of identifiers; they may be time-ordered if the database
            supports ordering. `None` is returned if the database is not
            configured to store insert identifiers.
        """
        raise NotImplementedError()

    @abstractmethod
    def deleteInsertIds(self, ids: Iterable[ApdbInsertId]) -> None:
        """Remove insert identifiers from the database.

        Parameters
        ----------
        ids : `iterable` [`ApdbInsertId`]
            Insert identifiers, can include items returned from `getInsertIds`.

        Notes
        -----
        After this call Apdb no longer knows the specified identifiers. Any
        auxiliary data associated with the identifiers is removed from the
        database as well, while data in regular tables is not removed. The
        method should be called after a successful transfer of data from
        APDB to PPDB, to free the space used by history.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaObjectsHistory(self, ids: Iterable[ApdbInsertId]) -> ApdbTableData:
        """Return catalog of DiaObject instances from a given time period
        including the history of each DiaObject.

        Parameters
        ----------
        ids : `iterable` [`ApdbInsertId`]
            Insert identifiers, can include items returned from `getInsertIds`.

        Returns
        -------
        data : `ApdbTableData`
            Catalog containing DiaObject records. Besides all regular
            columns it contains an ``insert_id`` column.

        Notes
        -----
        This part of the API may not be very stable and can change before
        the implementation finalizes.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaSourcesHistory(self, ids: Iterable[ApdbInsertId]) -> ApdbTableData:
        """Return catalog of DiaSource instances from a given time period.

        Parameters
        ----------
        ids : `iterable` [`ApdbInsertId`]
            Insert identifiers, can include items returned from `getInsertIds`.

        Returns
        -------
        data : `ApdbTableData`
            Catalog containing DiaSource records. Besides all regular
            columns it contains an ``insert_id`` column.

        Notes
        -----
        This part of the API may not be very stable and can change before
        the implementation finalizes.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaForcedSourcesHistory(self, ids: Iterable[ApdbInsertId]) -> ApdbTableData:
        """Return catalog of DiaForcedSource instances from a given time
        period.

        Parameters
        ----------
        ids : `iterable` [`ApdbInsertId`]
            Insert identifiers, can include items returned from `getInsertIds`.

        Returns
        -------
        data : `ApdbTableData`
            Catalog containing DiaForcedSource records. Besides all regular
            columns it contains an ``insert_id`` column.

        Notes
        -----
        This part of the API may not be very stable and can change before
        the implementation finalizes.
        """
        raise NotImplementedError()

    @abstractmethod
    def getSSObjects(self) -> pandas.DataFrame:
        """Return catalog of SSObject instances.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog containing SSObject records; all existing records are
            returned.
        """
        raise NotImplementedError()

    @abstractmethod
    def store(
        self,
        visit_time: dafBase.DateTime,
        objects: pandas.DataFrame,
        sources: pandas.DataFrame | None = None,
        forced_sources: pandas.DataFrame | None = None,
    ) -> None:
        """Store all three types of catalogs in the database.

        Parameters
        ----------
        visit_time : `lsst.daf.base.DateTime`
            Time of the visit.
        objects : `pandas.DataFrame`
            Catalog with DiaObject records.
        sources : `pandas.DataFrame`, optional
            Catalog with DiaSource records.
        forced_sources : `pandas.DataFrame`, optional
            Catalog with DiaForcedSource records.

        Notes
        -----
        This method takes DataFrame catalogs whose schema must be compatible
        with the schema of the APDB tables:

        - column names must correspond to database table columns
        - types and units of the columns must match database definitions,
          no unit conversion is performed presently
        - columns that have default values in database schema can be
          omitted from catalog
        - this method knows how to fill interval-related columns of DiaObject
          (validityStart, validityEnd), they do not need to appear in a
          catalog
        - source catalogs have ``diaObjectId`` column associating sources
          with objects
        """
        raise NotImplementedError()

    @abstractmethod
    def storeSSObjects(self, objects: pandas.DataFrame) -> None:
        """Store or update SSObject catalog.

        Parameters
        ----------
        objects : `pandas.DataFrame`
            Catalog with SSObject records.

        Notes
        -----
        Records of SSObjects whose IDs already exist in the database are
        updated with the information from the provided records.
        """
        raise NotImplementedError()

    @abstractmethod
    def reassignDiaSources(self, idMap: Mapping[int, int]) -> None:
        """Associate DiaSources with SSObjects, dis-associating them
        from DiaObjects.

        Parameters
        ----------
        idMap : `Mapping`
            Maps DiaSource IDs to their new SSObject IDs.

        Raises
        ------
        ValueError
            Raised if DiaSource ID does not exist in the database.
        """
        raise NotImplementedError()

    @abstractmethod
    def dailyJob(self) -> None:
        """Implement daily activities like cleanup/vacuum.

        The specific implementation determines what is done during daily
        activities.
        """
        raise NotImplementedError()

    @abstractmethod
    def countUnassociatedObjects(self) -> int:
        """Return the number of DiaObjects that have only one DiaSource
        associated with them.

        Used as part of ap_verify metrics.

        Returns
        -------
        count : `int`
            Number of DiaObjects with exactly one associated DiaSource.

        Notes
        -----
        This method can be very inefficient or slow in some implementations.
        """
        raise NotImplementedError()

    @classmethod
    def makeField(cls, doc: str) -> ConfigurableField:
        """Make a `~lsst.pex.config.ConfigurableField` for Apdb.

        Parameters
        ----------
        doc : `str`
            Help text for the field.

        Returns
        -------
        configurableField : `lsst.pex.config.ConfigurableField`
            A `~lsst.pex.config.ConfigurableField` whose target is the class
            this method is called on.
        """
        field = ConfigurableField(target=cls, doc=doc)
        return field