Coverage for python/lsst/dax/apdb/apdb.py: 84%

108 statements  

« prev     ^ index     » next       coverage.py v7.4.2, created at 2024-02-23 11:49 +0000

1# This file is part of dax_apdb. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ["ApdbConfig", "Apdb", "ApdbInsertId", "ApdbTableData"] 

25 

26import os 

27from abc import ABC, abstractmethod 

28from collections.abc import Iterable, Mapping 

29from dataclasses import dataclass 

30from typing import TYPE_CHECKING 

31from uuid import UUID, uuid4 

32 

33import lsst.daf.base as dafBase 

34import pandas 

35from felis.simple import Table 

36from lsst.pex.config import Config, ConfigurableField, Field 

37from lsst.sphgeom import Region 

38 

39from .apdbSchema import ApdbTables 

40 

41if TYPE_CHECKING: 41 ↛ 42line 41 didn't jump to line 42, because the condition on line 41 was never true

42 from .apdbMetadata import ApdbMetadata 

43 from .versionTuple import VersionTuple 

44 

45 

def _data_file_name(basename: str) -> str:
    """Build the path of a data file shipped with the sdm_schemas package.

    The result starts with the literal ``${SDM_SCHEMAS_DIR}`` placeholder
    followed by the ``yml`` sub-directory; the placeholder is not expanded
    by this function.
    """
    path_parts = ("${SDM_SCHEMAS_DIR}", "yml", basename)
    return os.path.join(*path_parts)

49 

50 

class ApdbConfig(Config):
    """Configuration options shared by every Apdb implementation."""

    # History windows used when reading source catalogs.
    read_sources_months = Field[int](
        default=12,
        doc="Number of months of history to read from DiaSource",
    )
    read_forced_sources_months = Field[int](
        default=12,
        doc="Number of months of history to read from DiaForcedSource",
    )

    # Schema location and name.
    schema_file = Field[str](
        default=_data_file_name("apdb.yaml"),
        doc="Location of (YAML) configuration file with standard schema",
    )
    schema_name = Field[str](
        default="ApdbSchema",
        doc="Name of the schema in YAML configuration file.",
    )

    # Deprecated option, kept only so that existing configurations load.
    extra_schema_file = Field[str](
        default=None,
        optional=True,
        deprecated="This field is deprecated, its value is not used.",
        doc="Location of (YAML) configuration file with extra schema, "
        "definitions in this file are merged with the definitions in "
        "'schema_file', extending or replacing parts of the schema.",
    )

    # Switch for the additional tables backing the getHistory methods.
    use_insert_id = Field[bool](
        default=False,
        doc=(
            "If True, make and fill additional tables used for getHistory methods. "
            "Databases created with earlier versions of APDB may not have these tables, "
            "and corresponding methods will not work for them."
        ),
    )

78 

79 

class ApdbTableData(ABC):
    """Abstract interface describing the contents of a table."""

    @abstractmethod
    def column_names(self) -> list[str]:
        """Return the names of the table columns, in order.

        Returns
        -------
        names : `list` [`str`]
            Ordered column names.
        """
        raise NotImplementedError

    @abstractmethod
    def rows(self) -> Iterable[tuple]:
        """Return the rows of the table, one tuple of values per row.

        Returns
        -------
        rows : `iterable` [`tuple`]
            Iterable producing one tuple for every row.
        """
        raise NotImplementedError

104 

105 

@dataclass(frozen=True)
class ApdbInsertId:
    """Class used to identify single insert operation.

    Instances of this class are used to identify the units of transfer from
    APDB to PPDB. Usually single `ApdbInsertId` corresponds to a single call to
    `store` method.
    """

    # Unique identifier of the insert; `new_insert_id` fills it with a
    # random UUID (``uuid4``).
    id: UUID
    insert_time: dafBase.DateTime
    """Time of this insert, usually corresponds to visit time
    (`dafBase.DateTime`).
    """

    @classmethod
    def new_insert_id(cls, insert_time: dafBase.DateTime) -> ApdbInsertId:
        """Generate new unique insert identifier.

        Parameters
        ----------
        insert_time : `dafBase.DateTime`
            Time to associate with the new identifier.

        Returns
        -------
        insert_id : `ApdbInsertId`
            New instance with a freshly generated random ``id``. When called
            on a subclass, an instance of that subclass is returned.
        """
        # Construct through ``cls`` so that subclasses get instances of their
        # own type instead of the base class.
        return cls(id=uuid4(), insert_time=insert_time)

125 

126 

class Apdb(ABC):
    """Abstract base class defining the APDB interface."""

    ConfigClass = ApdbConfig

    @classmethod
    @abstractmethod
    def apdbImplementationVersion(cls) -> VersionTuple:
        """Return the version number of the current APDB implementation.

        Returns
        -------
        version : `VersionTuple`
            Code version as defined in the implementation class.
        """
        raise NotImplementedError

    @abstractmethod
    def apdbSchemaVersion(self) -> VersionTuple:
        """Return the schema version number defined in the config file.

        Returns
        -------
        version : `VersionTuple`
            Schema version as defined in the schema config file.
        """
        raise NotImplementedError

    @abstractmethod
    def tableDef(self, table: ApdbTables) -> Table | None:
        """Return the schema definition of one table.

        Parameters
        ----------
        table : `ApdbTables`
            One of the known APDB tables.

        Returns
        -------
        tableSchema : `felis.simple.Table` or `None`
            Description of the table schema, or `None` if this implementation
            does not define the table.
        """
        raise NotImplementedError

    @abstractmethod
    def makeSchema(self, drop: bool = False) -> None:
        """Create, or re-create, the complete database schema.

        Parameters
        ----------
        drop : `bool`
            If True, all tables are dropped before new ones are created.
        """
        raise NotImplementedError

    @abstractmethod
    def getDiaObjects(self, region: Region) -> pandas.DataFrame:
        """Return a catalog of DiaObject instances from a given region.

        Only the last version of each DiaObject is returned. The returned
        catalog may include records located outside the specified region;
        clients are expected to ignore such records or to clean up the
        catalog before further use.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DIAObjects.

        Returns
        -------
        catalog : `pandas.DataFrame`
            DiaObject records for a region that may be a superset of the
            specified region.
        """
        raise NotImplementedError

    @abstractmethod
    def getDiaSources(
        self,
        region: Region,
        object_ids: Iterable[int] | None,
        visit_time: dafBase.DateTime,
    ) -> pandas.DataFrame | None:
        """Return a catalog of DiaSource instances from a given region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DIASources.
        object_ids : iterable [ `int` ], optional
            DiaObject IDs used to further constrain the returned sources.
            `None` means that returned sources are not constrained. An empty
            list results in an empty catalog with a correct schema.
        visit_time : `lsst.daf.base.DateTime`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            DiaSource records. `None` is returned if the
            ``read_sources_months`` configuration parameter is set to 0.

        Notes
        -----
        The DiaSource catalog for the region is additionally filtered by
        DiaObject IDs. The amount of returned DiaSource history is limited
        by the ``read_sources_months`` config parameter, relative to
        ``visit_time``. With an empty ``object_ids`` the result is always an
        empty catalog with the correct schema (columns/types). With
        ``object_ids`` set to `None` no filtering is performed and some of
        the returned records may be outside the specified region.
        """
        raise NotImplementedError

    @abstractmethod
    def getDiaForcedSources(
        self,
        region: Region,
        object_ids: Iterable[int] | None,
        visit_time: dafBase.DateTime,
    ) -> pandas.DataFrame | None:
        """Return a catalog of DiaForcedSource instances from a given region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search.
        object_ids : iterable [ `int` ], optional
            DiaObject IDs used to further constrain the returned sources. An
            empty list results in an empty catalog with a correct schema.
            `None` means that returned sources are not constrained; some
            implementations may not support this case.
        visit_time : `lsst.daf.base.DateTime`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            DiaForcedSource records. `None` is returned if the
            ``read_forced_sources_months`` configuration parameter is set
            to 0.

        Raises
        ------
        NotImplementedError
            May be raised by some implementations if ``object_ids`` is `None`.

        Notes
        -----
        The DiaForcedSource catalog for the region is additionally filtered
        by DiaObject IDs. The amount of returned history is limited by the
        ``read_forced_sources_months`` config parameter, relative to
        ``visit_time``. With an empty ``object_ids`` the result is always an
        empty catalog with the correct schema (columns/types). With
        ``object_ids`` set to `None` no filtering is performed and some of
        the returned records may be outside the specified region.
        """
        raise NotImplementedError

    @abstractmethod
    def containsVisitDetector(self, visit: int, detector: int) -> bool:
        """Check whether the APDB holds data for a given visit-detector.

        Parameters
        ----------
        visit, detector : `int`
            The ID of the visit-detector to search for.

        Returns
        -------
        present : `bool`
            `True` if some DiaObject, DiaSource, or DiaForcedSource records
            exist for the specified observation, `False` otherwise.
        """
        raise NotImplementedError

    @abstractmethod
    def getInsertIds(self) -> list[ApdbInsertId] | None:
        """Return the insert identifiers known to the database.

        Returns
        -------
        ids : `list` [`ApdbInsertId`] or `None`
            List of identifiers, which may be time-ordered if the database
            supports ordering. `None` is returned if the database is not
            configured to store insert identifiers.
        """
        raise NotImplementedError

    @abstractmethod
    def deleteInsertIds(self, ids: Iterable[ApdbInsertId]) -> None:
        """Remove insert identifiers from the database.

        Parameters
        ----------
        ids : `iterable` [`ApdbInsertId`]
            Insert identifiers, can include items returned from `getInsertIds`.

        Notes
        -----
        After this call Apdb no longer knows about the specified
        identifiers. Auxiliary data associated with the identifiers, if any,
        is removed from the database as well, while data in regular tables
        is kept. The method is meant to be called after a successful
        transfer of data from APDB to PPDB, to free the space used by
        history.
        """
        raise NotImplementedError

    @abstractmethod
    def getDiaObjectsHistory(self, ids: Iterable[ApdbInsertId]) -> ApdbTableData:
        """Return a catalog of DiaObject instances from a given time period,
        including the history of each DiaObject.

        Parameters
        ----------
        ids : `iterable` [`ApdbInsertId`]
            Insert identifiers, can include items returned from `getInsertIds`.

        Returns
        -------
        data : `ApdbTableData`
            DiaObject records. Besides all regular columns the catalog
            contains an ``insert_id`` column.

        Notes
        -----
        This part of the API may not be very stable and can change before
        the implementation finalizes.
        """
        raise NotImplementedError

    @abstractmethod
    def getDiaSourcesHistory(self, ids: Iterable[ApdbInsertId]) -> ApdbTableData:
        """Return a catalog of DiaSource instances from a given time period.

        Parameters
        ----------
        ids : `iterable` [`ApdbInsertId`]
            Insert identifiers, can include items returned from `getInsertIds`.

        Returns
        -------
        data : `ApdbTableData`
            DiaSource records. Besides all regular columns the catalog
            contains an ``insert_id`` column.

        Notes
        -----
        This part of the API may not be very stable and can change before
        the implementation finalizes.
        """
        raise NotImplementedError

    @abstractmethod
    def getDiaForcedSourcesHistory(self, ids: Iterable[ApdbInsertId]) -> ApdbTableData:
        """Return a catalog of DiaForcedSource instances from a given time
        period.

        Parameters
        ----------
        ids : `iterable` [`ApdbInsertId`]
            Insert identifiers, can include items returned from `getInsertIds`.

        Returns
        -------
        data : `ApdbTableData`
            DiaForcedSource records. Besides all regular columns the catalog
            contains an ``insert_id`` column.

        Notes
        -----
        This part of the API may not be very stable and can change before
        the implementation finalizes.
        """
        raise NotImplementedError

    @abstractmethod
    def getSSObjects(self) -> pandas.DataFrame:
        """Return a catalog of SSObject instances.

        Returns
        -------
        catalog : `pandas.DataFrame`
            SSObject records; all existing records are returned.
        """
        raise NotImplementedError

    @abstractmethod
    def store(
        self,
        visit_time: dafBase.DateTime,
        objects: pandas.DataFrame,
        sources: pandas.DataFrame | None = None,
        forced_sources: pandas.DataFrame | None = None,
    ) -> None:
        """Store all three types of catalogs in the database.

        Parameters
        ----------
        visit_time : `lsst.daf.base.DateTime`
            Time of the visit.
        objects : `pandas.DataFrame`
            Catalog with DiaObject records.
        sources : `pandas.DataFrame`, optional
            Catalog with DiaSource records.
        forced_sources : `pandas.DataFrame`, optional
            Catalog with DiaForcedSource records.

        Notes
        -----
        The DataFrame catalogs passed to this method must have a schema
        compatible with the schema of the APDB tables:

        - column names must correspond to database table columns
        - types and units of the columns must match database definitions,
          no unit conversion is performed presently
        - columns that have default values in database schema can be
          omitted from catalog
        - this method knows how to fill interval-related columns of DiaObject
          (validityStart, validityEnd) they do not need to appear in a
          catalog
        - source catalogs have ``diaObjectId`` column associating sources
          with objects
        """
        raise NotImplementedError

    @abstractmethod
    def storeSSObjects(self, objects: pandas.DataFrame) -> None:
        """Store or update the SSObject catalog.

        Parameters
        ----------
        objects : `pandas.DataFrame`
            Catalog with SSObject records.

        Notes
        -----
        Records of SSObjects whose IDs already exist in the database are
        updated with the information from the provided records.
        """
        raise NotImplementedError

    @abstractmethod
    def reassignDiaSources(self, idMap: Mapping[int, int]) -> None:
        """Associate DiaSources with SSObjects, dis-associating them
        from DiaObjects.

        Parameters
        ----------
        idMap : `Mapping`
            Maps DiaSource IDs to their new SSObject IDs.

        Raises
        ------
        ValueError
            Raised if DiaSource ID does not exist in the database.
        """
        raise NotImplementedError

    @abstractmethod
    def dailyJob(self) -> None:
        """Perform daily activities such as cleanup/vacuum.

        The specific implementation determines what is done during daily
        activities.
        """
        raise NotImplementedError

    @abstractmethod
    def countUnassociatedObjects(self) -> int:
        """Return the number of DiaObjects that have only one DiaSource
        associated with them.

        Used as part of ap_verify metrics.

        Returns
        -------
        count : `int`
            Number of DiaObjects with exactly one associated DiaSource.

        Notes
        -----
        This method can be very inefficient or slow in some implementations.
        """
        raise NotImplementedError

    @classmethod
    def makeField(cls, doc: str) -> ConfigurableField:
        """Make a `~lsst.pex.config.ConfigurableField` for Apdb.

        Parameters
        ----------
        doc : `str`
            Help text for the field.

        Returns
        -------
        configurableField : `lsst.pex.config.ConfigurableField`
            A `~lsst.pex.config.ConfigurableField` whose target is the class
            this method is called on.
        """
        return ConfigurableField(target=cls, doc=doc)

    @property
    @abstractmethod
    def metadata(self) -> ApdbMetadata:
        """Object controlling access to APDB metadata (`ApdbMetadata`)."""
        raise NotImplementedError