Coverage for python/lsst/dax/apdb/apdb.py: 83%

110 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-03-28 10:11 +0000

1# This file is part of dax_apdb. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ["ApdbConfig", "Apdb", "ApdbInsertId", "ApdbTableData"] 

25 

26import os 

27from abc import ABC, abstractmethod 

28from collections.abc import Iterable, Mapping 

29from dataclasses import dataclass 

30from typing import TYPE_CHECKING 

31from uuid import UUID, uuid4 

32 

33import astropy.time 

34import pandas 

35from felis.simple import Table 

36from lsst.pex.config import Config, ConfigurableField, Field 

37from lsst.sphgeom import Region 

38 

39from .apdbSchema import ApdbTables 

40 

41if TYPE_CHECKING: 41 ↛ 42line 41 didn't jump to line 42, because the condition on line 41 was never true

42 from .apdbMetadata import ApdbMetadata 

43 from .versionTuple import VersionTuple 

44 

45 

def _data_file_name(basename: str) -> str:
    """Build the path of a data file shipped with the sdm_schemas package.

    Parameters
    ----------
    basename : `str`
        Name of the file inside the ``yml`` sub-directory.

    Returns
    -------
    path : `str`
        Path made of the literal ``${SDM_SCHEMAS_DIR}`` placeholder, the
        ``yml`` directory and ``basename``.
    """
    # The environment variable reference is kept verbatim in the result, this
    # function does not expand it.
    components = ("${SDM_SCHEMAS_DIR}", "yml", basename)
    return os.path.join(*components)

49 

50 

class ApdbConfig(Config):
    """Configuration options shared by all Apdb implementations."""

    # History depth, in months, for reading DiaSource records.
    read_sources_months = Field[int](
        default=12,
        doc="Number of months of history to read from DiaSource",
    )

    # History depth, in months, for reading DiaForcedSource records.
    read_forced_sources_months = Field[int](
        default=12,
        doc="Number of months of history to read from DiaForcedSource",
    )

    # Standard schema definition, by default a file from sdm_schemas package.
    schema_file = Field[str](
        default=_data_file_name("apdb.yaml"),
        doc="Location of (YAML) configuration file with standard schema",
    )

    # Which schema to pick from the YAML file.
    schema_name = Field[str](
        default="ApdbSchema",
        doc="Name of the schema in YAML configuration file.",
    )

    # Deprecated option, kept only so that existing configurations still load.
    extra_schema_file = Field[str](
        default=None,
        optional=True,
        deprecated="This field is deprecated, its value is not used.",
        doc=(
            "Location of (YAML) configuration file with extra schema, "
            "definitions in this file are merged with the definitions in "
            "'schema_file', extending or replacing parts of the schema."
        ),
    )

    # Switch for the extra tables backing the history-related methods.
    use_insert_id = Field[bool](
        default=False,
        doc=(
            "If True, make and fill additional tables used for getHistory methods. "
            "Databases created with earlier versions of APDB may not have these tables, "
            "and corresponding methods will not work for them."
        ),
    )

78 

79 

class ApdbTableData(ABC):
    """Abstract interface describing the contents of a table.

    Concrete classes expose the table as an ordered list of column names
    plus an iterable of row tuples.
    """

    @abstractmethod
    def column_names(self) -> list[str]:
        """Provide names of the table columns in their order.

        Returns
        -------
        names : `list` [`str`]
            Ordered column names.
        """
        raise NotImplementedError()

    @abstractmethod
    def rows(self) -> Iterable[tuple]:
        """Provide the rows of the table.

        Returns
        -------
        rows : `iterable` [`tuple`]
            Iterable of rows, every row is a tuple of values.
        """
        raise NotImplementedError()

104 

105 

@dataclass(frozen=True)
class ApdbInsertId:
    """Class used to identify single insert operation.

    Instances of this class are used to identify the units of transfer from
    APDB to PPDB. Usually single `ApdbInsertId` corresponds to a single call to
    `store` method.
    """

    id: UUID
    """Identifier of this insert (`uuid.UUID`)."""

    insert_time: astropy.time.Time
    """Time of this insert, usually corresponds to visit time
    (`astropy.time.Time`).
    """

    @classmethod
    def new_insert_id(cls, insert_time: astropy.time.Time) -> ApdbInsertId:
        """Generate new unique insert identifier.

        Parameters
        ----------
        insert_time : `astropy.time.Time`
            Time to associate with the new identifier.

        Returns
        -------
        insert_id : `ApdbInsertId`
            New instance with a random (version 4) UUID and the given time.
            When called on a subclass, an instance of that subclass is
            returned.
        """
        # Construct via ``cls`` rather than the hard-coded class name so that
        # this alternate constructor also works for subclasses.
        return cls(id=uuid4(), insert_time=insert_time)

125 

126 

class Apdb(ABC):
    """Abstract base class defining the APDB interface."""

    ConfigClass = ApdbConfig

    @classmethod
    @abstractmethod
    def apdbImplementationVersion(cls) -> VersionTuple:
        """Report the version of the current APDB implementation.

        Returns
        -------
        version : `VersionTuple`
            Code version as defined in the implementation class.
        """
        raise NotImplementedError()

    @abstractmethod
    def apdbSchemaVersion(self) -> VersionTuple:
        """Report the schema version declared in the config file.

        Returns
        -------
        version : `VersionTuple`
            Schema version as defined in the schema config file.
        """
        raise NotImplementedError()

    @abstractmethod
    def tableDef(self, table: ApdbTables) -> Table | None:
        """Look up the schema definition of a single table.

        Parameters
        ----------
        table : `ApdbTables`
            One of the known APDB tables.

        Returns
        -------
        tableSchema : `felis.simple.Table` or `None`
            Description of the table schema, or `None` when this
            implementation does not define the table.
        """
        raise NotImplementedError()

    @classmethod
    def makeSchema(cls, config: ApdbConfig, *, drop: bool = False) -> None:
        """Create the whole database schema, optionally re-creating it.

        Parameters
        ----------
        config : `ApdbConfig`
            Configuration instance, its type must match the type used by the
            actual implementation class of this interface.
        drop : `bool`
            If True then all tables are dropped before new ones are created.
        """
        from .factory import apdb_type

        # The type of the configuration selects the implementation class,
        # and that class does the actual work.
        apdb_type(config).makeSchema(config, drop=drop)

    @abstractmethod
    def getDiaObjects(self, region: Region) -> pandas.DataFrame:
        """Retrieve DiaObject records for a given region.

        Only the latest version of every DiaObject is returned. The returned
        catalog may include records located outside of the specified region,
        clients are responsible for ignoring those records or for cleaning
        up the catalog before further use.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DiaObjects.

        Returns
        -------
        catalog : `pandas.DataFrame`
            DiaObject records covering an area that may be a superset of the
            specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaSources(
        self, region: Region, object_ids: Iterable[int] | None, visit_time: astropy.time.Time
    ) -> pandas.DataFrame | None:
        """Retrieve DiaSource records for a given region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DiaSources.
        object_ids : iterable [ `int` ] or `None`
            DiaObject IDs used to further constrain the set of returned
            sources. With `None` the returned sources are not constrained.
            With an empty list an empty catalog with a correct schema is
            returned.
        visit_time : `astropy.time.Time`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            DiaSource records. `None` is returned when the
            ``read_sources_months`` configuration parameter is set to 0.

        Notes
        -----
        The catalog is selected by region and additionally filtered by
        DiaObject IDs. Only part of the DiaSource history is returned, its
        extent is limited by the ``read_sources_months`` config parameter
        relative to ``visit_time``. An empty ``object_ids`` always produces
        an empty catalog with the correct schema (columns/types). If
        ``object_ids`` is `None` no filtering is done and some of the
        returned records may be outside the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaForcedSources(
        self, region: Region, object_ids: Iterable[int] | None, visit_time: astropy.time.Time
    ) -> pandas.DataFrame | None:
        """Retrieve DiaForcedSource records for a given region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DiaForcedSources.
        object_ids : iterable [ `int` ] or `None`
            DiaObject IDs used to further constrain the set of returned
            sources. With an empty list an empty catalog with a correct
            schema is returned. With `None` the returned sources are not
            constrained, but some implementations may not support that case.
        visit_time : `astropy.time.Time`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            DiaForcedSource records. `None` is returned when the
            ``read_forced_sources_months`` configuration parameter is set
            to 0.

        Raises
        ------
        NotImplementedError
            May be raised by some implementations if ``object_ids`` is `None`.

        Notes
        -----
        The catalog is selected by region and additionally filtered by
        DiaObject IDs. Only part of the DiaForcedSource history is returned,
        its extent is limited by the ``read_forced_sources_months`` config
        parameter relative to ``visit_time``. An empty ``object_ids`` always
        produces an empty catalog with the correct schema (columns/types).
        If ``object_ids`` is `None` no filtering is done and some of the
        returned records may be outside the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def containsVisitDetector(self, visit: int, detector: int) -> bool:
        """Check whether APDB has data for a given visit-detector.

        Parameters
        ----------
        visit, detector : `int`
            The ID of the visit-detector to search for.

        Returns
        -------
        present : `bool`
            `True` when at least one DiaObject, DiaSource, or DiaForcedSource
            record exists for the specified observation, `False` otherwise.
        """
        raise NotImplementedError()

    @abstractmethod
    def getInsertIds(self) -> list[ApdbInsertId] | None:
        """Retrieve the insert identifiers known to the database.

        Returns
        -------
        ids : `list` [`ApdbInsertId`] or `None`
            List of identifiers, which may be time-ordered if the database
            supports ordering. `None` is returned when the database is not
            configured to store insert identifiers.
        """
        raise NotImplementedError()

    @abstractmethod
    def deleteInsertIds(self, ids: Iterable[ApdbInsertId]) -> None:
        """Delete insert identifiers from the database.

        Parameters
        ----------
        ids : `iterable` [`ApdbInsertId`]
            Insert identifiers, can include items returned from `getInsertIds`.

        Notes
        -----
        After this call Apdb no longer knows about the specified
        identifiers. Auxiliary data associated with the identifiers, if any,
        is removed from the database as well, while data in regular tables
        stays in place. The method is meant to be called after a successful
        transfer of data from APDB to PPDB, to free the space used by
        history.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaObjectsHistory(self, ids: Iterable[ApdbInsertId]) -> ApdbTableData:
        """Retrieve DiaObject records from a given time period, including
        the history of each DiaObject.

        Parameters
        ----------
        ids : `iterable` [`ApdbInsertId`]
            Insert identifiers, can include items returned from `getInsertIds`.

        Returns
        -------
        data : `ApdbTableData`
            DiaObject records. Besides all regular columns the data includes
            an ``insert_id`` column.

        Notes
        -----
        This part of the API may not be very stable and can change before
        the implementation finalizes.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaSourcesHistory(self, ids: Iterable[ApdbInsertId]) -> ApdbTableData:
        """Retrieve DiaSource records from a given time period.

        Parameters
        ----------
        ids : `iterable` [`ApdbInsertId`]
            Insert identifiers, can include items returned from `getInsertIds`.

        Returns
        -------
        data : `ApdbTableData`
            DiaSource records. Besides all regular columns the data includes
            an ``insert_id`` column.

        Notes
        -----
        This part of the API may not be very stable and can change before
        the implementation finalizes.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaForcedSourcesHistory(self, ids: Iterable[ApdbInsertId]) -> ApdbTableData:
        """Retrieve DiaForcedSource records from a given time period.

        Parameters
        ----------
        ids : `iterable` [`ApdbInsertId`]
            Insert identifiers, can include items returned from `getInsertIds`.

        Returns
        -------
        data : `ApdbTableData`
            DiaForcedSource records. Besides all regular columns the data
            includes an ``insert_id`` column.

        Notes
        -----
        This part of the API may not be very stable and can change before
        the implementation finalizes.
        """
        raise NotImplementedError()

    @abstractmethod
    def getSSObjects(self) -> pandas.DataFrame:
        """Retrieve all SSObject records.

        Returns
        -------
        catalog : `pandas.DataFrame`
            SSObject records, every existing record is included.
        """
        raise NotImplementedError()

    @abstractmethod
    def store(
        self,
        visit_time: astropy.time.Time,
        objects: pandas.DataFrame,
        sources: pandas.DataFrame | None = None,
        forced_sources: pandas.DataFrame | None = None,
    ) -> None:
        """Save all three kinds of catalogs to the database.

        Parameters
        ----------
        visit_time : `astropy.time.Time`
            Time of the visit.
        objects : `pandas.DataFrame`
            Catalog with DiaObject records.
        sources : `pandas.DataFrame`, optional
            Catalog with DiaSource records.
        forced_sources : `pandas.DataFrame`, optional
            Catalog with DiaForcedSource records.

        Notes
        -----
        The schema of the DataFrame catalogs passed to this method has to be
        compatible with the schema of the APDB tables:

        - column names must correspond to database table columns
        - types and units of the columns must match database definitions,
          no unit conversion is performed presently
        - columns that have default values in database schema can be
          omitted from catalog
        - this method knows how to fill interval-related columns of DiaObject
          (validityStart, validityEnd), they do not need to appear in a
          catalog
        - source catalogs have ``diaObjectId`` column associating sources
          with objects
        """
        raise NotImplementedError()

    @abstractmethod
    def storeSSObjects(self, objects: pandas.DataFrame) -> None:
        """Save new or update existing SSObject records.

        Parameters
        ----------
        objects : `pandas.DataFrame`
            Catalog with SSObject records.

        Notes
        -----
        Records of SSObjects whose IDs already exist in the database are
        updated with the information from the provided records.
        """
        raise NotImplementedError()

    @abstractmethod
    def reassignDiaSources(self, idMap: Mapping[int, int]) -> None:
        """Re-associate DiaSources with SSObjects, removing their
        association with DiaObjects.

        Parameters
        ----------
        idMap : `Mapping`
            Maps DiaSource IDs to their new SSObject IDs.

        Raises
        ------
        ValueError
            Raised if DiaSource ID does not exist in the database.
        """
        raise NotImplementedError()

    @abstractmethod
    def dailyJob(self) -> None:
        """Run daily activities such as cleanup/vacuum.

        The specific implementation decides what is done during daily
        activities.
        """
        raise NotImplementedError()

    @abstractmethod
    def countUnassociatedObjects(self) -> int:
        """Count DiaObjects that have only one DiaSource associated with
        them.

        Used as part of ap_verify metrics.

        Returns
        -------
        count : `int`
            Number of DiaObjects with exactly one associated DiaSource.

        Notes
        -----
        This method can be very inefficient or slow in some implementations.
        """
        raise NotImplementedError()

    @classmethod
    def makeField(cls, doc: str) -> ConfigurableField:
        """Build a `~lsst.pex.config.ConfigurableField` for Apdb.

        Parameters
        ----------
        doc : `str`
            Help text for the field.

        Returns
        -------
        configurableField : `lsst.pex.config.ConfigurableField`
            A `~lsst.pex.config.ConfigurableField` whose target is the class
            this method is called on.
        """
        return ConfigurableField(target=cls, doc=doc)

    @property
    @abstractmethod
    def metadata(self) -> ApdbMetadata:
        """Object that controls access to APDB metadata (`ApdbMetadata`)."""
        raise NotImplementedError()