Coverage for python/lsst/dax/apdb/apdb.py: 91%

98 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-04-06 04:04 -0700

1# This file is part of dax_apdb. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ["ApdbConfig", "Apdb", "ApdbInsertId", "ApdbTableData"] 

25 

26import os 

27from abc import ABC, abstractmethod 

28from collections.abc import Iterable, Mapping 

29from dataclasses import dataclass 

30from typing import TYPE_CHECKING, cast 

31from uuid import UUID, uuid4 

32 

33import astropy.time 

34import pandas 

35from felis.simple import Table 

36from lsst.pex.config import Config, ConfigurableField, Field 

37from lsst.resources import ResourcePath, ResourcePathExpression 

38from lsst.sphgeom import Region 

39 

40from .apdbIndex import ApdbIndex 

41from .apdbSchema import ApdbTables 

42from .factory import make_apdb 

43 

44if TYPE_CHECKING: 

45 from .apdbMetadata import ApdbMetadata 

46 from .versionTuple import VersionTuple 

47 

48 

49def _data_file_name(basename: str) -> str: 

50 """Return path name of a data file in sdm_schemas package.""" 

51 return os.path.join("${SDM_SCHEMAS_DIR}", "yml", basename) 

52 

53 

class ApdbConfig(Config):
    """Configuration options shared by every Apdb implementation."""

    # History windows used when reading source catalogs.
    read_sources_months = Field[int](
        doc="Number of months of history to read from DiaSource",
        default=12,
    )
    read_forced_sources_months = Field[int](
        doc="Number of months of history to read from DiaForcedSource",
        default=12,
    )

    # Schema location and name; the default file comes from sdm_schemas.
    schema_file = Field[str](
        doc="Location of (YAML) configuration file with standard schema",
        default=_data_file_name("apdb.yaml"),
    )
    schema_name = Field[str](
        doc="Name of the schema in YAML configuration file.",
        default="ApdbSchema",
    )

    # Deprecated option, its value is declared as unused.
    extra_schema_file = Field[str](
        doc=(
            "Location of (YAML) configuration file with extra schema, "
            "definitions in this file are merged with the definitions in "
            "'schema_file', extending or replacing parts of the schema."
        ),
        default=None,
        optional=True,
        deprecated="This field is deprecated, its value is not used.",
    )

    # Switch for the extra tables backing the getHistory methods.
    use_insert_id = Field[bool](
        doc="If True, make and fill additional tables used for getHistory methods. "
        "Databases created with earlier versions of APDB may not have these tables, "
        "and corresponding methods will not work for them.",
        default=False,
    )

81 

82 

class ApdbTableData(ABC):
    """Abstract interface describing the contents of a table."""

    @abstractmethod
    def column_names(self) -> list[str]:
        """Return the names of the table columns, in order.

        Returns
        -------
        names : `list` [`str`]
            Ordered column names.
        """
        raise NotImplementedError

    @abstractmethod
    def rows(self) -> Iterable[tuple]:
        """Return the rows of the table, one tuple of values per row.

        Returns
        -------
        rows : `iterable` [`tuple`]
            Iterable producing row tuples.
        """
        raise NotImplementedError

107 

108 

@dataclass(frozen=True)
class ApdbInsertId:
    """Class used to identify single insert operation.

    Instances of this class are used to identify the units of transfer from
    APDB to PPDB. Usually single `ApdbInsertId` corresponds to a single call to
    `store` method.
    """

    id: UUID
    """Unique identifier of this insert (`uuid.UUID`)."""

    insert_time: astropy.time.Time
    """Time of this insert, usually corresponds to visit time
    (`astropy.time.Time`).
    """

    @classmethod
    def new_insert_id(cls, insert_time: astropy.time.Time) -> ApdbInsertId:
        """Generate new unique insert identifier.

        Parameters
        ----------
        insert_time : `astropy.time.Time`
            Time to associate with the new identifier.

        Returns
        -------
        insert_id : `ApdbInsertId`
            New instance with a random (version 4) UUID. When called on a
            subclass, an instance of that subclass is returned.
        """
        # Build through ``cls`` so that subclasses get their own type back.
        return cls(id=uuid4(), insert_time=insert_time)

128 

129 

class Apdb(ABC):
    """Abstract interface for APDB."""

    ConfigClass = ApdbConfig

    @classmethod
    def from_config(cls, config: ApdbConfig) -> Apdb:
        """Create Apdb instance from configuration object.

        Parameters
        ----------
        config : `ApdbConfig`
            Configuration object, type of this object determines type of the
            Apdb implementation.

        Returns
        -------
        apdb : `Apdb`
            Instance of `Apdb` class.
        """
        return make_apdb(config)

    @classmethod
    def from_uri(cls, uri: ResourcePathExpression) -> Apdb:
        """Make Apdb instance from a serialized configuration.

        Parameters
        ----------
        uri : `~lsst.resources.ResourcePathExpression`
            URI or local file path pointing to a file with serialized
            configuration, or a string with a "label:" prefix. In the latter
            case, the configuration will be looked up from an APDB index file
            using the label name that follows the prefix. The APDB index file's
            location is determined by the ``DAX_APDB_INDEX_URI`` environment
            variable.

        Returns
        -------
        apdb : `Apdb`
            Instance of `Apdb` class, the type of the returned instance is
            determined by configuration.
        """
        if isinstance(uri, str) and uri.startswith("label:"):
            # Everything after the first colon is the label to look up.
            _, _, label = uri.partition(":")
            index = ApdbIndex()
            # Current format for config files is "pex_config"
            config_format = "pex_config"
            uri = index.get_apdb_uri(label, config_format)
        path = ResourcePath(uri)
        config_str = path.read().decode()
        # NOTE(review): presumably this evaluates the file contents as a
        # Python config, so only trusted locations should be used - confirm.
        # Assume that this is ApdbConfig, make_apdb will raise if not.
        config = cast(ApdbConfig, Config._fromPython(config_str))
        return make_apdb(config)

    @classmethod
    @abstractmethod
    def apdbImplementationVersion(cls) -> VersionTuple:
        """Return version number for current APDB implementation.

        Returns
        -------
        version : `VersionTuple`
            Version of the code defined in implementation class.
        """
        raise NotImplementedError()

    @abstractmethod
    def apdbSchemaVersion(self) -> VersionTuple:
        """Return schema version number as defined in config file.

        Returns
        -------
        version : `VersionTuple`
            Version of the schema defined in schema config file.
        """
        raise NotImplementedError()

    @abstractmethod
    def tableDef(self, table: ApdbTables) -> Table | None:
        """Return table schema definition for a given table.

        Parameters
        ----------
        table : `ApdbTables`
            One of the known APDB tables.

        Returns
        -------
        tableSchema : `felis.simple.Table` or `None`
            Table schema description, `None` is returned if table is not
            defined by this implementation.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaObjects(self, region: Region) -> pandas.DataFrame:
        """Return catalog of DiaObject instances from a given region.

        This method returns only the last version of each DiaObject. Some
        records in a returned catalog may be outside the specified region, it
        is up to a client to ignore those records or cleanup the catalog before
        further use.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DIAObjects.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog containing DiaObject records for a region that may be a
            superset of the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaSources(
        self, region: Region, object_ids: Iterable[int] | None, visit_time: astropy.time.Time
    ) -> pandas.DataFrame | None:
        """Return catalog of DiaSource instances from a given region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DIASources.
        object_ids : iterable [ `int` ], optional
            List of DiaObject IDs to further constrain the set of returned
            sources. If `None` then returned sources are not constrained. If
            list is empty then empty catalog is returned with a correct
            schema.
        visit_time : `astropy.time.Time`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            Catalog containing DiaSource records. `None` is returned if
            ``read_sources_months`` configuration parameter is set to 0.

        Notes
        -----
        This method returns DiaSource catalog for a region with additional
        filtering based on DiaObject IDs. Only a subset of DiaSource history
        is returned limited by ``read_sources_months`` config parameter, w.r.t.
        ``visit_time``. If ``object_ids`` is empty then an empty catalog is
        always returned with the correct schema (columns/types). If
        ``object_ids`` is `None` then no filtering is performed and some of the
        returned records may be outside the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaForcedSources(
        self, region: Region, object_ids: Iterable[int] | None, visit_time: astropy.time.Time
    ) -> pandas.DataFrame | None:
        """Return catalog of DiaForcedSource instances from a given region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DIAForcedSources.
        object_ids : iterable [ `int` ], optional
            List of DiaObject IDs to further constrain the set of returned
            sources. If list is empty then empty catalog is returned with a
            correct schema. If `None` then returned sources are not
            constrained. Some implementations may not support latter case.
        visit_time : `astropy.time.Time`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            Catalog containing DiaForcedSource records. `None` is returned if
            ``read_forced_sources_months`` configuration parameter is set to 0.

        Raises
        ------
        NotImplementedError
            May be raised by some implementations if ``object_ids`` is `None`.

        Notes
        -----
        This method returns DiaForcedSource catalog for a region with
        additional filtering based on DiaObject IDs. Only a subset of
        DiaForcedSource history is returned limited by
        ``read_forced_sources_months`` config parameter, w.r.t.
        ``visit_time``. If ``object_ids`` is empty then an empty catalog is
        always returned with the correct schema (columns/types). If
        ``object_ids`` is `None` then no filtering is performed and some of the
        returned records may be outside the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def containsVisitDetector(self, visit: int, detector: int) -> bool:
        """Test whether data for a given visit-detector is present in the APDB.

        Parameters
        ----------
        visit, detector : `int`
            The ID of the visit-detector to search for.

        Returns
        -------
        present : `bool`
            `True` if some DiaObject, DiaSource, or DiaForcedSource records
            exist for the specified observation, `False` otherwise.
        """
        raise NotImplementedError()

    @abstractmethod
    def getInsertIds(self) -> list[ApdbInsertId] | None:
        """Return collection of insert identifiers known to the database.

        Returns
        -------
        ids : `list` [`ApdbInsertId`] or `None`
            List of identifiers, they may be time-ordered if database supports
            ordering. `None` is returned if database is not configured to store
            insert identifiers.
        """
        raise NotImplementedError()

    @abstractmethod
    def deleteInsertIds(self, ids: Iterable[ApdbInsertId]) -> None:
        """Remove insert identifiers from the database.

        Parameters
        ----------
        ids : `iterable` [`ApdbInsertId`]
            Insert identifiers, can include items returned from `getInsertIds`.

        Notes
        -----
        This method causes Apdb to forget about specified identifiers. If there
        are any auxiliary data associated with the identifiers, it is also
        removed from database (but data in regular tables is not removed).
        This method should be called after successful transfer of data from
        APDB to PPDB to free space used by history.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaObjectsHistory(self, ids: Iterable[ApdbInsertId]) -> ApdbTableData:
        """Return catalog of DiaObject instances from a given time period
        including the history of each DiaObject.

        Parameters
        ----------
        ids : `iterable` [`ApdbInsertId`]
            Insert identifiers, can include items returned from `getInsertIds`.

        Returns
        -------
        data : `ApdbTableData`
            Catalog containing DiaObject records. In addition to all regular
            columns it will contain ``insert_id`` column.

        Notes
        -----
        This part of API may not be very stable and can change before the
        implementation finalizes.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaSourcesHistory(self, ids: Iterable[ApdbInsertId]) -> ApdbTableData:
        """Return catalog of DiaSource instances from a given time period.

        Parameters
        ----------
        ids : `iterable` [`ApdbInsertId`]
            Insert identifiers, can include items returned from `getInsertIds`.

        Returns
        -------
        data : `ApdbTableData`
            Catalog containing DiaSource records. In addition to all regular
            columns it will contain ``insert_id`` column.

        Notes
        -----
        This part of API may not be very stable and can change before the
        implementation finalizes.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaForcedSourcesHistory(self, ids: Iterable[ApdbInsertId]) -> ApdbTableData:
        """Return catalog of DiaForcedSource instances from a given time
        period.

        Parameters
        ----------
        ids : `iterable` [`ApdbInsertId`]
            Insert identifiers, can include items returned from `getInsertIds`.

        Returns
        -------
        data : `ApdbTableData`
            Catalog containing DiaForcedSource records. In addition to all
            regular columns it will contain ``insert_id`` column.

        Notes
        -----
        This part of API may not be very stable and can change before the
        implementation finalizes.
        """
        raise NotImplementedError()

    @abstractmethod
    def getSSObjects(self) -> pandas.DataFrame:
        """Return catalog of SSObject instances.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog containing SSObject records, all existing records are
            returned.
        """
        raise NotImplementedError()

    @abstractmethod
    def store(
        self,
        visit_time: astropy.time.Time,
        objects: pandas.DataFrame,
        sources: pandas.DataFrame | None = None,
        forced_sources: pandas.DataFrame | None = None,
    ) -> None:
        """Store all three types of catalogs in the database.

        Parameters
        ----------
        visit_time : `astropy.time.Time`
            Time of the visit.
        objects : `pandas.DataFrame`
            Catalog with DiaObject records.
        sources : `pandas.DataFrame`, optional
            Catalog with DiaSource records.
        forced_sources : `pandas.DataFrame`, optional
            Catalog with DiaForcedSource records.

        Notes
        -----
        This method takes DataFrame catalogs, their schema must be
        compatible with the schema of APDB table:

        - column names must correspond to database table columns
        - types and units of the columns must match database definitions,
          no unit conversion is performed presently
        - columns that have default values in database schema can be
          omitted from catalog
        - this method knows how to fill interval-related columns of DiaObject
          (validityStart, validityEnd) they do not need to appear in a
          catalog
        - source catalogs have ``diaObjectId`` column associating sources
          with objects
        """
        raise NotImplementedError()

    @abstractmethod
    def storeSSObjects(self, objects: pandas.DataFrame) -> None:
        """Store or update SSObject catalog.

        Parameters
        ----------
        objects : `pandas.DataFrame`
            Catalog with SSObject records.

        Notes
        -----
        If SSObjects with matching IDs already exist in the database, their
        records will be updated with the information from provided records.
        """
        raise NotImplementedError()

    @abstractmethod
    def reassignDiaSources(self, idMap: Mapping[int, int]) -> None:
        """Associate DiaSources with SSObjects, dis-associating them
        from DiaObjects.

        Parameters
        ----------
        idMap : `Mapping`
            Maps DiaSource IDs to their new SSObject IDs.

        Raises
        ------
        ValueError
            Raised if DiaSource ID does not exist in the database.
        """
        raise NotImplementedError()

    @abstractmethod
    def dailyJob(self) -> None:
        """Implement daily activities like cleanup/vacuum.

        What should be done during daily activities is determined by
        specific implementation.
        """
        raise NotImplementedError()

    @abstractmethod
    def countUnassociatedObjects(self) -> int:
        """Return the number of DiaObjects that have only one DiaSource
        associated with them.

        Used as part of ap_verify metrics.

        Returns
        -------
        count : `int`
            Number of DiaObjects with exactly one associated DiaSource.

        Notes
        -----
        This method can be very inefficient or slow in some implementations.
        """
        raise NotImplementedError()

    @classmethod
    def makeField(cls, doc: str) -> ConfigurableField:
        """Make a `~lsst.pex.config.ConfigurableField` for Apdb.

        Parameters
        ----------
        doc : `str`
            Help text for the field.

        Returns
        -------
        configurableField : `lsst.pex.config.ConfigurableField`
            A `~lsst.pex.config.ConfigurableField` for Apdb.
        """
        return ConfigurableField(doc=doc, target=cls)

    @property
    @abstractmethod
    def metadata(self) -> ApdbMetadata:
        """Object controlling access to APDB metadata (`ApdbMetadata`)."""
        raise NotImplementedError()