Coverage for python/lsst/dax/apdb/apdb.py: 89%

74 statements  

« prev     ^ index     » next       coverage.py v7.5.0, created at 2024-04-24 09:59 +0000

1# This file is part of dax_apdb. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ["ApdbConfig", "Apdb"] 

25 

26import os 

27from abc import ABC, abstractmethod 

28from collections.abc import Iterable, Mapping 

29from typing import TYPE_CHECKING, cast 

30 

31import astropy.time 

32import pandas 

33from lsst.pex.config import Config, ConfigurableField, Field 

34from lsst.resources import ResourcePath, ResourcePathExpression 

35from lsst.sphgeom import Region 

36 

37from .apdbIndex import ApdbIndex 

38from .apdbSchema import ApdbTables 

39from .factory import make_apdb 

40from .schema_model import Table 

41 

42if TYPE_CHECKING: 

43 from .apdbMetadata import ApdbMetadata 

44 from .versionTuple import VersionTuple 

45 

46 

def _data_file_name(basename: str) -> str:
    """Return path name of a data file in sdm_schemas package.

    Parameters
    ----------
    basename : `str`
        Name of the file, without any directory component.

    Returns
    -------
    path : `str`
        Path built from the literal ``${SDM_SCHEMAS_DIR}`` placeholder, the
        ``yml`` sub-directory and ``basename``.

    Notes
    -----
    The environment variable reference is returned verbatim, it is not
    expanded here. NOTE(review): presumably the consumer of this path expands
    ``${SDM_SCHEMAS_DIR}`` when the file is opened -- confirm against caller.
    """
    return os.path.join("${SDM_SCHEMAS_DIR}", "yml", basename)

50 

51 

class ApdbConfig(Config):
    """Part of Apdb configuration common to all implementations."""

    # History windows, in months, for the two source tables. The `Apdb`
    # read methods document that a value of 0 makes them return `None`.
    read_sources_months = Field[int](doc="Number of months of history to read from DiaSource", default=12)
    read_forced_sources_months = Field[int](
        doc="Number of months of history to read from DiaForcedSource", default=12
    )

    # Schema location; the default points into the sdm_schemas package via
    # an unexpanded ``${SDM_SCHEMAS_DIR}`` reference.
    schema_file = Field[str](
        doc="Location of (YAML) configuration file with standard schema", default=_data_file_name("apdb.yaml")
    )
    schema_name = Field[str](doc="Name of the schema in YAML configuration file.", default="ApdbSchema")

    # Deprecated field, retained so that it can still be declared; its value
    # is ignored according to the deprecation message.
    extra_schema_file = Field[str](
        doc="Location of (YAML) configuration file with extra schema, "
        "definitions in this file are merged with the definitions in "
        "'schema_file', extending or replacing parts of the schema.",
        default=None,
        optional=True,
        deprecated="This field is deprecated, its value is not used.",
    )

    # Replication support is opt-in (default is False).
    use_insert_id = Field[bool](
        doc=(
            "If True, make and fill additional tables used for replication. "
            "Databases created with earlier versions of APDB may not have these tables, "
            "and corresponding methods will not work for them."
        ),
        default=False,
    )
    # Length of one replica chunk in seconds (default is 600).
    replica_chunk_seconds = Field[int](
        default=600,
        doc="Time extent for replica chunks, new chunks are created every specified number of seconds.",
    )

83 

84 

class Apdb(ABC):
    """Abstract interface for APDB.

    Instances are created with `from_config` or `from_uri`; both delegate to
    ``make_apdb``, so the concrete type of the returned object is determined
    by the configuration.
    """

    ConfigClass = ApdbConfig

    @classmethod
    def from_config(cls, config: ApdbConfig) -> Apdb:
        """Create Apdb instance from configuration object.

        Parameters
        ----------
        config : `ApdbConfig`
            Configuration object, type of this object determines type of the
            Apdb implementation.

        Returns
        -------
        apdb : `Apdb`
            Instance of `Apdb` class.
        """
        return make_apdb(config)

    @classmethod
    def from_uri(cls, uri: ResourcePathExpression) -> Apdb:
        """Make Apdb instance from a serialized configuration.

        Parameters
        ----------
        uri : `~lsst.resources.ResourcePathExpression`
            URI or local file path pointing to a file with serialized
            configuration, or a string with a "label:" prefix. In the latter
            case, the configuration will be looked up from an APDB index file
            using the label name that follows the prefix. The APDB index file's
            location is determined by the ``DAX_APDB_INDEX_URI`` environment
            variable.

        Returns
        -------
        apdb : `Apdb`
            Instance of `Apdb` class, the type of the returned instance is
            determined by configuration.
        """
        if isinstance(uri, str) and uri.startswith("label:"):
            # Everything after the first colon is the label name.
            _, _, label = uri.partition(":")
            index = ApdbIndex()
            # Current format for config files is "pex_config"
            config_format = "pex_config"
            uri = index.get_apdb_uri(label, config_format)
        path = ResourcePath(uri)
        config_str = path.read().decode()
        # NOTE(review): pex_config serialization is presumably Python code
        # that is evaluated on load, so ``uri`` should only ever point at a
        # trusted location -- confirm.
        # Assume that this is ApdbConfig, make_apdb will raise if not.
        config = cast(ApdbConfig, Config._fromPython(config_str))
        return make_apdb(config)

    @classmethod
    @abstractmethod
    def apdbImplementationVersion(cls) -> VersionTuple:
        """Return version number for current APDB implementation.

        Returns
        -------
        version : `VersionTuple`
            Version of the code defined in implementation class.
        """
        raise NotImplementedError()

    @abstractmethod
    def apdbSchemaVersion(self) -> VersionTuple:
        """Return schema version number as defined in config file.

        Returns
        -------
        version : `VersionTuple`
            Version of the schema defined in schema config file.
        """
        raise NotImplementedError()

    @abstractmethod
    def tableDef(self, table: ApdbTables) -> Table | None:
        """Return table schema definition for a given table.

        Parameters
        ----------
        table : `ApdbTables`
            One of the known APDB tables.

        Returns
        -------
        tableSchema : `.schema_model.Table` or `None`
            Table schema description, `None` is returned if table is not
            defined by this implementation.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaObjects(self, region: Region) -> pandas.DataFrame:
        """Return catalog of DiaObject instances from a given region.

        This method returns only the last version of each DiaObject. Some
        records in a returned catalog may be outside the specified region, it
        is up to a client to ignore those records or cleanup the catalog before
        further use.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DIAObjects.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog containing DiaObject records for a region that may be a
            superset of the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaSources(
        self, region: Region, object_ids: Iterable[int] | None, visit_time: astropy.time.Time
    ) -> pandas.DataFrame | None:
        """Return catalog of DiaSource instances from a given region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DIASources.
        object_ids : iterable [ `int` ], optional
            List of DiaObject IDs to further constrain the set of returned
            sources. If `None` then returned sources are not constrained. If
            list is empty then empty catalog is returned with a correct
            schema.
        visit_time : `astropy.time.Time`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            Catalog containing DiaSource records. `None` is returned if
            ``read_sources_months`` configuration parameter is set to 0.

        Notes
        -----
        This method returns DiaSource catalog for a region with additional
        filtering based on DiaObject IDs. Only a subset of DiaSource history
        is returned limited by ``read_sources_months`` config parameter, w.r.t.
        ``visit_time``. If ``object_ids`` is empty then an empty catalog is
        always returned with the correct schema (columns/types). If
        ``object_ids`` is `None` then no filtering is performed and some of the
        returned records may be outside the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaForcedSources(
        self, region: Region, object_ids: Iterable[int] | None, visit_time: astropy.time.Time
    ) -> pandas.DataFrame | None:
        """Return catalog of DiaForcedSource instances from a given region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DIAForcedSources.
        object_ids : iterable [ `int` ], optional
            List of DiaObject IDs to further constrain the set of returned
            sources. If list is empty then empty catalog is returned with a
            correct schema. If `None` then returned sources are not
            constrained. Some implementations may not support latter case.
        visit_time : `astropy.time.Time`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            Catalog containing DiaForcedSource records. `None` is returned if
            ``read_forced_sources_months`` configuration parameter is set to 0.

        Raises
        ------
        NotImplementedError
            May be raised by some implementations if ``object_ids`` is `None`.

        Notes
        -----
        This method returns DiaForcedSource catalog for a region with
        additional filtering based on DiaObject IDs. Only a subset of
        DiaForcedSource history is returned limited by
        ``read_forced_sources_months`` config parameter, w.r.t.
        ``visit_time``. If ``object_ids`` is empty then an empty catalog is
        always returned with the correct schema (columns/types). If
        ``object_ids`` is `None` then no filtering is performed and some of the
        returned records may be outside the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def containsVisitDetector(self, visit: int, detector: int) -> bool:
        """Test whether data for a given visit-detector is present in the APDB.

        Parameters
        ----------
        visit, detector : `int`
            The ID of the visit-detector to search for.

        Returns
        -------
        present : `bool`
            `True` if some DiaObject, DiaSource, or DiaForcedSource records
            exist for the specified observation, `False` otherwise.
        """
        raise NotImplementedError()

    @abstractmethod
    def getSSObjects(self) -> pandas.DataFrame:
        """Return catalog of SSObject instances.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog containing SSObject records, all existing records are
            returned.
        """
        raise NotImplementedError()

    @abstractmethod
    def store(
        self,
        visit_time: astropy.time.Time,
        objects: pandas.DataFrame,
        sources: pandas.DataFrame | None = None,
        forced_sources: pandas.DataFrame | None = None,
    ) -> None:
        """Store all three types of catalogs in the database.

        Parameters
        ----------
        visit_time : `astropy.time.Time`
            Time of the visit.
        objects : `pandas.DataFrame`
            Catalog with DiaObject records.
        sources : `pandas.DataFrame`, optional
            Catalog with DiaSource records.
        forced_sources : `pandas.DataFrame`, optional
            Catalog with DiaForcedSource records.

        Notes
        -----
        This method takes DataFrame catalogs, their schema must be
        compatible with the schema of APDB table:

        - column names must correspond to database table columns
        - types and units of the columns must match database definitions,
          no unit conversion is performed presently
        - columns that have default values in database schema can be
          omitted from catalog
        - this method knows how to fill interval-related columns of DiaObject
          (validityStart, validityEnd) they do not need to appear in a
          catalog
        - source catalogs have ``diaObjectId`` column associating sources
          with objects
        """
        raise NotImplementedError()

    @abstractmethod
    def storeSSObjects(self, objects: pandas.DataFrame) -> None:
        """Store or update SSObject catalog.

        Parameters
        ----------
        objects : `pandas.DataFrame`
            Catalog with SSObject records.

        Notes
        -----
        If SSObjects with matching IDs already exist in the database, their
        records will be updated with the information from provided records.
        """
        raise NotImplementedError()

    @abstractmethod
    def reassignDiaSources(self, idMap: Mapping[int, int]) -> None:
        """Associate DiaSources with SSObjects, dis-associating them
        from DiaObjects.

        Parameters
        ----------
        idMap : `Mapping`
            Maps DiaSource IDs to their new SSObject IDs.

        Raises
        ------
        ValueError
            Raised if DiaSource ID does not exist in the database.
        """
        raise NotImplementedError()

    @abstractmethod
    def dailyJob(self) -> None:
        """Implement daily activities like cleanup/vacuum.

        What should be done during daily activities is determined by
        specific implementation.
        """
        raise NotImplementedError()

    @abstractmethod
    def countUnassociatedObjects(self) -> int:
        """Return the number of DiaObjects that have only one DiaSource
        associated with them.

        Used as part of ap_verify metrics.

        Returns
        -------
        count : `int`
            Number of DiaObjects with exactly one associated DiaSource.

        Notes
        -----
        This method can be very inefficient or slow in some implementations.
        """
        raise NotImplementedError()

    @classmethod
    def makeField(cls, doc: str) -> ConfigurableField:
        """Make a `~lsst.pex.config.ConfigurableField` for Apdb.

        Parameters
        ----------
        doc : `str`
            Help text for the field.

        Returns
        -------
        configurableField : `lsst.pex.config.ConfigurableField`
            A `~lsst.pex.config.ConfigurableField` for Apdb.
        """
        # ``cls`` is the target, so a subclass calling this gets a field
        # that targets that subclass.
        return ConfigurableField(doc=doc, target=cls)

    @property
    @abstractmethod
    def metadata(self) -> ApdbMetadata:
        """Object controlling access to APDB metadata (`ApdbMetadata`)."""
        raise NotImplementedError()