Coverage for python/lsst/dax/apdb/apdb.py: 88%

69 statements  

« prev     ^ index     » next       coverage.py v7.5.1, created at 2024-05-16 03:20 -0700

1# This file is part of dax_apdb. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ["ApdbConfig", "Apdb"] 

25 

26import os 

27from abc import ABC, abstractmethod 

28from collections.abc import Iterable, Mapping 

29from typing import TYPE_CHECKING, cast 

30 

31import astropy.time 

32import pandas 

33from lsst.pex.config import Config, ConfigurableField, Field 

34from lsst.resources import ResourcePath, ResourcePathExpression 

35from lsst.sphgeom import Region 

36 

37from .apdbIndex import ApdbIndex 

38from .apdbSchema import ApdbTables 

39from .factory import make_apdb 

40from .schema_model import Table 

41 

42if TYPE_CHECKING: 

43 from .apdbMetadata import ApdbMetadata 

44 

45 

def _data_file_name(basename: str) -> str:
    """Build the location of a data file in the sdm_schemas package.

    Parameters
    ----------
    basename : `str`
        Name of the file to locate.

    Returns
    -------
    path : `str`
        ``basename`` joined onto the ``yml`` directory below the
        ``${SDM_SCHEMAS_DIR}`` placeholder.
    """
    # The environment variable reference is kept as literal text, this
    # function does not expand it.
    components = ("${SDM_SCHEMAS_DIR}", "yml", basename)
    return os.path.join(*components)

49 

50 

class ApdbConfig(Config):
    """Configuration options shared by all Apdb implementations."""

    # History depth, in months, used when reading DiaSource records.
    read_sources_months = Field[int](
        default=12,
        doc="Number of months of history to read from DiaSource",
    )

    # History depth, in months, used when reading DiaForcedSource records.
    read_forced_sources_months = Field[int](
        default=12,
        doc="Number of months of history to read from DiaForcedSource",
    )

    # Default points at "apdb.yaml" in the sdm_schemas "yml" directory.
    schema_file = Field[str](
        default=_data_file_name("apdb.yaml"),
        doc="Location of (YAML) configuration file with standard schema",
    )

    schema_name = Field[str](
        default="ApdbSchema",
        doc="Name of the schema in YAML configuration file.",
    )

    # Deprecated option, retained as a field but its value is not used.
    extra_schema_file = Field[str](
        default=None,
        optional=True,
        deprecated="This field is deprecated, its value is not used.",
        doc=(
            "Location of (YAML) configuration file with extra schema, "
            "definitions in this file are merged with the definitions in "
            "'schema_file', extending or replacing parts of the schema."
        ),
    )

    # Replication support is disabled by default.
    use_insert_id = Field[bool](
        default=False,
        doc=(
            "If True, make and fill additional tables used for replication. "
            "Databases created with earlier versions of APDB may not have these tables, "
            "and corresponding methods will not work for them."
        ),
    )

    # Default of 600 seconds corresponds to a new replica chunk every ten
    # minutes.
    replica_chunk_seconds = Field[int](
        doc="Time extent for replica chunks, new chunks are created every specified number of seconds.",
        default=600,
    )

82 

83 

class Apdb(ABC):
    """Abstract interface for APDB."""

    ConfigClass = ApdbConfig

    @classmethod
    def from_config(cls, config: ApdbConfig) -> Apdb:
        """Create Apdb instance from configuration object.

        Parameters
        ----------
        config : `ApdbConfig`
            Configuration object, type of this object determines type of the
            Apdb implementation.

        Returns
        -------
        apdb : `Apdb`
            Instance of `Apdb` class.
        """
        return make_apdb(config)

    @classmethod
    def from_uri(cls, uri: ResourcePathExpression) -> Apdb:
        """Make Apdb instance from a serialized configuration.

        Parameters
        ----------
        uri : `~lsst.resources.ResourcePathExpression`
            URI or local file path pointing to a file with serialized
            configuration, or a string with a "label:" prefix. In the latter
            case, the configuration will be looked up from an APDB index file
            using the label name that follows the prefix. The APDB index file's
            location is determined by the ``DAX_APDB_INDEX_URI`` environment
            variable.

        Returns
        -------
        apdb : `Apdb`
            Instance of `Apdb` class, the type of the returned instance is
            determined by configuration.
        """
        if isinstance(uri, str) and uri.startswith("label:"):
            # Everything after the first colon is the label, the prefix
            # itself is not needed.
            _, _, label = uri.partition(":")
            index = ApdbIndex()
            # Current format for config files is "pex_config"
            config_format = "pex_config"
            uri = index.get_apdb_uri(label, config_format)
        path = ResourcePath(uri)
        config_str = path.read().decode()
        # NOTE(review): `Config._fromPython` presumably evaluates the
        # serialized text as Python code - confirm that only trusted
        # locations are passed to this method.
        # Assume that this is ApdbConfig, make_apdb will raise if not.
        config = cast(ApdbConfig, Config._fromPython(config_str))
        return make_apdb(config)

    @abstractmethod
    def tableDef(self, table: ApdbTables) -> Table | None:
        """Return table schema definition for a given table.

        Parameters
        ----------
        table : `ApdbTables`
            One of the known APDB tables.

        Returns
        -------
        tableSchema : `.schema_model.Table` or `None`
            Table schema description, `None` is returned if table is not
            defined by this implementation.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaObjects(self, region: Region) -> pandas.DataFrame:
        """Return catalog of DiaObject instances from a given region.

        This method returns only the last version of each DiaObject,
        and may return only the subset of the DiaObject columns needed
        for AP association. Some records in a returned catalog may be outside
        the specified region, it is up to a client to ignore those records or
        cleanup the catalog before further use.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DIAObjects.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog containing DiaObject records for a region that may be a
            superset of the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaSources(
        self, region: Region, object_ids: Iterable[int] | None, visit_time: astropy.time.Time
    ) -> pandas.DataFrame | None:
        """Return catalog of DiaSource instances from a given region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DIASources.
        object_ids : iterable [ `int` ], optional
            List of DiaObject IDs to further constrain the set of returned
            sources. If `None` then returned sources are not constrained. If
            list is empty then empty catalog is returned with a correct
            schema.
        visit_time : `astropy.time.Time`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            Catalog containing DiaSource records. `None` is returned if
            ``read_sources_months`` configuration parameter is set to 0.

        Notes
        -----
        This method returns DiaSource catalog for a region with additional
        filtering based on DiaObject IDs. Only a subset of DiaSource history
        is returned limited by ``read_sources_months`` config parameter, w.r.t.
        ``visit_time``. If ``object_ids`` is empty then an empty catalog is
        always returned with the correct schema (columns/types). If
        ``object_ids`` is `None` then no filtering is performed and some of the
        returned records may be outside the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDiaForcedSources(
        self, region: Region, object_ids: Iterable[int] | None, visit_time: astropy.time.Time
    ) -> pandas.DataFrame | None:
        """Return catalog of DiaForcedSource instances from a given region.

        Parameters
        ----------
        region : `lsst.sphgeom.Region`
            Region to search for DIAForcedSources.
        object_ids : iterable [ `int` ], optional
            List of DiaObject IDs to further constrain the set of returned
            sources. If list is empty then empty catalog is returned with a
            correct schema. If `None` then returned sources are not
            constrained. Some implementations may not support latter case.
        visit_time : `astropy.time.Time`
            Time of the current visit.

        Returns
        -------
        catalog : `pandas.DataFrame`, or `None`
            Catalog containing DiaForcedSource records. `None` is returned if
            ``read_forced_sources_months`` configuration parameter is set to 0.

        Raises
        ------
        NotImplementedError
            May be raised by some implementations if ``object_ids`` is `None`.

        Notes
        -----
        This method returns DiaForcedSource catalog for a region with
        additional filtering based on DiaObject IDs. Only a subset of
        DiaForcedSource history is returned limited by
        ``read_forced_sources_months`` config parameter, w.r.t.
        ``visit_time``. If ``object_ids`` is empty then an empty catalog is
        always returned with the correct schema (columns/types). If
        ``object_ids`` is `None` then no filtering is performed and some of
        the returned records may be outside the specified region.
        """
        raise NotImplementedError()

    @abstractmethod
    def containsVisitDetector(self, visit: int, detector: int) -> bool:
        """Test whether any sources for a given visit-detector are present in
        the APDB.

        Parameters
        ----------
        visit, detector : `int`
            The ID of the visit-detector to search for.

        Returns
        -------
        present : `bool`
            `True` if at least one DiaSource or DiaForcedSource record
            may exist for the specified observation, `False` otherwise.
        """
        raise NotImplementedError()

    @abstractmethod
    def getSSObjects(self) -> pandas.DataFrame:
        """Return catalog of SSObject instances.

        Returns
        -------
        catalog : `pandas.DataFrame`
            Catalog containing SSObject records, all existing records are
            returned.
        """
        raise NotImplementedError()

    @abstractmethod
    def store(
        self,
        visit_time: astropy.time.Time,
        objects: pandas.DataFrame,
        sources: pandas.DataFrame | None = None,
        forced_sources: pandas.DataFrame | None = None,
    ) -> None:
        """Store all three types of catalogs in the database.

        Parameters
        ----------
        visit_time : `astropy.time.Time`
            Time of the visit.
        objects : `pandas.DataFrame`
            Catalog with DiaObject records.
        sources : `pandas.DataFrame`, optional
            Catalog with DiaSource records.
        forced_sources : `pandas.DataFrame`, optional
            Catalog with DiaForcedSource records.

        Notes
        -----
        This method takes DataFrame catalogs, their schema must be
        compatible with the schema of APDB table:

          - column names must correspond to database table columns
          - types and units of the columns must match database definitions,
            no unit conversion is performed presently
          - columns that have default values in database schema can be
            omitted from catalog
          - this method knows how to fill interval-related columns of DiaObject
            (validityStart, validityEnd) they do not need to appear in a
            catalog
          - source catalogs have ``diaObjectId`` column associating sources
            with objects

        This operation need not be atomic, but DiaSources and DiaForcedSources
        will not be stored until all DiaObjects are stored.
        """
        raise NotImplementedError()

    @abstractmethod
    def storeSSObjects(self, objects: pandas.DataFrame) -> None:
        """Store or update SSObject catalog.

        Parameters
        ----------
        objects : `pandas.DataFrame`
            Catalog with SSObject records.

        Notes
        -----
        If SSObjects with matching IDs already exist in the database, their
        records will be updated with the information from provided records.
        """
        raise NotImplementedError()

    @abstractmethod
    def reassignDiaSources(self, idMap: Mapping[int, int]) -> None:
        """Associate DiaSources with SSObjects, dis-associating them
        from DiaObjects.

        Parameters
        ----------
        idMap : `Mapping`
            Maps DiaSource IDs to their new SSObject IDs.

        Raises
        ------
        ValueError
            Raised if DiaSource ID does not exist in the database.
        """
        raise NotImplementedError()

    @abstractmethod
    def dailyJob(self) -> None:
        """Implement daily activities like cleanup/vacuum.

        What should be done during daily activities is determined by
        specific implementation.
        """
        raise NotImplementedError()

    @abstractmethod
    def countUnassociatedObjects(self) -> int:
        """Return the number of DiaObjects that have only one DiaSource
        associated with them.

        Used as part of ap_verify metrics.

        Returns
        -------
        count : `int`
            Number of DiaObjects with exactly one associated DiaSource.

        Notes
        -----
        This method can be very inefficient or slow in some implementations.
        """
        raise NotImplementedError()

    @classmethod
    def makeField(cls, doc: str) -> ConfigurableField:
        """Make a `~lsst.pex.config.ConfigurableField` for Apdb.

        Parameters
        ----------
        doc : `str`
            Help text for the field.

        Returns
        -------
        configurableField : `lsst.pex.config.ConfigurableField`
            A `~lsst.pex.config.ConfigurableField` for Apdb.
        """
        return ConfigurableField(doc=doc, target=cls)

    @property
    @abstractmethod
    def metadata(self) -> ApdbMetadata:
        """Object controlling access to APDB metadata (`ApdbMetadata`)."""
        raise NotImplementedError()