Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of ap_association. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22"""A simple implementation of source association task for ap_verify. 

23""" 

24 

25__all__ = ["AssociationConfig", "AssociationTask"] 

26 

27import numpy as np 

28import pandas as pd 

29from scipy.spatial import cKDTree 

30 

31import lsst.geom as geom 

32import lsst.pex.config as pexConfig 

33import lsst.pipe.base as pipeBase 

34 

35from .diaCalculation import DiaObjectCalculationTask 

36 

37# Enforce an error for unsafe column/array value setting in pandas. 

38pd.options.mode.chained_assignment = 'raise' 

39 

40 

class AssociationConfig(pexConfig.Config):
    """Config class for AssociationTask.
    """
    maxDistArcSeconds = pexConfig.Field(
        dtype=float,
        doc='Maximum distance in arcseconds to test for a DIASource to be a '
        'match to a DIAObject.',
        default=1.0,
    )
    diaCalculation = pexConfig.ConfigurableField(
        target=DiaObjectCalculationTask,
        doc="Task to compute summary statistics for DiaObjects.",
    )

    def setDefaults(self):
        """Select the default set of DiaObject summary-statistic plugins
        run by the ``diaCalculation`` subtask.
        """
        self.diaCalculation.plugins = ["ap_meanPosition",
                                       "ap_HTMIndex",
                                       "ap_nDiaSources",
                                       "ap_diaObjectFlag",
                                       "ap_meanFlux",
                                       "ap_percentileFlux",
                                       "ap_sigmaFlux",
                                       "ap_chi2Flux",
                                       "ap_madFlux",
                                       "ap_skewFlux",
                                       "ap_minMaxFlux",
                                       "ap_maxSlopeFlux",
                                       "ap_meanErrFlux",
                                       "ap_linearFit",
                                       "ap_stetsonJ",
                                       "ap_meanTotFlux",
                                       "ap_sigmaTotFlux"]

    def validate(self):
        """Validate the configuration.

        Raises
        ------
        ValueError
            Raised if the ``ap_HTMIndex`` plugin is not among the configured
            ``diaCalculation`` plugins.
        """
        # Run the base-class (per-field and sub-config) validation first;
        # inter-field checks belong after it has completed.
        super().validate()
        if "ap_HTMIndex" not in self.diaCalculation.plugins:
            raise ValueError("AssociationTask requires the ap_HTMIndex plugin "
                             "be enabled for proper insertion into the Apdb.")

78 

79 

class AssociationTask(pipeBase.Task):
    """Associate DIASources into existing DIAObjects.

    This task performs the association of detected DIASources in a visit
    with the previous DIAObjects detected over time. It also creates new
    DIAObjects out of DIASources that cannot be associated with previously
    detected DIAObjects.
    """

    ConfigClass = AssociationConfig
    _DefaultName = "association"

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.makeSubtask("diaCalculation")

    @pipeBase.timeMethod
    def run(self,
            diaSources,
            diaObjects,
            diaSourceHistory):
        """Associate the new DiaSources with existing or new DiaObjects,
        updating the DiaObjects.

        Parameters
        ----------
        diaSources : `pandas.DataFrame`
            New DIASources to be associated with existing DIAObjects. Unless
            rows are dropped for NaN coordinates, this frame is modified in
            place (``diaObjectId`` is filled in and the index is replaced).
        diaObjects : `pandas.DataFrame`
            Existing diaObjects from the Apdb.
        diaSourceHistory : `pandas.DataFrame`
            12 month DiaSource history of the loaded ``diaObjects``.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Results struct with components.

            - ``diaObjects`` : Complete set of dia_objects covering the input
              exposure. Catalog contains newly created, updated, and untouched
              diaObjects. (`pandas.DataFrame`)
            - ``updatedDiaObjects`` : Subset of DiaObjects that were updated
              or created during processing. (`pandas.DataFrame`)
            - ``diaSources`` : DiaSources detected in this ccdVisit with
              associated diaObjectIds. (`pandas.DataFrame`)
        """
        diaSources = self.check_dia_source_radec(diaSources)

        matchResult = self.associate_sources(diaObjects, diaSources)

        # pandas.concat is the supported replacement for the removed
        # DataFrame.append; ``sort=True`` keeps the same column ordering.
        diaObjects = pd.concat([diaObjects, matchResult.new_dia_objects],
                               sort=True)
        # Now that we know the DiaObjects our new DiaSources are associated
        # with, we index the new DiaSources the same way as the full history
        # and merge the tables.
        diaSources.set_index(["diaObjectId", "filterName", "diaSourceId"],
                             drop=False,
                             inplace=True)
        mergedDiaSourceHistory = pd.concat([diaSourceHistory, diaSources],
                                           sort=True)

        # Get the current filter being processed. The frame now carries a
        # MultiIndex, so the first row must be taken by position rather than
        # with a ``[0]`` lookup that could be interpreted as a label.
        filterName = diaSources["filterName"].iloc[0]

        # Update previously existing DIAObjects with the information from
        # their newly associated DIASources and create new DIAObjects from
        # unassociated sources.
        updatedResults = self.diaCalculation.run(
            diaObjects,
            mergedDiaSourceHistory,
            matchResult.associated_dia_object_ids,
            filterName)

        return pipeBase.Struct(
            diaObjects=updatedResults.diaObjectCat,
            updatedDiaObjects=updatedResults.updatedDiaObjects,
            diaSources=diaSources,
        )

    def check_dia_source_radec(self, dia_sources):
        """Check that all DiaSources have non-NaN values for RA/DEC.

        If one or more DiaSources are found to have NaN values, throw a
        warning to the log with the ids of the offending sources. Drop them
        from the table.

        Parameters
        ----------
        dia_sources : `pandas.DataFrame`
            Input DiaSources to check for NaN values.

        Returns
        -------
        trimmed_sources : `pandas.DataFrame`
            DataFrame of DiaSources trimmed of all entries with NaN values for
            RA/DEC. This is the input object itself when nothing is dropped,
            and an independent copy otherwise.
        """
        nan_mask = (dia_sources.loc[:, "ra"].isnull()
                    | dia_sources.loc[:, "decl"].isnull())
        if nan_mask.any():
            # Select the offending ids with the boolean mask so the lookup
            # does not depend on the labels of the DataFrame index.
            for src_id in dia_sources.loc[nan_mask, "diaSourceId"]:
                self.log.warning(
                    "DiaSource %i has NaN value for RA/DEC, "
                    "dropping from association.",
                    src_id)
            # Copy explicitly: the result is modified later on and this
            # module makes pandas raise on writes to an implicit slice.
            dia_sources = dia_sources[~nan_mask].copy()
        return dia_sources

    @pipeBase.timeMethod
    def associate_sources(self, dia_objects, dia_sources):
        """Associate the input DIASources with the catalog of DIAObjects.

        DiaObject DataFrame must be indexed on ``diaObjectId``.

        Parameters
        ----------
        dia_objects : `pandas.DataFrame`
            Catalog of DIAObjects to attempt to associate the input
            DIASources into.
        dia_sources : `pandas.DataFrame`
            DIASources to associate into the DIAObjectCollection. The
            ``diaObjectId`` column is updated in place.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Results struct with components:

            - ``associated_dia_object_ids`` : id of the DIAObject each
              DIASource was associated with, in DIASource row order
              (`numpy.ndarray` of `numpy.uint64`).
            - ``new_dia_objects`` : Newly created DiaObjects from
              unassociated diaSources. (`pandas.DataFrame`)
            - ``n_updated_dia_objects`` : Number of previously known
              dia_objects with newly associated DIASources. (`int`).
            - ``n_new_dia_objects`` : Number of newly created DIAObjects from
              unassociated DIASources (`int`).
            - ``n_unassociated_dia_objects`` : Number of previous DIAObjects
              that were not associated to a new DIASource (`int`).
        """
        scores = self.score(
            dia_objects, dia_sources,
            self.config.maxDistArcSeconds * geom.arcseconds)
        match_result = self.match(dia_objects, dia_sources, scores)

        self._add_association_meta_data(match_result)

        return match_result

    @pipeBase.timeMethod
    def score(self, dia_objects, dia_sources, max_dist):
        """Compute a quality score for each dia_source/dia_object pair
        between this catalog of DIAObjects and the input DIASource catalog.

        ``max_dist`` sets the maximum separation to consider a dia_source a
        possible match to a dia_object. If the pair is beyond this distance
        no score is computed.

        Parameters
        ----------
        dia_objects : `pandas.DataFrame`
            A contiguous catalog of DIAObjects to score against dia_sources.
        dia_sources : `pandas.DataFrame`
            A contiguous catalog of dia_sources to "score" based on distance
            and (in the future) other metrics.
        max_dist : `lsst.geom.Angle`
            Maximum allowed distance to compute a score for a given DIAObject
            DIASource pair.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Results struct with components:

            - ``scores``: match quality of each DIASource with its nearest
              DIAObject (array-like of `float`).
            - ``obj_idxs``: indexes of the matched DIAObjects in the catalog.
              (array-like of `int`)
            - ``obj_ids``: ids (``dia_objects`` index values) of the matched
              DIAObjects (array-like of `int`).

            Default values for these arrays are
            INF, -1, and -1 respectively for unassociated sources.
        """
        n_sources = len(dia_sources)
        scores = np.full(n_sources, np.inf, dtype=np.float64)
        obj_idxs = np.full(n_sources, -1, dtype=int)
        obj_ids = np.full(n_sources, -1, dtype=int)

        if len(dia_objects) == 0:
            return pipeBase.Struct(
                scores=scores,
                obj_idxs=obj_idxs,
                obj_ids=obj_ids)

        spatial_tree = self._make_spatial_tree(dia_objects)

        max_dist_rad = max_dist.asRadians()

        vectors = self._radec_to_xyz(dia_sources)

        # The tree holds unit vectors, so the returned scores are Euclidean
        # (chord) distances; they are compared directly against the angle in
        # radians.
        scores, obj_idxs = spatial_tree.query(
            vectors,
            distance_upper_bound=max_dist_rad)
        matched = np.isfinite(scores)
        obj_ids[matched] = dia_objects.index.to_numpy()[obj_idxs[matched]]
        # cKDTree flags "no neighbour in range" with an index equal to the
        # tree size; normalise it to the documented -1 default.
        obj_idxs[~matched] = -1

        return pipeBase.Struct(
            scores=scores,
            obj_idxs=obj_idxs,
            obj_ids=obj_ids)

    def _make_spatial_tree(self, dia_objects):
        """Create a searchable kd-tree from the input dia_object positions.

        Parameters
        ----------
        dia_objects : `pandas.DataFrame`
            A catalog of DIAObjects to create the tree from.

        Returns
        -------
        kd_tree : `scipy.spatial.cKDTree`
            Searchable kd-tree created from the positions of the DIAObjects.
        """
        vectors = self._radec_to_xyz(dia_objects)
        return cKDTree(vectors)

    def _radec_to_xyz(self, catalog):
        """Convert input ra/dec coordinates to spherical unit-vectors.

        Parameters
        ----------
        catalog : `pandas.DataFrame`
            Catalog to produce spherical unit-vectors from. The ``ra`` and
            ``decl`` columns are passed through `numpy.radians`, i.e. they
            are treated as degrees.

        Returns
        -------
        vectors : `numpy.ndarray`, (N, 3)
            Output unit-vectors
        """
        ras = np.radians(catalog["ra"])
        decs = np.radians(catalog["decl"])
        vectors = np.empty((len(ras), 3))

        # Work with the polar angle (pi/2 - dec).
        sin_dec = np.sin(np.pi / 2 - decs)
        vectors[:, 0] = sin_dec * np.cos(ras)
        vectors[:, 1] = sin_dec * np.sin(ras)
        vectors[:, 2] = np.cos(np.pi / 2 - decs)

        return vectors

    @pipeBase.timeMethod
    def match(self, dia_objects, dia_sources, score_struct):
        """Match DIASources to DIAObjects given a score and create new
        DIAObject Ids for new unassociated DIASources.

        Parameters
        ----------
        dia_objects : `pandas.DataFrame`
            A catalog of DIAObjects to associate to DIASources.
        dia_sources : `pandas.DataFrame`
            A contiguous catalog of dia_sources for which the set of scores
            has been computed with ``score``. The ``diaObjectId`` column
            is updated in place; the index is assumed to be unique.
        score_struct : `lsst.pipe.base.Struct`
            Results struct with components:

            - ``scores``: match quality of each DIASource with its nearest
              DIAObject (array-like of `float`).
            - ``obj_ids``: ids of the matched DIAObjects
              (array-like of `int`).
            - ``obj_idxs``: indexes of the matched DIAObjects in the catalog.
              (array-like of `int`)

            Default values for these arrays are
            INF, -1 and -1 respectively for unassociated sources.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Results struct with components:

            - ``associated_dia_object_ids`` : id of the DIAObject each
              DIASource was associated with, in DIASource row order
              (`numpy.ndarray` of `numpy.uint64`).
            - ``new_dia_objects`` : Newly created DiaObjects from unassociated
              diaSources. (`pandas.DataFrame`)
            - ``n_updated_dia_objects`` : Number of previously known
              dia_objects with newly associated DIASources. (`int`).
            - ``n_new_dia_objects`` : Number of newly created DIAObjects from
              unassociated DIASources (`int`).
            - ``n_unassociated_dia_objects`` : Number of previous DIAObjects
              that were not associated to a new DIASource (`int`).
        """
        n_previous_dia_objects = len(dia_objects)
        used_dia_object = np.zeros(n_previous_dia_objects, dtype=bool)
        used_dia_source = np.zeros(len(dia_sources), dtype=bool)
        associated_dia_object_ids = np.zeros(len(dia_sources),
                                             dtype=np.uint64)
        new_dia_objects = []

        n_updated_dia_objects = 0
        n_new_dia_objects = 0

        # The score arrays are positional. Translate positions to index
        # labels before writing with ``.loc`` so the assignment still hits
        # the right row when the index is not 0..N-1 (e.g. after rows were
        # dropped), and read the source ids positionally.
        src_labels = dia_sources.index
        src_ids = dia_sources["diaSourceId"].to_numpy()

        # We sort from best match to worst to effectively perform a
        # "handshake" match where both the DIASources and DIAObjects agree
        # on their best match. By sorting this way, non-finite scores (those
        # sources that have no match and will create new DIAObjects) will be
        # placed at the end of the array.
        score_args = score_struct.scores.argsort(axis=None)
        for score_idx in score_args:
            if not np.isfinite(score_struct.scores[score_idx]):
                # Thanks to the sorting the rest of the sources will be
                # non-finite for their score. We therefore exit the loop
                # appending sources to existing DIAObjects, leaving these
                # for the loop creating new objects.
                break
            dia_obj_idx = score_struct.obj_idxs[score_idx]
            if used_dia_object[dia_obj_idx]:
                # A better-scoring source already claimed this DIAObject;
                # this source falls through to become a new DIAObject.
                continue
            used_dia_object[dia_obj_idx] = True
            used_dia_source[score_idx] = True
            obj_id = score_struct.obj_ids[score_idx]
            associated_dia_object_ids[score_idx] = obj_id
            n_updated_dia_objects += 1
            dia_sources.loc[src_labels[score_idx], "diaObjectId"] = obj_id

        # Argwhere returns an array of shape (N, 1) so we unpack the index
        # thusly to retrieve the value rather than the tuple.
        for (src_idx,) in np.argwhere(np.logical_not(used_dia_source)):
            src_id = src_ids[src_idx]
            new_dia_objects.append(self._initialize_dia_object(src_id))
            # A new DIAObject reuses the id of the DIASource that seeds it.
            associated_dia_object_ids[src_idx] = src_id
            dia_sources.loc[src_labels[src_idx], "diaObjectId"] = src_id
            n_new_dia_objects += 1

        if len(new_dia_objects) > 0:
            new_dia_objects = pd.DataFrame(data=new_dia_objects)
        else:
            # Create a junk DiaObject to get the columns.
            tmpObj = self._initialize_dia_object(0)
            new_dia_objects = pd.DataFrame(data=new_dia_objects,
                                           columns=list(tmpObj.keys()))
        new_dia_objects.set_index("diaObjectId", inplace=True, drop=False)

        # Previously existing DIAObjects that received no new DIASource.
        n_unassociated_dia_objects = \
            n_previous_dia_objects - n_updated_dia_objects
        return pipeBase.Struct(
            associated_dia_object_ids=associated_dia_object_ids,
            new_dia_objects=new_dia_objects,
            n_updated_dia_objects=n_updated_dia_objects,
            n_new_dia_objects=n_new_dia_objects,
            n_unassociated_dia_objects=n_unassociated_dia_objects,)

    def _initialize_dia_object(self, objId):
        """Create a new DiaObject with values required to be initialized by
        the Apdb.

        Parameters
        ----------
        objId : `int`
            ``diaObjectId`` value for the new DiaObject.

        Returns
        -------
        diaObject : `dict`
            Newly created DiaObject with keys:

            ``diaObjectId``
                Unique DiaObjectId (`int`).
            ``pmParallaxNdata``
                Number of data points used for parallax calculation (`int`).
            ``nearbyObj1``
                Id of a nearbyObject in the Object table (`int`).
            ``nearbyObj2``
                Id of a nearbyObject in the Object table (`int`).
            ``nearbyObj3``
                Id of a nearbyObject in the Object table (`int`).
            ``flags``
                Initialized to 0 (`int`).
            ``?PSFluxNdata``
                Number of data points used to calculate point source flux
                summary statistics in each bandpass, one key per band in
                ``ugrizy`` (`int`).
        """
        new_dia_object = {"diaObjectId": objId,
                          "pmParallaxNdata": 0,
                          "nearbyObj1": 0,
                          "nearbyObj2": 0,
                          "nearbyObj3": 0,
                          "flags": 0}
        for f in ["u", "g", "r", "i", "z", "y"]:
            new_dia_object["%sPSFluxNdata" % f] = 0
        return new_dia_object

    def _add_association_meta_data(self, match_result):
        """Store summaries of the association step in the task metadata.

        Parameters
        ----------
        match_result : `lsst.pipe.base.Struct`
            Results struct from which the following components are read:

            - ``n_updated_dia_objects`` : Number of previously known
              dia_objects with newly associated DIASources. (`int`).
            - ``n_new_dia_objects`` : Number of newly created DIAObjects from
              unassociated DIASources (`int`).
            - ``n_unassociated_dia_objects`` : Number of previous DIAObjects
              that were not associated to a new DIASource (`int`).
        """
        self.metadata.add('numUpdatedDiaObjects',
                          match_result.n_updated_dia_objects)
        self.metadata.add('numNewDiaObjects',
                          match_result.n_new_dia_objects)
        self.metadata.add('numUnassociatedDiaObjects',
                          match_result.n_unassociated_dia_objects)