Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of ap_association. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

"""Simple source association task for ap_verify.

Defines ``AssociationTask``, which pairs DIASources with DIAObjects by sky
position, together with its configuration class ``AssociationConfig``.
"""

# Public names exported by ``from <module> import *``.
__all__ = [
    "AssociationConfig",
    "AssociationTask",
]

26 

27import numpy as np 

28import pandas as pd 

29from scipy.spatial import cKDTree 

30 

31import lsst.geom as geom 

32import lsst.pex.config as pexConfig 

33import lsst.pipe.base as pipeBase 

34 

# Make pandas raise, rather than merely warn, when a value is assigned
# through a chained/ambiguous selection (possible write to a temporary copy).
pd.set_option("mode.chained_assignment", "raise")

37 

38 

class AssociationConfig(pexConfig.Config):
    """Configuration options for `AssociationTask`."""

    # Matching radius, in arcseconds, handed to the scoring step as the
    # largest DIASource/DIAObject separation that is still considered.
    maxDistArcSeconds = pexConfig.Field(
        doc=('Maximum distance in arcseconds to test for a DIASource to be a '
             'match to a DIAObject.'),
        dtype=float,
        default=1.0,
    )

48 

49 

class AssociationTask(pipeBase.Task):
    """Associate DIASources into existing DIAObjects.

    This task performs the association of detected DIASources in a visit
    with the previous DIAObjects detected over time. It also creates new
    DIAObjects out of DIASources that cannot be associated with previously
    detected DIAObjects.
    """

    ConfigClass = AssociationConfig
    _DefaultName = "association"

    @pipeBase.timeMethod
    def run(self,
            diaSources,
            diaObjects,
            diaSourceHistory):
        """Associate the new DiaSources with existing or new DiaObjects,
        updating the DiaObjects.

        Parameters
        ----------
        diaSources : `pandas.DataFrame`
            New DIASources to be associated with existing DIAObjects. Unless
            rows with NaN coordinates have to be dropped (in which case a
            copy is used), this frame is modified in place: its
            ``diaObjectId`` column is filled and it is re-indexed.
        diaObjects : `pandas.DataFrame`
            Existing diaObjects from the Apdb, indexed on ``diaObjectId``.
        diaSourceHistory : `pandas.DataFrame`
            12 month DiaSource history of the loaded ``diaObjects``.
            Currently not used by this method.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Results struct with components.

            - ``diaObjects`` : Input DiaObjects with the newly created
              DiaObjects appended. (`pandas.DataFrame`)
            - ``diaSources`` : DiaSources with ``diaObjectId`` set, indexed
              on ``(diaObjectId, filterName, diaSourceId)``.
              (`pandas.DataFrame`)
            - ``matchedDiaObjectIds`` : diaObjectId assigned to each
              DiaSource, in DiaSource row order. (`numpy.ndarray`)

        Raises
        ------
        RuntimeError
            Raised if the DiaObject table contains duplicate index values
            after the new DiaObjects are appended.
        """
        diaSources = self.check_dia_source_radec(diaSources)

        matchResult = self.associate_sources(diaObjects, diaSources)

        # Now that we know the DiaObjects our new DiaSources are associated
        # with, we index the new DiaSources the same way as the full history.
        diaSources.set_index(["diaObjectId", "filterName", "diaSourceId"],
                             drop=False,
                             inplace=True)
        # Append the newly created DiaObjects. ``pd.concat`` is the
        # supported equivalent of the removed ``DataFrame.append``.
        diaObjects = pd.concat([diaObjects, matchResult.new_dia_objects],
                               sort=True)
        # Double check to make sure there are no duplicates in the DiaObject
        # table after association.
        if diaObjects.index.has_duplicates:
            raise RuntimeError(
                "Duplicate DiaObjects created after association. This is "
                "likely due to re-running data with an already populated "
                "Apdb. If this was not the case then there was an unexpected "
                "failure in Association while matching and creating new "
                "DiaObjects and should be reported. Exiting.")

        return pipeBase.Struct(
            diaObjects=diaObjects,
            diaSources=diaSources,
            matchedDiaObjectIds=matchResult.associated_dia_object_ids,
        )

    def check_dia_source_radec(self, dia_sources):
        """Check that all DiaSources have non-NaN values for RA/DEC.

        If one or more DiaSources are found to have NaN values, log a
        warning with the id of each offending source and drop them from
        the table.

        Parameters
        ----------
        dia_sources : `pandas.DataFrame`
            Input DiaSources to check for NaN values.

        Returns
        -------
        trimmed_sources : `pandas.DataFrame`
            The input object itself when no row has a null ``ra``/``decl``;
            otherwise an independent copy without the offending rows.
        """
        nan_mask = (dia_sources.loc[:, "ra"].isnull()
                    | dia_sources.loc[:, "decl"].isnull())
        if not nan_mask.any():
            return dia_sources

        # Select the ids with the boolean mask itself: positions derived
        # from the mask are not valid ``.loc`` labels in general.
        for src_id in dia_sources.loc[nan_mask, "diaSourceId"]:
            self.log.warning(
                "DiaSource %i has NaN value for RA/DEC, "
                "dropping from association.", src_id)
        # Return a real copy so that later column assignment does not trip
        # the chained-assignment check enabled at module level.
        return dia_sources.loc[~nan_mask].copy()

    @pipeBase.timeMethod
    def associate_sources(self, dia_objects, dia_sources):
        """Associate the input DIASources with the catalog of DIAObjects.

        DiaObject DataFrame must be indexed on ``diaObjectId``.

        Parameters
        ----------
        dia_objects : `pandas.DataFrame`
            Catalog of DIAObjects to attempt to associate the input
            DIASources into.
        dia_sources : `pandas.DataFrame`
            DIASources to associate into the DIAObjects. Its ``diaObjectId``
            column is set in place.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Results struct with components:

            - ``associated_dia_object_ids`` : diaObjectId assigned to each
              DIASource, in row order. (`numpy.ndarray` of `numpy.uint64`)
            - ``new_dia_objects`` : Newly created DiaObjects from
              unassociated diaSources. (`pandas.DataFrame`)
            - ``n_updated_dia_objects`` : Number of previously known
              dia_objects with newly associated DIASources. (`int`).
            - ``n_new_dia_objects`` : Number of newly created DIAObjects from
              unassociated DIASources (`int`).
            - ``n_unassociated_dia_objects`` : Number of previous DIAObjects
              that were not associated to a new DIASource (`int`).
        """
        scores = self.score(
            dia_objects, dia_sources,
            self.config.maxDistArcSeconds * geom.arcseconds)
        match_result = self.match(dia_objects, dia_sources, scores)

        self._add_association_meta_data(match_result)

        return match_result

    @pipeBase.timeMethod
    def score(self, dia_objects, dia_sources, max_dist):
        """Compute a quality score for each dia_source/dia_object pair
        between this catalog of DIAObjects and the input DIASource catalog.

        Each DIASource is scored against its nearest DIAObject only.
        ``max_dist`` sets the maximum separation to consider a dia_source a
        possible match to a dia_object. If the pair is beyond this distance
        no score is computed.

        Parameters
        ----------
        dia_objects : `pandas.DataFrame`
            A contiguous catalog of DIAObjects to score against dia_sources.
        dia_sources : `pandas.DataFrame`
            A contiguous catalog of dia_sources to "score" based on distance
            and (in the future) other metrics.
        max_dist : `lsst.geom.Angle`
            Maximum allowed distance to compute a score for a given DIAObject
            DIASource pair.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Results struct with components:

            - ``scores``: match quality per DIASource; the kd-tree distance
              between the unit vectors of the source and its nearest
              DIAObject (array-like of `float`).
            - ``obj_idxs``: positional indexes of the matched DIAObjects in
              the catalog. (array-like of `int`)
            - ``obj_ids``: index values (``diaObjectId``) of the matched
              DIAObjects (array-like of `int`).

            Default values for these arrays are
            INF, -1, and 0 respectively for unassociated sources.
        """
        n_sources = len(dia_sources)
        scores = np.full(n_sources, np.inf, dtype=np.float64)
        obj_idxs = np.full(n_sources, -1, dtype=np.int64)
        obj_ids = np.full(n_sources, 0, dtype=np.int64)

        if len(dia_objects) == 0:
            return pipeBase.Struct(
                scores=scores,
                obj_idxs=obj_idxs,
                obj_ids=obj_ids)

        spatial_tree = self._make_spatial_tree(dia_objects)
        vectors = self._radec_to_xyz(dia_sources)

        # The tree measures straight-line distance between unit vectors,
        # which is compared directly against the angle in radians.
        scores, obj_idxs = spatial_tree.query(
            vectors,
            distance_upper_bound=max_dist.asRadians())

        matched = np.isfinite(scores)
        obj_ids[matched] = dia_objects.index.to_numpy()[obj_idxs[matched]]
        # The tree reports "no neighbour" as an index one past the end of
        # the catalog; use the documented -1 sentinel instead.
        obj_idxs[~matched] = -1

        return pipeBase.Struct(
            scores=scores,
            obj_idxs=obj_idxs,
            obj_ids=obj_ids)

    def _make_spatial_tree(self, dia_objects):
        """Create a searchable kd-tree from the input dia_object positions.

        Parameters
        ----------
        dia_objects : `pandas.DataFrame`
            A catalog of DIAObjects to create the tree from.

        Returns
        -------
        kd_tree : `scipy.spatial.cKDTree`
            Searchable kd-tree created from the positions of the DIAObjects.
        """
        return cKDTree(self._radec_to_xyz(dia_objects))

    def _radec_to_xyz(self, catalog):
        """Convert input ra/dec coordinates to spherical unit-vectors.

        Parameters
        ----------
        catalog : `pandas.DataFrame`
            Catalog to produce spherical unit-vectors from. Its ``ra`` and
            ``decl`` columns are converted with `numpy.radians`, i.e. they
            are treated as degrees.

        Returns
        -------
        vectors : `numpy.ndarray`, (N, 3)
            Output unit-vectors
        """
        # Work on plain arrays so no pandas index alignment is involved.
        ras = np.radians(np.asarray(catalog["ra"], dtype=np.float64))
        decs = np.radians(np.asarray(catalog["decl"], dtype=np.float64))
        vectors = np.empty((len(ras), 3))

        # Polar angle measured from the +z axis.
        polar = np.pi / 2 - decs
        sin_polar = np.sin(polar)
        vectors[:, 0] = sin_polar * np.cos(ras)
        vectors[:, 1] = sin_polar * np.sin(ras)
        vectors[:, 2] = np.cos(polar)

        return vectors

    @pipeBase.timeMethod
    def match(self, dia_objects, dia_sources, score_struct):
        """Match DIAsources to DIAObjects given a score and create new
        DIAObject Ids for new unassociated DIASources.

        Parameters
        ----------
        dia_objects : `pandas.DataFrame`
            Catalog of DIAObjects to associate to DIASources.
        dia_sources : `pandas.DataFrame`
            A contiguous catalog of dia_sources for which the set of scores
            has been computed with `score`. Its ``diaObjectId`` column is
            set in place, by row position.
        score_struct : `lsst.pipe.base.Struct`
            Results struct with components:

            - ``scores``: match quality per DIASource
              (array-like of `float`).
            - ``obj_ids``: diaObjectId of the candidate DIAObject per
              DIASource (array-like of `int`).
            - ``obj_idxs``: indexes of the matched DIAObjects in the catalog.
              (array-like of `int`)

            Entries with a non-finite score are treated as unassociated;
            their ``obj_ids``/``obj_idxs`` values are not read.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Results struct with components:

            - ``associated_dia_object_ids`` : diaObjectId assigned to each
              DIASource, in row order. (`numpy.ndarray` of `numpy.uint64`)
            - ``new_dia_objects`` : Newly created DiaObjects from unassociated
              diaSources, indexed on ``diaObjectId``. (`pandas.DataFrame`)
            - ``n_updated_dia_objects`` : Number of previously known
              dia_objects with newly associated DIASources. (`int`).
            - ``n_new_dia_objects`` : Number of newly created DIAObjects from
              unassociated DIASources (`int`).
            - ``n_unassociated_dia_objects`` : Number of previous DIAObjects
              that were not associated to a new DIASource (`int`).
        """
        n_previous_dia_objects = len(dia_objects)
        n_sources = len(dia_sources)
        used_dia_object = np.zeros(n_previous_dia_objects, dtype=bool)
        used_dia_source = np.zeros(n_sources, dtype=bool)
        associated_dia_object_ids = np.zeros(n_sources, dtype=np.uint64)

        n_updated_dia_objects = 0

        # We sort from best match to worst so that each DIAObject is claimed
        # by its closest DIASource. Sorting this way places non-finite
        # scores (sources that have no match and will create new
        # DIAObjects) at the end of the array.
        for score_idx in score_struct.scores.argsort(axis=None):
            if not np.isfinite(score_struct.scores[score_idx]):
                # Thanks to the sorting every remaining source is unmatched;
                # they are handled below when new objects are created.
                break
            dia_obj_idx = score_struct.obj_idxs[score_idx]
            if used_dia_object[dia_obj_idx]:
                # A closer source already claimed this object; this source
                # is left unused and becomes a new DIAObject.
                continue
            used_dia_object[dia_obj_idx] = True
            used_dia_source[score_idx] = True
            associated_dia_object_ids[score_idx] = \
                score_struct.obj_ids[score_idx]
            n_updated_dia_objects += 1

        # Every source that did not claim an existing object seeds a new
        # DIAObject that reuses the source's own id. Rows are addressed by
        # position throughout; index labels are not assumed to be 0..N-1.
        src_ids = dia_sources["diaSourceId"].to_numpy()
        new_src_idxs = np.flatnonzero(~used_dia_source)
        new_dia_objects = [self._initialize_dia_object(src_ids[src_idx])
                           for src_idx in new_src_idxs]
        associated_dia_object_ids[new_src_idxs] = src_ids[new_src_idxs]
        n_new_dia_objects = len(new_dia_objects)

        # Write all assignments at once; array assignment is positional.
        dia_sources["diaObjectId"] = \
            associated_dia_object_ids.astype(np.int64)

        # Passing the column names explicitly keeps the expected columns
        # even when no new DiaObject was created.
        columns = list(self._initialize_dia_object(0).keys())
        new_dia_objects = pd.DataFrame(data=new_dia_objects, columns=columns)
        new_dia_objects.set_index("diaObjectId", inplace=True, drop=False)

        n_unassociated_dia_objects = \
            n_previous_dia_objects - n_updated_dia_objects
        return pipeBase.Struct(
            associated_dia_object_ids=associated_dia_object_ids,
            new_dia_objects=new_dia_objects,
            n_updated_dia_objects=n_updated_dia_objects,
            n_new_dia_objects=n_new_dia_objects,
            n_unassociated_dia_objects=n_unassociated_dia_objects,)

    def _initialize_dia_object(self, objId):
        """Create a new DiaObject with the values that are initialized at
        creation time.

        Parameters
        ----------
        objId : `int`
            ``diaObjectId`` value for the new DiaObject.

        Returns
        -------
        diaObject : `dict`
            Newly created DiaObject with keys:

            ``diaObjectId``
                Unique DiaObjectId (`int`).
            ``pmParallaxNdata``
                Number of data points used for parallax calculation (`int`).
            ``nearbyObj1``
                Id of a nearbyObject in the Object table (`int`).
            ``nearbyObj2``
                Id of a nearbyObject in the Object table (`int`).
            ``nearbyObj3``
                Id of a nearbyObject in the Object table (`int`).
            ``flags``
                Initialized to 0 (`int`).
            ``?PSFluxNdata``
                Number of data points used to calculate point source flux
                summary statistics in each bandpass, one key per band in
                ``ugrizy`` (`int`).
        """
        new_dia_object = {"diaObjectId": objId,
                          "pmParallaxNdata": 0,
                          "nearbyObj1": 0,
                          "nearbyObj2": 0,
                          "nearbyObj3": 0,
                          "flags": 0}
        new_dia_object.update(
            ("%sPSFluxNdata" % band, 0)
            for band in ("u", "g", "r", "i", "z", "y"))
        return new_dia_object

    def _add_association_meta_data(self, match_result):
        """Store summaries of the association step in the task metadata.

        Parameters
        ----------
        match_result : `lsst.pipe.base.Struct`
            Results struct as returned by `match`. The components read are:

            - ``n_updated_dia_objects`` : Number of previously known
              dia_objects with newly associated DIASources. (`int`).
            - ``n_new_dia_objects`` : Number of newly created DIAObjects from
              unassociated DIASources (`int`).
            - ``n_unassociated_dia_objects`` : Number of previous DIAObjects
              that were not associated to a new DIASource (`int`).
        """
        self.metadata.add('numUpdatedDiaObjects',
                          match_result.n_updated_dia_objects)
        self.metadata.add('numNewDiaObjects',
                          match_result.n_new_dia_objects)
        self.metadata.add('numUnassociatedDiaObjects',
                          match_result.n_unassociated_dia_objects)