Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of ap_association. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22"""A simple implementation of source association task for ap_verify. 

23""" 

24 

25__all__ = ["AssociationConfig", "AssociationTask"] 

26 

27import numpy as np 

28import pandas as pd 

29from scipy.spatial import cKDTree 

30 

31import lsst.geom as geom 

32import lsst.pex.config as pexConfig 

33import lsst.pipe.base as pipeBase 

34 

# Enforce an error for unsafe column/array value setting in pandas.
# NOTE: this is a process-wide pandas option set as a side effect of
# importing this module; with 'raise', pandas raises an exception on
# chained assignment instead of only emitting a warning.
pd.options.mode.chained_assignment = 'raise'

37 

38 

class AssociationConfig(pexConfig.Config):
    """Configuration options for `AssociationTask`."""

    # Matching radius used when pairing DIASources with DIAObjects.
    maxDistArcSeconds = pexConfig.Field(
        doc=('Maximum distance in arcseconds to test for a DIASource to be a '
             'match to a DIAObject.'),
        dtype=float,
        default=1.0,
    )

48 

49 

class AssociationTask(pipeBase.Task):
    """Associate DIASources into existing DIAObjects.

    This task performs the association of detected DIASources in a visit
    with the previous DIAObjects detected over time. DIASources that cannot
    be associated with a previously detected DIAObject are reported
    separately so that the caller can create new DIAObjects from them.
    """

    ConfigClass = AssociationConfig
    _DefaultName = "association"

    @pipeBase.timeMethod
    def run(self,
            diaSources,
            diaObjects):
        """Associate the new DiaSources with existing DiaObjects.

        Parameters
        ----------
        diaSources : `pandas.DataFrame`
            New DIASources to be associated with existing DIAObjects.
        diaObjects : `pandas.DataFrame`
            Existing diaObjects from the Apdb.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Results struct with components.

            - ``"matchedDiaSources"`` : DiaSources that were matched. Matched
              Sources have their diaObjectId updated and set to the id of the
              diaObject they were matched to. (`pandas.DataFrame`)
            - ``"unAssocDiaSources"`` : DiaSources that were not matched.
              These are the rows whose ``diaObjectId`` is 0 after matching.
              (`pandas.DataFrame`)
            - ``"nUpdatedDiaObjects"`` : Number of DiaObjects that were
              matched to new DiaSources. (`int`)
            - ``"nUnassociatedDiaObjects"`` : Number of DiaObjects that were
              not matched a new DiaSource. (`int`)
        """
        diaSources = self.check_dia_source_radec(diaSources)
        if len(diaObjects) == 0:
            # Nothing to match against: every source is unassociated.
            return pipeBase.Struct(
                matchedDiaSources=pd.DataFrame(columns=diaSources.columns),
                unAssocDiaSources=diaSources,
                nUpdatedDiaObjects=0,
                nUnassociatedDiaObjects=0)

        matchResult = self.associate_sources(diaObjects, diaSources)

        # A diaObjectId of 0 marks a source that was not associated.
        mask = matchResult.diaSources["diaObjectId"] != 0

        return pipeBase.Struct(
            matchedDiaSources=matchResult.diaSources[mask].reset_index(drop=True),
            unAssocDiaSources=matchResult.diaSources[~mask].reset_index(drop=True),
            nUpdatedDiaObjects=matchResult.nUpdatedDiaObjects,
            nUnassociatedDiaObjects=matchResult.nUnassociatedDiaObjects)

    def check_dia_source_radec(self, dia_sources):
        """Check that all DiaSources have non-NaN values for RA/DEC.

        If one or more DiaSources are found to have NaN values, throw a
        warning to the log with the ids of the offending sources. Drop them
        from the table.

        Parameters
        ----------
        dia_sources : `pandas.DataFrame`
            Input DiaSources to check for NaN values. Must have ``ra``,
            ``decl`` and ``diaSourceId`` columns.

        Returns
        -------
        trimmed_sources : `pandas.DataFrame`
            DataFrame of DiaSources trimmed of all entries with NaN values for
            RA/DEC. The input object itself is returned when nothing has to
            be dropped; otherwise an independent copy that keeps the original
            index labels of the surviving rows.
        """
        nan_mask = dia_sources["ra"].isnull() | dia_sources["decl"].isnull()
        if not nan_mask.any():
            return dia_sources

        # Select the offending ids with the boolean mask itself so that the
        # lookup is correct whatever the DataFrame index looks like.
        for source_id in dia_sources.loc[nan_mask, "diaSourceId"]:
            self.log.warning(
                "DiaSource %i has NaN value for RA/DEC, "
                "dropping from association.", source_id)

        # Explicit copy: the result is written to later on (``match`` sets
        # ``diaObjectId``) and must not be treated by pandas as a slice of
        # the caller's frame.
        return dia_sources[~nan_mask].copy()

    @pipeBase.timeMethod
    def associate_sources(self, dia_objects, dia_sources):
        """Associate the input DIASources with the catalog of DIAObjects.

        DiaObject DataFrame must be indexed on ``diaObjectId``.

        Parameters
        ----------
        dia_objects : `pandas.DataFrame`
            Catalog of DIAObjects to attempt to associate the input
            DIASources into.
        dia_sources : `pandas.DataFrame`
            DIASources to associate into the DIAObjectCollection.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Results struct with components.

            - ``"diaSources"`` : Full set of diaSources both matched and not.
              (`pandas.DataFrame`)
            - ``"nUpdatedDiaObjects"`` : Number of DiaObjects that were
              associated. (`int`)
            - ``"nUnassociatedDiaObjects"`` : Number of DiaObjects that were
              not matched a new DiaSource. (`int`)
        """
        scores = self.score(
            dia_objects, dia_sources,
            self.config.maxDistArcSeconds * geom.arcseconds)
        return self.match(dia_objects, dia_sources, scores)

    @pipeBase.timeMethod
    def score(self, dia_objects, dia_sources, max_dist):
        """Compute a quality score for each dia_source/dia_object pair
        between this catalog of DIAObjects and the input DIASource catalog.

        ``max_dist`` sets maximum separation to consider a dia_source a
        possible match to a dia_object. If the pair is beyond this distance
        no score is computed.

        Parameters
        ----------
        dia_objects : `pandas.DataFrame`
            A contiguous catalog of DIAObjects to score against dia_sources.
            Its index values are used as the DIAObject ids.
        dia_sources : `pandas.DataFrame`
            A contiguous catalog of dia_sources to "score" based on distance
            and (in the future) other metrics.
        max_dist : `lsst.geom.Angle`
            Maximum allowed distance to compute a score for a given DIAObject
            DIASource pair.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Results struct with components, each with one entry per row of
            ``dia_sources`` (in row order):

            - ``"scores"``: distance to the nearest DIAObject, measured
              between the unit vectors of the two positions
              (array-like of `float`).
            - ``"obj_idxs"``: positional indexes of the matched DIAObjects
              in the catalog. (array-like of `int`)
            - ``"obj_ids"``: ids (index values of ``dia_objects``) of the
              matched DIAObjects (array-like of `int`).

            Default values for these arrays are
            INF, -1, and 0 respectively for unassociated sources.
        """
        n_sources = len(dia_sources)
        scores = np.full(n_sources, np.inf, dtype=np.float64)
        obj_idxs = np.full(n_sources, -1, dtype=np.int64)
        obj_ids = np.full(n_sources, 0, dtype=np.int64)

        if len(dia_objects) == 0:
            return pipeBase.Struct(
                scores=scores,
                obj_idxs=obj_idxs,
                obj_ids=obj_ids)

        spatial_tree = self._make_spatial_tree(dia_objects)

        # NOTE: the angle in radians is used directly as the upper bound on
        # the Euclidean distance between unit vectors; the two agree only in
        # the small-angle limit.
        max_dist_rad = max_dist.asRadians()

        vectors = self._radec_to_xyz(dia_sources)

        scores, nearest = spatial_tree.query(
            vectors,
            distance_upper_bound=max_dist_rad)

        # The tree reports "no neighbour within the bound" as an infinite
        # distance paired with an out-of-range index. Only copy the real
        # matches so the documented -1 / 0 defaults survive for the rest.
        found = np.isfinite(scores)
        obj_idxs[found] = nearest[found]
        obj_ids[found] = dia_objects.index.to_numpy()[nearest[found]]

        return pipeBase.Struct(
            scores=scores,
            obj_idxs=obj_idxs,
            obj_ids=obj_ids)

    def _make_spatial_tree(self, dia_objects):
        """Create a searchable kd-tree the input dia_object positions.

        Parameters
        ----------
        dia_objects : `pandas.DataFrame`
            A catalog of DIAObjects to create the tree from.

        Returns
        -------
        kd_tree : `scipy.spatial.cKDTree`
            Searchable kd-tree created from the positions of the DIAObjects.
        """
        vectors = self._radec_to_xyz(dia_objects)
        return cKDTree(vectors)

    def _radec_to_xyz(self, catalog):
        """Convert input ra/dec coordinates to spherical unit-vectors.

        Parameters
        ----------
        catalog : `pandas.DataFrame`
            Catalog to produce spherical unit-vector from. The ``ra`` and
            ``decl`` columns are converted with `numpy.radians`, i.e. they
            are treated as degrees.

        Returns
        -------
        vectors : `numpy.ndarray`, (N, 3)
            Output unit-vectors
        """
        ras = np.radians(catalog["ra"])
        decs = np.radians(catalog["decl"])
        vectors = np.empty((len(ras), 3))

        # pi/2 - dec is the polar angle measured from the +z axis.
        sin_dec = np.sin(np.pi / 2 - decs)
        vectors[:, 0] = sin_dec * np.cos(ras)
        vectors[:, 1] = sin_dec * np.sin(ras)
        vectors[:, 2] = np.cos(np.pi / 2 - decs)

        return vectors

    @pipeBase.timeMethod
    def match(self, dia_objects, dia_sources, score_struct):
        """Match DIAsources to DiaObjects given a score.

        ``dia_sources`` is modified in place: the ``diaObjectId`` of every
        matched row is set to the id of the DIAObject it was matched to.
        Rows that are not matched are left untouched.

        Parameters
        ----------
        dia_objects : `pandas.DataFrame`
            A catalog of DIAObjects to associate to DIASources.
        dia_sources : `pandas.DataFrame`
            A contiguous catalog of dia_sources for which the set of scores
            has been computed on with ``score``. Entry ``i`` of each score
            array refers to row ``i`` (by position) of this catalog.
        score_struct : `lsst.pipe.base.Struct`
            Results struct with components:

            - ``"scores"``: array of floats of match quality
              (array-like of `float`).
            - ``"obj_ids"``: ids of the matched DIAObjects
              (array-like of `int`).
            - ``"obj_idxs"``: indexes of the matched DIAObjects in the catalog.
              (array-like of `int`)

            Unassociated sources are expected to have a non-finite score.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Results struct with components.

            - ``"diaSources"`` : Full set of diaSources both matched and not.
              (`pandas.DataFrame`)
            - ``"nUpdatedDiaObjects"`` : Number of DiaObjects that were
              associated. (`int`)
            - ``"nUnassociatedDiaObjects"`` : Number of DiaObjects that were
              not matched a new DiaSource. (`int`)
        """
        n_previous_dia_objects = len(dia_objects)
        used_dia_object = np.zeros(n_previous_dia_objects, dtype=bool)
        matched_positions = []

        # We sort from best match to worst to effectively perform a
        # "handshake" match where both the DIASources and DIAObjects agree
        # on their best match. By sorting this way, non-finite scores (those
        # sources that have no candidate DIAObject) are placed at the end of
        # the array.
        score_args = score_struct.scores.argsort(axis=None)
        for score_idx in score_args:
            if not np.isfinite(score_struct.scores[score_idx]):
                # Thanks to the sorting, every remaining source is also
                # without a candidate, so there is nothing left to match.
                break
            dia_obj_idx = score_struct.obj_idxs[score_idx]
            if used_dia_object[dia_obj_idx]:
                # A better-scoring source already claimed this DIAObject;
                # this source stays unassociated.
                continue
            used_dia_object[dia_obj_idx] = True
            matched_positions.append(score_idx)

        if matched_positions:
            # ``matched_positions`` are row positions, not index labels.
            # Translate them to the labels actually present so the write
            # lands on the right rows even when the index is not 0..N-1
            # (e.g. after rows with NaN coordinates have been dropped).
            row_labels = dia_sources.index[matched_positions]
            dia_sources.loc[row_labels, "diaObjectId"] = \
                score_struct.obj_ids[matched_positions]

        n_updated_dia_objects = len(matched_positions)

        return pipeBase.Struct(
            diaSources=dia_sources,
            nUpdatedDiaObjects=n_updated_dia_objects,
            nUnassociatedDiaObjects=(n_previous_dia_objects
                                     - n_updated_dia_objects))