Coverage for python/lsst/pipe/tasks/simpleAssociation.py: 20%

83 statements  

« prev     ^ index     » next       coverage.py v7.5.0, created at 2024-05-03 03:41 -0700

1# This file is part of pipe_tasks. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22"""Simple association algorithm for DRP. 

23Adapted from http://github.com/LSSTDESC/dia_pipe 

24""" 

25__all__ = ["SimpleAssociationConfig", "SimpleAssociationTask"] 

26 

27import numpy as np 

28import pandas as pd 

29 

30import lsst.afw.table as afwTable 

31import lsst.geom as geom 

32import lsst.pex.config as pexConfig 

33import lsst.pipe.base as pipeBase 

34from lsst.meas.base import IdGenerator 

35 

36from .associationUtils import query_disc, eq2xyz, toIndex 

37 

38 

39class SimpleAssociationConfig(pexConfig.Config): 

40 """Configuration parameters for the SimpleAssociationTask 

41 """ 

42 tolerance = pexConfig.Field( 

43 dtype=float, 

44 doc='maximum distance to match sources together in arcsec', 

45 default=0.5 

46 ) 

47 nside = pexConfig.Field( 

48 dtype=int, 

49 doc='Healpix nside value used for indexing', 

50 default=2**18, 

51 ) 

52 

53 

54class SimpleAssociationTask(pipeBase.Task): 

55 """Construct DiaObjects from a DataFrame of DIASources by spatially 

56 associating the sources. 

57 

58 Represents a simple, brute force algorithm, 2-way matching of DiaSources 

59 into. DiaObjects. Algorithm picks the nearest, first match within the 

60 matching radius of a DiaObject to associate a source to for simplicity. 

61 """ 

62 ConfigClass = SimpleAssociationConfig 

63 _DefaultName = "simpleAssociation" 

64 

65 def run(self, diaSources, idGenerator=None): 

66 """Associate DiaSources into a collection of DiaObjects using a 

67 brute force matching algorithm. 

68 

69 Reproducible is for the same input data is assured by ordering the 

70 DiaSource data by ccdVisit ordering. 

71 

72 Parameters 

73 ---------- 

74 diaSources : `pandas.DataFrame` 

75 DiaSources grouped by CcdVisitId to spatially associate into 

76 DiaObjects. 

77 idGenerator : `lsst.meas.base.IdGenerator`, optional 

78 Object that generates Object IDs and random number generator seeds. 

79 

80 Returns 

81 ------- 

82 results : `lsst.pipe.base.Struct` 

83 Results struct with attributes: 

84 

85 ``assocDiaSources`` 

86 Table of DiaSources with updated values for the DiaObjects 

87 they are spatially associated to (`pandas.DataFrame`). 

88 ``diaObjects`` 

89 Table of DiaObjects from matching DiaSources 

90 (`pandas.DataFrame`). 

91 

92 """ 

93 

94 # Expected indexes include diaSourceId or meaningless range index 

95 # If meaningless range index, drop it, else keep it. 

96 doDropIndex = diaSources.index.names[0] is None 

97 diaSources.reset_index(inplace=True, drop=doDropIndex) 

98 

99 # Sort by ccdVisit and diaSourceId to get a reproducible ordering for 

100 # the association. 

101 diaSources.set_index(["ccdVisitId", "diaSourceId"], inplace=True) 

102 

103 # Empty lists to store matching and location data. 

104 diaObjectCat = [] 

105 diaObjectCoords = [] 

106 healPixIndices = [] 

107 

108 # Create Id factory and catalog for creating DiaObjectIds. 

109 if idGenerator is None: 

110 idGenerator = IdGenerator() 

111 idCat = idGenerator.make_source_catalog(afwTable.SourceTable.makeMinimalSchema()) 

112 

113 for ccdVisit in diaSources.index.levels[0]: 

114 # For the first ccdVisit, just copy the DiaSource info into the 

115 # diaObject data to create the first set of Objects. 

116 ccdVisitSources = diaSources.loc[ccdVisit] 

117 if len(diaObjectCat) == 0: 

118 for diaSourceId, diaSrc in ccdVisitSources.iterrows(): 

119 self.addNewDiaObject(diaSrc, 

120 diaSources, 

121 ccdVisit, 

122 diaSourceId, 

123 diaObjectCat, 

124 idCat, 

125 diaObjectCoords, 

126 healPixIndices) 

127 continue 

128 # Temp list to store DiaObjects already used for this ccdVisit. 

129 usedMatchIndicies = [] 

130 # Run over subsequent data. 

131 for diaSourceId, diaSrc in ccdVisitSources.iterrows(): 

132 # Find matches. 

133 matchResult = self.findMatches(diaSrc["ra"], 

134 diaSrc["dec"], 

135 2*self.config.tolerance, 

136 healPixIndices, 

137 diaObjectCat) 

138 dists = matchResult.dists 

139 matches = matchResult.matches 

140 # Create a new DiaObject if no match found. 

141 if dists is None: 

142 self.addNewDiaObject(diaSrc, 

143 diaSources, 

144 ccdVisit, 

145 diaSourceId, 

146 diaObjectCat, 

147 idCat, 

148 diaObjectCoords, 

149 healPixIndices) 

150 continue 

151 # If matched, update catalogs and arrays. 

152 if np.min(dists) < np.deg2rad(self.config.tolerance/3600): 

153 matchDistArg = np.argmin(dists) 

154 matchIndex = matches[matchDistArg] 

155 # Test to see if the DiaObject has been used. 

156 if np.isin([matchIndex], usedMatchIndicies).sum() < 1: 

157 self.updateCatalogs(matchIndex, 

158 diaSrc, 

159 diaSources, 

160 ccdVisit, 

161 diaSourceId, 

162 diaObjectCat, 

163 diaObjectCoords, 

164 healPixIndices) 

165 usedMatchIndicies.append(matchIndex) 

166 # If the matched DiaObject has already been used, create a 

167 # new DiaObject for this DiaSource. 

168 else: 

169 self.addNewDiaObject(diaSrc, 

170 diaSources, 

171 ccdVisit, 

172 diaSourceId, 

173 diaObjectCat, 

174 idCat, 

175 diaObjectCoords, 

176 healPixIndices) 

177 # Create new DiaObject if no match found within the matching 

178 # tolerance. 

179 else: 

180 self.addNewDiaObject(diaSrc, 

181 diaSources, 

182 ccdVisit, 

183 diaSourceId, 

184 diaObjectCat, 

185 idCat, 

186 diaObjectCoords, 

187 healPixIndices) 

188 

189 # Drop indices before returning associated diaSource catalog. 

190 diaSources.reset_index(inplace=True) 

191 diaSources.set_index("diaSourceId", inplace=True, verify_integrity=True) 

192 

193 objs = diaObjectCat if diaObjectCat else np.array([], dtype=[('diaObjectId', 'int64'), 

194 ('ra', 'float64'), 

195 ('dec', 'float64'), 

196 ('nDiaSources', 'int64')]) 

197 diaObjects = pd.DataFrame(data=objs) 

198 

199 if "diaObjectId" in diaObjects.columns: 

200 diaObjects.set_index("diaObjectId", inplace=True, verify_integrity=True) 

201 

202 return pipeBase.Struct( 

203 assocDiaSources=diaSources, 

204 diaObjects=diaObjects) 

205 

206 def addNewDiaObject(self, 

207 diaSrc, 

208 diaSources, 

209 ccdVisit, 

210 diaSourceId, 

211 diaObjCat, 

212 idCat, 

213 diaObjCoords, 

214 healPixIndices): 

215 """Create a new DiaObject and append its data. 

216 

217 Parameters 

218 ---------- 

219 diaSrc : `pandas.Series` 

220 Full unassociated DiaSource to create a DiaObject from. 

221 diaSources : `pandas.DataFrame` 

222 DiaSource catalog to update information in. The catalog is 

223 modified in place. 

224 ccdVisit : `int` 

225 Unique identifier of the ccdVisit where ``diaSrc`` was observed. 

226 diaSourceId : `int` 

227 Unique identifier of the DiaSource. 

228 diaObjectCat : `list` of `dict`s 

229 Catalog of diaObjects to append the new object o. 

230 idCat : `lsst.afw.table.SourceCatalog` 

231 Catalog with the IdFactory used to generate unique DiaObject 

232 identifiers. 

233 diaObjectCoords : `list` of `list`s of `lsst.geom.SpherePoint`s 

234 Set of coordinates of DiaSource locations that make up the 

235 DiaObject average coordinate. 

236 healPixIndices : `list` of `int`s 

237 HealPix indices representing the locations of each currently 

238 existing DiaObject. 

239 """ 

240 hpIndex = toIndex(self.config.nside, 

241 diaSrc["ra"], 

242 diaSrc["dec"]) 

243 healPixIndices.append(hpIndex) 

244 

245 sphPoint = geom.SpherePoint(diaSrc["ra"], 

246 diaSrc["dec"], 

247 geom.degrees) 

248 diaObjCoords.append([sphPoint]) 

249 

250 diaObjId = idCat.addNew().get("id") 

251 diaObjCat.append(self.createDiaObject(diaObjId, 

252 diaSrc["ra"], 

253 diaSrc["dec"])) 

254 diaSources.loc[(ccdVisit, diaSourceId), "diaObjectId"] = diaObjId 

255 

256 def updateCatalogs(self, 

257 matchIndex, 

258 diaSrc, 

259 diaSources, 

260 ccdVisit, 

261 diaSourceId, 

262 diaObjCat, 

263 diaObjCoords, 

264 healPixIndices): 

265 """Update DiaObject and DiaSource values after an association. 

266 

267 Parameters 

268 ---------- 

269 matchIndex : `int` 

270 Array index location of the DiaObject that ``diaSrc`` was 

271 associated to. 

272 diaSrc : `pandas.Series` 

273 Full unassociated DiaSource to create a DiaObject from. 

274 diaSources : `pandas.DataFrame` 

275 DiaSource catalog to update information in. The catalog is 

276 modified in place. 

277 ccdVisit : `int` 

278 Unique identifier of the ccdVisit where ``diaSrc`` was observed. 

279 diaSourceId : `int` 

280 Unique identifier of the DiaSource. 

281 diaObjectCat : `list` of `dict`s 

282 Catalog of diaObjects to append the new object o. 

283 diaObjectCoords : `list` of `list`s of `lsst.geom.SpherePoint`s 

284 Set of coordinates of DiaSource locations that make up the 

285 DiaObject average coordinate. 

286 healPixIndices : `list` of `int`s 

287 HealPix indices representing the locations of each currently 

288 existing DiaObject. 

289 """ 

290 # Update location and healPix index. 

291 sphPoint = geom.SpherePoint(diaSrc["ra"], 

292 diaSrc["dec"], 

293 geom.degrees) 

294 diaObjCoords[matchIndex].append(sphPoint) 

295 aveCoord = geom.averageSpherePoint(diaObjCoords[matchIndex]) 

296 diaObjCat[matchIndex]["ra"] = aveCoord.getRa().asDegrees() 

297 diaObjCat[matchIndex]["dec"] = aveCoord.getDec().asDegrees() 

298 nSources = diaObjCat[matchIndex]["nDiaSources"] 

299 diaObjCat[matchIndex]["nDiaSources"] = nSources + 1 

300 healPixIndices[matchIndex] = toIndex(self.config.nside, 

301 diaObjCat[matchIndex]["ra"], 

302 diaObjCat[matchIndex]["dec"]) 

303 # Update DiaObject Id that this source is now associated to. 

304 diaSources.loc[(ccdVisit, diaSourceId), "diaObjectId"] = \ 

305 diaObjCat[matchIndex]["diaObjectId"] 

306 

307 def findMatches(self, src_ra, src_dec, tol, hpIndices, diaObjs): 

308 """Search healPixels around DiaSource locations for DiaObjects. 

309 

310 Parameters 

311 ---------- 

312 src_ra : `float` 

313 DiaSource RA location. 

314 src_dec : `float` 

315 DiaSource Dec location. 

316 tol : `float` 

317 Size of annulus to convert to covering healPixels and search for 

318 DiaObjects. 

319 hpIndices : `list` of `int`s 

320 List of heal pix indices containing the DiaObjects in ``diaObjs``. 

321 diaObjs : `list` of `dict`s 

322 Catalog diaObjects to with full location information for comparing 

323 to DiaSources. 

324 

325 Returns 

326 ------- 

327 results : `lsst.pipe.base.Struct` 

328 Results struct containing 

329 

330 ``dists`` 

331 Array of distances between the current DiaSource diaObjects. 

332 (`numpy.ndarray` or `None`) 

333 ``matches`` 

334 Array of array indices of diaObjects this DiaSource matches to. 

335 (`numpy.ndarray` or `None`) 

336 """ 

337 match_indices = query_disc(self.config.nside, 

338 src_ra, 

339 src_dec, 

340 np.deg2rad(tol/3600.)) 

341 matchIndices = np.argwhere(np.isin(hpIndices, match_indices)).flatten() 

342 

343 if len(matchIndices) < 1: 

344 return pipeBase.Struct(dists=None, matches=None) 

345 

346 dists = np.array( 

347 [np.sqrt(np.sum((eq2xyz(src_ra, src_dec) 

348 - eq2xyz(diaObjs[match]["ra"], 

349 diaObjs[match]["dec"]))**2)) 

350 for match in matchIndices]) 

351 return pipeBase.Struct( 

352 dists=dists, 

353 matches=matchIndices) 

354 

355 def createDiaObject(self, objId, ra, dec): 

356 """Create a simple empty DiaObject with location and id information. 

357 

358 Parameters 

359 ---------- 

360 objId : `int` 

361 Unique ID for this new DiaObject. 

362 ra : `float` 

363 RA location of this DiaObject. 

364 dec : `float` 

365 Dec location of this DiaObject 

366 

367 Returns 

368 ------- 

369 DiaObject : `dict` 

370 Dictionary of values representing a DiaObject. 

371 """ 

372 new_dia_object = {"diaObjectId": objId, 

373 "ra": ra, 

374 "dec": dec, 

375 "nDiaSources": 1} 

376 return new_dia_object