Coverage for python/lsst/pipe/tasks/simpleAssociation.py: 20%

87 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-10-12 11:10 +0000

1# This file is part of pipe_tasks. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22"""Simple association algorithm for DRP. 

23Adapted from http://github.com/LSSTDESC/dia_pipe 

24""" 

25__all__ = ["SimpleAssociationConfig", "SimpleAssociationTask"] 

26 

27import numpy as np 

28import pandas as pd 

29 

30import lsst.afw.table as afwTable 

31import lsst.geom as geom 

32import lsst.pex.config as pexConfig 

33import lsst.pipe.base as pipeBase 

34from lsst.meas.base import IdGenerator 

35from lsst.obs.base import ExposureIdInfo 

36 

37from .associationUtils import query_disc, eq2xyz, toIndex 

38 

39 

40class SimpleAssociationConfig(pexConfig.Config): 

41 """Configuration parameters for the SimpleAssociationTask 

42 """ 

43 tolerance = pexConfig.Field( 

44 dtype=float, 

45 doc='maximum distance to match sources together in arcsec', 

46 default=0.5 

47 ) 

48 nside = pexConfig.Field( 

49 dtype=int, 

50 doc='Healpix nside value used for indexing', 

51 default=2**18, 

52 ) 

53 

54 

55class SimpleAssociationTask(pipeBase.Task): 

56 """Construct DiaObjects from a DataFrame of DIASources by spatially 

57 associating the sources. 

58 

59 Represents a simple, brute force algorithm, 2-way matching of DiaSources 

60 into. DiaObjects. Algorithm picks the nearest, first match within the 

61 matching radius of a DiaObject to associate a source to for simplicity. 

62 """ 

63 ConfigClass = SimpleAssociationConfig 

64 _DefaultName = "simpleAssociation" 

65 

66 def run(self, diaSources, tractPatchId=None, skymapBits=None, idGenerator=None): 

67 """Associate DiaSources into a collection of DiaObjects using a 

68 brute force matching algorithm. 

69 

70 Reproducible is for the same input data is assured by ordering the 

71 DiaSource data by ccdVisit ordering. 

72 

73 Parameters 

74 ---------- 

75 diaSources : `pandas.DataFrame` 

76 DiaSources grouped by CcdVisitId to spatially associate into 

77 DiaObjects. 

78 tractPatchId : `int`, optional 

79 Unique identifier for the tract patch. Deprecated in favor of 

80 ``idGenerator`` along with `lsst.obs.base.ExposureIdInfo`. 

81 skymapBits : `int` 

82 Maximum number of bits used the ``tractPatchId`` integer 

83 identifier. Deprecated in favor of ``idGenerator`` along with 

84 `lsst.obs.base.ExposureIdInfo`. 

85 idGenerator : `lsst.meas.base.IdGenerator`, optional 

86 Object that generates Object IDs and random number generator seeds. 

87 

88 Returns 

89 ------- 

90 results : `lsst.pipe.base.Struct` 

91 Results struct with attributes: 

92 

93 ``assocDiaSources`` 

94 Table of DiaSources with updated values for the DiaObjects 

95 they are spatially associated to (`pandas.DataFrame`). 

96 ``diaObjects`` 

97 Table of DiaObjects from matching DiaSources 

98 (`pandas.DataFrame`). 

99 

100 """ 

101 

102 # Expected indexes include diaSourceId or meaningless range index 

103 # If meaningless range index, drop it, else keep it. 

104 doDropIndex = diaSources.index.names[0] is None 

105 diaSources.reset_index(inplace=True, drop=doDropIndex) 

106 

107 # Sort by ccdVisit and diaSourceId to get a reproducible ordering for 

108 # the association. 

109 diaSources.set_index(["ccdVisitId", "diaSourceId"], inplace=True) 

110 

111 # Empty lists to store matching and location data. 

112 diaObjectCat = [] 

113 diaObjectCoords = [] 

114 healPixIndices = [] 

115 

116 # Create Id factory and catalog for creating DiaObjectIds. 

117 if idGenerator is None: 

118 if tractPatchId is not None and skymapBits is not None: 

119 exposureIdInfo = ExposureIdInfo(tractPatchId, skymapBits) 

120 idGenerator = IdGenerator._from_exposure_id_info(exposureIdInfo) 

121 else: 

122 idGenerator = IdGenerator() 

123 idCat = idGenerator.make_source_catalog(afwTable.SourceTable.makeMinimalSchema()) 

124 

125 for ccdVisit in diaSources.index.levels[0]: 

126 # For the first ccdVisit, just copy the DiaSource info into the 

127 # diaObject data to create the first set of Objects. 

128 ccdVisitSources = diaSources.loc[ccdVisit] 

129 if len(diaObjectCat) == 0: 

130 for diaSourceId, diaSrc in ccdVisitSources.iterrows(): 

131 self.addNewDiaObject(diaSrc, 

132 diaSources, 

133 ccdVisit, 

134 diaSourceId, 

135 diaObjectCat, 

136 idCat, 

137 diaObjectCoords, 

138 healPixIndices) 

139 continue 

140 # Temp list to store DiaObjects already used for this ccdVisit. 

141 usedMatchIndicies = [] 

142 # Run over subsequent data. 

143 for diaSourceId, diaSrc in ccdVisitSources.iterrows(): 

144 # Find matches. 

145 matchResult = self.findMatches(diaSrc["ra"], 

146 diaSrc["dec"], 

147 2*self.config.tolerance, 

148 healPixIndices, 

149 diaObjectCat) 

150 dists = matchResult.dists 

151 matches = matchResult.matches 

152 # Create a new DiaObject if no match found. 

153 if dists is None: 

154 self.addNewDiaObject(diaSrc, 

155 diaSources, 

156 ccdVisit, 

157 diaSourceId, 

158 diaObjectCat, 

159 idCat, 

160 diaObjectCoords, 

161 healPixIndices) 

162 continue 

163 # If matched, update catalogs and arrays. 

164 if np.min(dists) < np.deg2rad(self.config.tolerance/3600): 

165 matchDistArg = np.argmin(dists) 

166 matchIndex = matches[matchDistArg] 

167 # Test to see if the DiaObject has been used. 

168 if np.isin([matchIndex], usedMatchIndicies).sum() < 1: 

169 self.updateCatalogs(matchIndex, 

170 diaSrc, 

171 diaSources, 

172 ccdVisit, 

173 diaSourceId, 

174 diaObjectCat, 

175 diaObjectCoords, 

176 healPixIndices) 

177 usedMatchIndicies.append(matchIndex) 

178 # If the matched DiaObject has already been used, create a 

179 # new DiaObject for this DiaSource. 

180 else: 

181 self.addNewDiaObject(diaSrc, 

182 diaSources, 

183 ccdVisit, 

184 diaSourceId, 

185 diaObjectCat, 

186 idCat, 

187 diaObjectCoords, 

188 healPixIndices) 

189 # Create new DiaObject if no match found within the matching 

190 # tolerance. 

191 else: 

192 self.addNewDiaObject(diaSrc, 

193 diaSources, 

194 ccdVisit, 

195 diaSourceId, 

196 diaObjectCat, 

197 idCat, 

198 diaObjectCoords, 

199 healPixIndices) 

200 

201 # Drop indices before returning associated diaSource catalog. 

202 diaSources.reset_index(inplace=True) 

203 diaSources.set_index("diaSourceId", inplace=True, verify_integrity=True) 

204 

205 objs = diaObjectCat if diaObjectCat else np.array([], dtype=[('diaObjectId', 'int64'), 

206 ('ra', 'float64'), 

207 ('dec', 'float64'), 

208 ('nDiaSources', 'int64')]) 

209 diaObjects = pd.DataFrame(data=objs) 

210 

211 if "diaObjectId" in diaObjects.columns: 

212 diaObjects.set_index("diaObjectId", inplace=True, verify_integrity=True) 

213 

214 return pipeBase.Struct( 

215 assocDiaSources=diaSources, 

216 diaObjects=diaObjects) 

217 

218 def addNewDiaObject(self, 

219 diaSrc, 

220 diaSources, 

221 ccdVisit, 

222 diaSourceId, 

223 diaObjCat, 

224 idCat, 

225 diaObjCoords, 

226 healPixIndices): 

227 """Create a new DiaObject and append its data. 

228 

229 Parameters 

230 ---------- 

231 diaSrc : `pandas.Series` 

232 Full unassociated DiaSource to create a DiaObject from. 

233 diaSources : `pandas.DataFrame` 

234 DiaSource catalog to update information in. The catalog is 

235 modified in place. 

236 ccdVisit : `int` 

237 Unique identifier of the ccdVisit where ``diaSrc`` was observed. 

238 diaSourceId : `int` 

239 Unique identifier of the DiaSource. 

240 diaObjectCat : `list` of `dict`s 

241 Catalog of diaObjects to append the new object o. 

242 idCat : `lsst.afw.table.SourceCatalog` 

243 Catalog with the IdFactory used to generate unique DiaObject 

244 identifiers. 

245 diaObjectCoords : `list` of `list`s of `lsst.geom.SpherePoint`s 

246 Set of coordinates of DiaSource locations that make up the 

247 DiaObject average coordinate. 

248 healPixIndices : `list` of `int`s 

249 HealPix indices representing the locations of each currently 

250 existing DiaObject. 

251 """ 

252 hpIndex = toIndex(self.config.nside, 

253 diaSrc["ra"], 

254 diaSrc["dec"]) 

255 healPixIndices.append(hpIndex) 

256 

257 sphPoint = geom.SpherePoint(diaSrc["ra"], 

258 diaSrc["dec"], 

259 geom.degrees) 

260 diaObjCoords.append([sphPoint]) 

261 

262 diaObjId = idCat.addNew().get("id") 

263 diaObjCat.append(self.createDiaObject(diaObjId, 

264 diaSrc["ra"], 

265 diaSrc["dec"])) 

266 diaSources.loc[(ccdVisit, diaSourceId), "diaObjectId"] = diaObjId 

267 

268 def updateCatalogs(self, 

269 matchIndex, 

270 diaSrc, 

271 diaSources, 

272 ccdVisit, 

273 diaSourceId, 

274 diaObjCat, 

275 diaObjCoords, 

276 healPixIndices): 

277 """Update DiaObject and DiaSource values after an association. 

278 

279 Parameters 

280 ---------- 

281 matchIndex : `int` 

282 Array index location of the DiaObject that ``diaSrc`` was 

283 associated to. 

284 diaSrc : `pandas.Series` 

285 Full unassociated DiaSource to create a DiaObject from. 

286 diaSources : `pandas.DataFrame` 

287 DiaSource catalog to update information in. The catalog is 

288 modified in place. 

289 ccdVisit : `int` 

290 Unique identifier of the ccdVisit where ``diaSrc`` was observed. 

291 diaSourceId : `int` 

292 Unique identifier of the DiaSource. 

293 diaObjectCat : `list` of `dict`s 

294 Catalog of diaObjects to append the new object o. 

295 diaObjectCoords : `list` of `list`s of `lsst.geom.SpherePoint`s 

296 Set of coordinates of DiaSource locations that make up the 

297 DiaObject average coordinate. 

298 healPixIndices : `list` of `int`s 

299 HealPix indices representing the locations of each currently 

300 existing DiaObject. 

301 """ 

302 # Update location and healPix index. 

303 sphPoint = geom.SpherePoint(diaSrc["ra"], 

304 diaSrc["dec"], 

305 geom.degrees) 

306 diaObjCoords[matchIndex].append(sphPoint) 

307 aveCoord = geom.averageSpherePoint(diaObjCoords[matchIndex]) 

308 diaObjCat[matchIndex]["ra"] = aveCoord.getRa().asDegrees() 

309 diaObjCat[matchIndex]["dec"] = aveCoord.getDec().asDegrees() 

310 nSources = diaObjCat[matchIndex]["nDiaSources"] 

311 diaObjCat[matchIndex]["nDiaSources"] = nSources + 1 

312 healPixIndices[matchIndex] = toIndex(self.config.nside, 

313 diaObjCat[matchIndex]["ra"], 

314 diaObjCat[matchIndex]["dec"]) 

315 # Update DiaObject Id that this source is now associated to. 

316 diaSources.loc[(ccdVisit, diaSourceId), "diaObjectId"] = \ 

317 diaObjCat[matchIndex]["diaObjectId"] 

318 

319 def findMatches(self, src_ra, src_dec, tol, hpIndices, diaObjs): 

320 """Search healPixels around DiaSource locations for DiaObjects. 

321 

322 Parameters 

323 ---------- 

324 src_ra : `float` 

325 DiaSource RA location. 

326 src_dec : `float` 

327 DiaSource Dec location. 

328 tol : `float` 

329 Size of annulus to convert to covering healPixels and search for 

330 DiaObjects. 

331 hpIndices : `list` of `int`s 

332 List of heal pix indices containing the DiaObjects in ``diaObjs``. 

333 diaObjs : `list` of `dict`s 

334 Catalog diaObjects to with full location information for comparing 

335 to DiaSources. 

336 

337 Returns 

338 ------- 

339 results : `lsst.pipe.base.Struct` 

340 Results struct containing 

341 

342 ``dists`` 

343 Array of distances between the current DiaSource diaObjects. 

344 (`numpy.ndarray` or `None`) 

345 ``matches`` 

346 Array of array indices of diaObjects this DiaSource matches to. 

347 (`numpy.ndarray` or `None`) 

348 """ 

349 match_indices = query_disc(self.config.nside, 

350 src_ra, 

351 src_dec, 

352 np.deg2rad(tol/3600.)) 

353 matchIndices = np.argwhere(np.isin(hpIndices, match_indices)).flatten() 

354 

355 if len(matchIndices) < 1: 

356 return pipeBase.Struct(dists=None, matches=None) 

357 

358 dists = np.array( 

359 [np.sqrt(np.sum((eq2xyz(src_ra, src_dec) 

360 - eq2xyz(diaObjs[match]["ra"], 

361 diaObjs[match]["dec"]))**2)) 

362 for match in matchIndices]) 

363 return pipeBase.Struct( 

364 dists=dists, 

365 matches=matchIndices) 

366 

367 def createDiaObject(self, objId, ra, dec): 

368 """Create a simple empty DiaObject with location and id information. 

369 

370 Parameters 

371 ---------- 

372 objId : `int` 

373 Unique ID for this new DiaObject. 

374 ra : `float` 

375 RA location of this DiaObject. 

376 dec : `float` 

377 Dec location of this DiaObject 

378 

379 Returns 

380 ------- 

381 DiaObject : `dict` 

382 Dictionary of values representing a DiaObject. 

383 """ 

384 new_dia_object = {"diaObjectId": objId, 

385 "ra": ra, 

386 "dec": dec, 

387 "nDiaSources": 1} 

388 return new_dia_object