Coverage for python/lsst/analysis/tools/tasks/catalogMatch.py: 34%

141 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2022-11-16 01:27 -0800

1# This file is part of analysis_tools. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22import astropy.units as units 

23import lsst.geom 

24import lsst.pex.config as pexConfig 

25import lsst.pipe.base as pipeBase 

26import numpy as np 

27import pandas as pd 

28from astropy.coordinates import SkyCoord 

29from astropy.time import Time 

30from lsst.meas.algorithms import ReferenceObjectLoader 

31from lsst.pipe.tasks.configurableActions import ConfigurableActionStructField 

32from lsst.skymap import BaseSkyMap 

33 

34from ..actions.vector import ( 

35 CoaddPlotFlagSelector, 

36 GalaxySelector, 

37 SnSelector, 

38 StarSelector, 

39 VisitPlotFlagSelector, 

40) 

41 

# Public API of this module. The visit-level config/task pair is listed
# alongside the tract-level one so that star-imports expose both.
__all__ = [
    "CatalogMatchConfig",
    "CatalogMatchTask",
    "CatalogMatchVisitConfig",
    "CatalogMatchVisitTask",
    "AstropyMatchConfig",
    "AstropyMatchTask",
]

43 

44 

class AstropyMatchConfig(pexConfig.Config):
    """Configuration for `AstropyMatchTask`."""

    # Matches whose on-sky separation is not below this value are rejected.
    maxDistance = pexConfig.Field[float](
        default=1.0,
        doc="Max distance between matches in arcsec",
    )

    # Unit name handed to `astropy.units.Unit` for the reference coordinates.
    refCatUnits = pexConfig.Field[str](
        default="degree",
        doc="Units of the reference catalog coordinates",
    )

    # Unit name handed to `astropy.units.Unit` for the target coordinates.
    targetCatUnits = pexConfig.Field[str](
        default="degree",
        doc="Units of the target catalog coordinates",
    )

59 

60 

class AstropyMatchTask(pipeBase.Task):
    """A task for running the astropy matcher `match_to_catalog_sky`
    between target and reference catalogs.

    For every reference object the nearest target object on the sky is
    found; the pair is kept when their separation is smaller than
    ``config.maxDistance`` arcseconds.
    """

    ConfigClass = AstropyMatchConfig

    def run(self, refCatalog, targetCatalog):
        """Run matcher

        Parameters
        ----------
        refCatalog : `pandas.core.frame.DataFrame`
            The reference catalog. Must have ``coord_ra`` and ``coord_dec``
            columns expressed in ``config.refCatUnits``.
        targetCatalog : `pandas.core.frame.DataFrame`
            The target catalog. Must have ``coord_ra`` and ``coord_dec``
            columns expressed in ``config.targetCatUnits``.

        Returns
        -------
        `pipeBase.Struct` containing:
            refMatchIndices : `numpy.ndarray`
                Array of indices of matched reference catalog objects
            targetMatchIndices : `numpy.ndarray`
                Array of indices of matched target catalog objects
            separations : `numpy.ndarray`
                Array of separations, in arcseconds, between matched objects
        """
        # Nothing can match when either catalog is empty, and astropy rejects
        # a zero-length catalog to match against, so short-circuit here.
        if len(refCatalog) == 0 or len(targetCatalog) == 0:
            return pipeBase.Struct(
                refMatchIndices=np.array([], dtype=int),
                targetMatchIndices=np.array([], dtype=int),
                separations=np.array([], dtype=float),
            )

        refUnit = units.Unit(self.config.refCatUnits)
        targetUnit = units.Unit(self.config.targetCatUnits)

        refCoords = SkyCoord(
            ra=refCatalog["coord_ra"] * refUnit,
            dec=refCatalog["coord_dec"] * refUnit,
        )
        targetCoords = SkyCoord(
            ra=targetCatalog["coord_ra"] * targetUnit,
            dec=targetCatalog["coord_dec"] * targetUnit,
        )

        # One entry per reference object: index of, and separation to, the
        # closest target object. The 3D distance is not used.
        nearestTarget, d2d, _ = refCoords.match_to_catalog_sky(targetCoords)

        goodMatches = d2d.arcsecond < self.config.maxDistance

        return pipeBase.Struct(
            refMatchIndices=np.flatnonzero(goodMatches),
            targetMatchIndices=nearestTarget[goodMatches],
            separations=d2d[goodMatches].arcsec,
        )

109 

110 

class CatalogMatchConnections(
    pipeBase.PipelineTaskConnections,
    dimensions=("tract", "skymap"),
    defaultTemplates={"targetCatalog": "objectTable_tract", "refCatalog": "gaia_dr2_20200414"},
):
    """Connections for matching a tract-level catalog to a reference catalog.

    The ``targetCatalog`` and ``refCatalog`` templates select the input
    dataset types and are also used to build the output dataset type name.
    """

    catalog = pipeBase.connectionTypes.Input(
        doc="The tract-wide catalog to make plots from.",
        storageClass="DataFrame",
        name="{targetCatalog}",
        dimensions=("tract", "skymap"),
        deferLoad=True,
    )

    # Named through the ``refCatalog`` template (default unchanged) so that
    # the catalog actually loaded always agrees with the name embedded in
    # the ``matchedCatalog`` output dataset type.
    refCat = pipeBase.connectionTypes.PrerequisiteInput(
        doc="The reference catalog to match to loaded input catalog sources.",
        name="{refCatalog}",
        storageClass="SimpleCatalog",
        dimensions=("skypix",),
        deferLoad=True,
        multiple=True,
    )

    skymap = pipeBase.connectionTypes.Input(
        doc="The skymap for the tract",
        storageClass="SkyMap",
        name=BaseSkyMap.SKYMAP_DATASET_TYPE_NAME,
        dimensions=("skymap",),
    )

    matchedCatalog = pipeBase.connectionTypes.Output(
        doc="Catalog with matched target and reference objects with separations",
        name="{targetCatalog}_{refCatalog}_match",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
    )

147 

148 

class CatalogMatchConfig(pipeBase.PipelineTaskConfig, pipelineConnections=CatalogMatchConnections):
    """Configuration for `CatalogMatchTask`."""

    matcher = pexConfig.ConfigurableField[pipeBase.Task](
        target=AstropyMatchTask, doc="Task for matching refCat and SourceCatalog"
    )

    epoch = pexConfig.Field[float](doc="Epoch to which reference objects are shifted", default=2015.0)

    bands = pexConfig.ListField[str](
        doc="All bands to persist to downstream tasks",
        default=["g", "r", "i", "z", "y"],
    )

    selectorBand = pexConfig.Field[str](
        doc="Band to use when selecting objects, primarily for extendedness", default="i"
    )

    selectorActions = ConfigurableActionStructField(
        doc="Which selectors to use to narrow down the data for QA plotting.",
        default={"flagSelector": CoaddPlotFlagSelector},
    )

    sourceSelectorActions = ConfigurableActionStructField(
        doc="What types of sources to use.",
        default={"sourceSelector": StarSelector},
    )

    # These selectors are never applied by the task; they are only asked for
    # their input schema so the corresponding columns get loaded.
    extraColumnSelectors = ConfigurableActionStructField(
        doc="Other selectors that are not used in this task, but whose columns may be needed downstream",
        default={"selector1": SnSelector, "selector2": GalaxySelector},
    )

    extraColumns = pexConfig.ListField[str](
        doc="Other catalog columns to persist to downstream tasks",
        default=["i_cModelFlux", "x", "y"],
    )

    requireProperMotion = pexConfig.Field[bool](
        doc="Only use reference catalog objects with proper motion information",
        default=False,
    )

    anyFilterMapsToThis = pexConfig.Field[str](
        doc="Any filter for the reference catalog maps to this",
        default="phot_g_mean",
    )

195 

196 

class CatalogMatchTask(pipeBase.PipelineTask):
    """Match a tract-level catalog to a reference catalog

    Notes
    -----
    `run` reads the reference catalog from ``self.refCat``, which is set by
    `setRefCat`; `setRefCat` in turn needs ``self.refObjLoader``, which is
    created in `runQuantum`.
    """

    ConfigClass = CatalogMatchConfig
    _DefaultName = "analysisToolsCatalogMatch"

    def __init__(self, butler=None, initInputs=None, **kwargs):
        # ``butler`` and ``initInputs`` are accepted but not used here.
        super().__init__(**kwargs)
        self.makeSubtask("matcher")

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        # Docs inherited from base class

        inputs = butlerQC.get(inputRefs)

        # Collect every column needed by this task or requested for
        # downstream use, for each configured band.
        columns = ["coord_ra", "coord_dec", "patch"] + self.config.extraColumns.list()
        for selectorAction in [
            self.config.selectorActions,
            self.config.sourceSelectorActions,
            self.config.extraColumnSelectors,
        ]:
            for selector in selectorAction:
                for band in self.config.bands:
                    selectorSchema = selector.getFormattedInputSchema(band=band)
                    columns += [s[0] for s in selectorSchema]

        # Several selectors can ask for the same column; request each column
        # only once while keeping the first-seen order.
        columns = list(dict.fromkeys(columns))

        # The catalog connection is deferred, so only these columns are read.
        dataFrame = inputs["catalog"].get(parameters={"columns": columns})
        inputs["catalog"] = dataFrame

        tract = butlerQC.quantum.dataId["tract"]

        self.refObjLoader = ReferenceObjectLoader(
            dataIds=[ref.datasetRef.dataId for ref in inputRefs.refCat],
            refCats=inputs.pop("refCat"),
            name=self.config.connections.refCat,
            log=self.log,
        )
        self.refObjLoader.config.requireProperMotion = self.config.requireProperMotion
        self.refObjLoader.config.anyFilterMapsToThis = self.config.anyFilterMapsToThis

        # ``refCat`` and ``skymap`` are popped so that only ``catalog`` is
        # left in ``inputs`` when it is expanded into `run`.
        self.setRefCat(inputs.pop("skymap"), tract)

        outputs = self.run(**inputs)

        butlerQC.put(outputs, outputRefs)

    def run(self, catalog):
        """Prep the catalog and run the matcher.

        `setRefCat` must have been called first, because the reference
        catalog is read from ``self.refCat``.

        Parameters
        ----------
        catalog : `pandas.core.frame.DataFrame`
            The target catalog to select objects from and match.

        Returns
        -------
        `pipeBase.Struct` containing:
            matchedCatalog : `pandas.core.frame.DataFrame`
                Catalog containing the matched objects with all columns from
                the original input catalogs, with the suffix "_ref" or
                "_target" for duplicated column names, plus a column with the
                angular separation in arcseconds between matches.
        """
        # Apply the selectors to the catalog
        mask = np.ones(len(catalog), dtype=bool)
        for selector in self.config.selectorActions:
            selector.bands = self.config.bands
            mask &= selector(catalog)

        for selector in self.config.sourceSelectorActions:
            mask &= selector(catalog, band=self.config.selectorBand).astype(bool)

        targetCatalog = catalog[mask].reset_index()

        if (len(targetCatalog) == 0) or (len(self.refCat) == 0):
            # Nothing to match: build an empty result. The index arrays are
            # integer typed because they are used for positional indexing.
            matches = pipeBase.Struct(
                refMatchIndices=np.array([], dtype=int),
                targetMatchIndices=np.array([], dtype=int),
                separations=np.array([], dtype=float),
            )
        else:
            # Run the matcher
            matches = self.matcher.run(self.refCat, targetCatalog)

        # Join the catalogs for the matched catalogs. Both sides are
        # re-indexed from zero so the index-based joins pair them row by row.
        refMatches = self.refCat.iloc[matches.refMatchIndices].reset_index()
        sourceMatches = targetCatalog.iloc[matches.targetMatchIndices].reset_index()
        matchedCat = sourceMatches.join(refMatches, lsuffix="_target", rsuffix="_ref")

        separations = pd.Series(matches.separations).rename("separation")
        matchedCat = matchedCat.join(separations)

        return pipeBase.Struct(matchedCatalog=matchedCat)

    def setRefCat(self, skymap, tract):
        """Make a reference catalog with coordinates in degrees

        The result is stored on ``self.refCat`` as a
        `pandas.core.frame.DataFrame`; nothing is returned.
        ``self.refObjLoader`` must already be set.

        Parameters
        ----------
        skymap : `lsst.skymap`
            The skymap used to define the patch boundaries.
        tract : int
            The tract corresponding to the catalog data.
        """
        # Load the reference objects in a skyCircle around the tract
        tractInfo = skymap.generateTract(tract)
        boundingCircle = tractInfo.getOuterSkyPolygon().getBoundingCircle()
        center = lsst.geom.SpherePoint(boundingCircle.getCenter())
        radius = boundingCircle.getOpeningAngle()

        epoch = Time(self.config.epoch, format="decimalyear")

        skyCircle = self.refObjLoader.loadSkyCircle(center, radius, "i", epoch=epoch)
        refCat = skyCircle.refCat

        # Convert the coordinates from radians to degrees in place, then
        # convert the catalog to a dataframe
        refCat["coord_ra"] = (refCat["coord_ra"] * units.radian).to(units.degree).to_value()
        refCat["coord_dec"] = (refCat["coord_dec"] * units.radian).to(units.degree).to_value()
        self.refCat = refCat.asAstropy().to_pandas()

315 

316 

class CatalogMatchVisitConnections(
    pipeBase.PipelineTaskConnections,
    dimensions=("visit",),
    defaultTemplates={"targetCatalog": "sourceTable_visit", "refCatalog": "gaia_dr2_20200414"},
):
    """Connections for matching a visit-level catalog to a reference catalog.

    The ``targetCatalog`` and ``refCatalog`` templates select the input
    dataset types and are also used to build the output dataset type name.
    """

    # Named through the ``targetCatalog`` template (default unchanged) so the
    # catalog read always agrees with the name embedded in the output.
    catalog = pipeBase.connectionTypes.Input(
        doc="The visit-wide catalog to make plots from.",
        storageClass="DataFrame",
        name="{targetCatalog}",
        dimensions=("visit",),
        deferLoad=True,
    )

    # Named through the ``refCatalog`` template (default unchanged) for the
    # same reason.
    refCat = pipeBase.connectionTypes.PrerequisiteInput(
        doc="The astrometry reference catalog to match to loaded input catalog sources.",
        name="{refCatalog}",
        storageClass="SimpleCatalog",
        dimensions=("skypix",),
        deferLoad=True,
        multiple=True,
    )

    visitSummaryTable = pipeBase.connectionTypes.Input(
        doc="A summary table of the ccds in the visit",
        storageClass="ExposureCatalog",
        name="visitSummary",
        dimensions=("visit",),
    )

    matchedCatalog = pipeBase.connectionTypes.Output(
        doc="Catalog with matched target and reference objects with separations",
        name="{targetCatalog}_{refCatalog}_match",
        storageClass="DataFrame",
        dimensions=("visit",),
    )

353 

354 

class CatalogMatchVisitConfig(CatalogMatchConfig, pipelineConnections=CatalogMatchVisitConnections):
    """Configuration for `CatalogMatchVisitTask`.

    Overrides the flag selector and the extra columns of
    `CatalogMatchConfig`, and repoints the inherited selectors at different
    column names in `setDefaults`.
    """

    selectorActions = ConfigurableActionStructField(
        doc="Which selectors to use to narrow down the data for QA plotting.",
        default={"flagSelector": VisitPlotFlagSelector},
    )

    extraColumns = pexConfig.ListField[str](
        doc="Other catalog columns to persist to downstream tasks",
        default=["psfFlux", "psfFluxErr"],
    )

    def setDefaults(self):
        # Let the parent classes apply their defaults before overriding.
        super().setDefaults()
        # sourceSelectorActions.sourceSelector is StarSelector
        self.sourceSelectorActions.sourceSelector.vectorKey = "extendedness"
        # extraColumnSelectors.selector1 is SnSelector
        self.extraColumnSelectors.selector1.fluxType = "psfFlux"
        # extraColumnSelectors.selector2 is GalaxySelector
        self.extraColumnSelectors.selector2.vectorKey = "extendedness"

373 

374 

class CatalogMatchVisitTask(CatalogMatchTask):
    """Match a visit-level catalog to a reference catalog

    Only `runQuantum` and `setRefCat` differ from `CatalogMatchTask`; the
    selection and matching in `run` are inherited.
    """

    ConfigClass = CatalogMatchVisitConfig
    _DefaultName = "analysisToolsCatalogMatchVisit"

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        # Docs inherited from base class

        inputs = butlerQC.get(inputRefs)

        # Collect every column needed by this task or requested for
        # downstream use. No band is passed to the selectors here.
        columns = ["coord_ra", "coord_dec", "detector"] + self.config.extraColumns.list()
        for selectorAction in [
            self.config.selectorActions,
            self.config.sourceSelectorActions,
            self.config.extraColumnSelectors,
        ]:
            for selector in selectorAction:
                selectorSchema = selector.getFormattedInputSchema()
                columns += [s[0] for s in selectorSchema]

        # Several selectors can ask for the same column; request each column
        # only once while keeping the first-seen order.
        columns = list(dict.fromkeys(columns))

        # The catalog connection is deferred, so only these columns are read.
        dataFrame = inputs["catalog"].get(parameters={"columns": columns})
        inputs["catalog"] = dataFrame

        self.refObjLoader = ReferenceObjectLoader(
            dataIds=[ref.datasetRef.dataId for ref in inputRefs.refCat],
            refCats=inputs.pop("refCat"),
            name=self.config.connections.refCat,
            log=self.log,
        )
        self.refObjLoader.config.requireProperMotion = self.config.requireProperMotion
        self.refObjLoader.config.anyFilterMapsToThis = self.config.anyFilterMapsToThis

        # ``refCat`` and ``visitSummaryTable`` are popped so that only
        # ``catalog`` is left in ``inputs`` when it is expanded into `run`.
        self.setRefCat(inputs.pop("visitSummaryTable"))

        outputs = self.run(**inputs)

        butlerQC.put(outputs, outputRefs)

    def setRefCat(self, visitSummaryTable):
        """Make a reference catalog with coordinates in degrees

        The result is stored on ``self.refCat`` as a
        `pandas.core.frame.DataFrame`; nothing is returned.
        ``self.refObjLoader`` must already be set.

        Parameters
        ----------
        visitSummaryTable : `lsst.afw.table.ExposureCatalog`
            The table of visit information

        Raises
        ------
        ValueError
            Raised if the table provides no finite detector corners, so no
            search region can be built.
        """
        # Imported here so the hull construction does not depend on some
        # other module having loaded ``lsst.sphgeom`` already.
        from lsst.sphgeom import ConvexPolygon

        # Get convex hull around the detectors, then get its center and radius
        corners = []
        for visSum in visitSummaryTable:
            for ra, dec in zip(visSum["raCorners"], visSum["decCorners"]):
                # Skip non-finite corner values: they cannot be turned into
                # a valid unit vector for the hull.
                if not (np.isfinite(ra) and np.isfinite(dec)):
                    continue
                corners.append(lsst.geom.SpherePoint(ra, dec, units=lsst.geom.degrees).getVector())

        if not corners:
            raise ValueError(
                "visitSummaryTable has no finite detector corners; cannot define a reference catalog region"
            )

        visitBoundingCircle = ConvexPolygon.convexHull(corners).getBoundingCircle()
        center = lsst.geom.SpherePoint(visitBoundingCircle.getCenter())
        radius = visitBoundingCircle.getOpeningAngle()

        # Get the observation date of the visit, taken from the last row of
        # the summary table (``visSum`` is bound because ``corners`` is not
        # empty).
        obsDate = visSum.getVisitInfo().getDate()
        epoch = Time(obsDate.toPython())

        # Load the reference catalog in the skyCircle of the detectors, then
        # convert the coordinates to degrees and convert the catalog to a
        # dataframe
        skyCircle = self.refObjLoader.loadSkyCircle(center, radius, "i", epoch=epoch)
        refCat = skyCircle.refCat

        refCat["coord_ra"] = (refCat["coord_ra"] * units.radian).to(units.degree).to_value()
        refCat["coord_dec"] = (refCat["coord_dec"] * units.radian).to(units.degree).to_value()
        self.refCat = refCat.asAstropy().to_pandas()