Coverage for python/lsst/analysis/tools/tasks/catalogMatch.py: 34%

140 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2022-12-23 09:30 +0000

1# This file is part of analysis_tools. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22import astropy.units as units 

23import lsst.geom 

24import lsst.pex.config as pexConfig 

25import lsst.pipe.base as pipeBase 

26import numpy as np 

27import pandas as pd 

28from astropy.coordinates import SkyCoord 

29from astropy.time import Time 

30from lsst.meas.algorithms import ReferenceObjectLoader 

31from lsst.pipe.tasks.configurableActions import ConfigurableActionStructField 

32from lsst.skymap import BaseSkyMap 

33 

34from ..actions.vector import ( 

35 CoaddPlotFlagSelector, 

36 GalaxySelector, 

37 SnSelector, 

38 StarSelector, 

39 VisitPlotFlagSelector, 

40) 

41 

# Names exported by ``from <this module> import *``.
__all__ = [
    "CatalogMatchConfig",
    "CatalogMatchTask",
    "AstropyMatchConfig",
    "AstropyMatchTask",
]

43 

44 

class AstropyMatchConfig(pexConfig.Config):
    """Configuration for `AstropyMatchTask`."""

    # Matches separated by this many arcseconds or more are rejected.
    maxDistance = pexConfig.Field[float](
        default=1.0,
        doc="Max distance between matches in arcsec",
    )
    # Name of the astropy unit applied to the reference coord_ra/coord_dec.
    refCatUnits = pexConfig.Field[str](
        default="degree",
        doc="Units of the reference catalog coordinates",
    )
    # Name of the astropy unit applied to the target coord_ra/coord_dec.
    targetCatUnits = pexConfig.Field[str](
        default="degree",
        doc="Units of the target catalog coordinates",
    )

59 

60 

class AstropyMatchTask(pipeBase.Task):
    """A task for running the astropy matcher `match_to_catalog_sky`
    between target and reference catalogs.
    """

    ConfigClass = AstropyMatchConfig

    def run(self, refCatalog, targetCatalog):
        """Find the nearest target object for each reference object.

        Parameters
        ----------
        refCatalog : `pandas.core.frame.DataFrame`
            The reference catalog. Its ``coord_ra`` and ``coord_dec``
            columns are interpreted in the units named by
            ``config.refCatUnits``.
        targetCatalog : `pandas.core.frame.DataFrame`
            The target catalog. Its ``coord_ra`` and ``coord_dec`` columns
            are interpreted in the units named by
            ``config.targetCatUnits``.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Struct with the following attributes:

            ``refMatchIndices``
                Positions in ``refCatalog`` of the reference objects whose
                nearest target is closer than ``config.maxDistance``
                (`numpy.ndarray`).
            ``targetMatchIndices``
                Positions in ``targetCatalog`` of the corresponding nearest
                targets, in the same order as ``refMatchIndices``.
            ``separations``
                On-sky separations of the retained matches, as values in
                arcseconds (not an `~astropy.coordinates.Angle`).

        Notes
        -----
        The match is made from the reference catalog to the target catalog,
        so each retained reference object appears once, while a target
        object may be the nearest neighbour of several reference objects.
        """
        refUnit = units.Unit(self.config.refCatUnits)
        refCat_ap = SkyCoord(
            ra=refCatalog["coord_ra"] * refUnit,
            dec=refCatalog["coord_dec"] * refUnit,
        )

        targetUnit = units.Unit(self.config.targetCatUnits)
        sourceCat_ap = SkyCoord(
            ra=targetCatalog["coord_ra"] * targetUnit,
            dec=targetCatalog["coord_dec"] * targetUnit,
        )

        # The third return value (3D distance) is not needed for sky matching.
        nearestTargetIndices, d2d, _ = refCat_ap.match_to_catalog_sky(sourceCat_ap)

        goodMatches = d2d.arcsec < self.config.maxDistance

        return pipeBase.Struct(
            refMatchIndices=np.flatnonzero(goodMatches),
            targetMatchIndices=nearestTargetIndices[goodMatches],
            separations=d2d[goodMatches].arcsec,
        )

109 

110 

class CatalogMatchConnections(
    pipeBase.PipelineTaskConnections,
    dimensions=("tract", "skymap"),
    defaultTemplates={"targetCatalog": "objectTable_tract", "refCatalog": "gaia_dr2_20200414"},
):
    """Butler connections for matching a tract-level catalog to a reference
    catalog.

    The ``targetCatalog`` and ``refCatalog`` templates name the two input
    datasets and are combined to name the matched output dataset.
    """

    catalog = pipeBase.connectionTypes.Input(
        doc="The tract-wide catalog to make plots from.",
        storageClass="DataFrame",
        name="{targetCatalog}",
        dimensions=("tract", "skymap"),
        deferLoad=True,
    )

    refCat = pipeBase.connectionTypes.PrerequisiteInput(
        doc="The reference catalog to match to loaded input catalog sources.",
        # Use the template (whose default is the previously hard-coded name)
        # so that overriding ``refCatalog`` changes this input and the output
        # dataset name together.
        name="{refCatalog}",
        storageClass="SimpleCatalog",
        dimensions=("skypix",),
        deferLoad=True,
        multiple=True,
    )

    skymap = pipeBase.connectionTypes.Input(
        doc="The skymap for the tract",
        storageClass="SkyMap",
        name=BaseSkyMap.SKYMAP_DATASET_TYPE_NAME,
        dimensions=("skymap",),
    )

    matchedCatalog = pipeBase.connectionTypes.Output(
        doc="Catalog with matched target and reference objects with separations",
        name="{targetCatalog}_{refCatalog}_match",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
    )

147 

148 

class CatalogMatchConfig(pipeBase.PipelineTaskConfig, pipelineConnections=CatalogMatchConnections):
    """Configuration for `CatalogMatchTask`."""

    matcher = pexConfig.ConfigurableField[pipeBase.Task](
        target=AstropyMatchTask, doc="Task for matching refCat and SourceCatalog"
    )

    epoch = pexConfig.Field[float](doc="Epoch to which reference objects are shifted", default=2015.0)

    bands = pexConfig.ListField[str](
        doc="All bands to persist to downstream tasks",
        default=["u", "g", "r", "i", "z", "y"],
    )

    selectorBand = pexConfig.Field[str](
        doc="Band to use when selecting objects, primarily for extendedness", default="i"
    )

    selectorActions = ConfigurableActionStructField(
        doc="Which selectors to use to narrow down the data for QA plotting.",
        default={"flagSelector": CoaddPlotFlagSelector},
    )

    sourceSelectorActions = ConfigurableActionStructField(
        doc="What types of sources to use.",
        default={"sourceSelector": StarSelector},
    )

    # These selectors are not applied by the task; they are only queried for
    # the catalog columns they need so that those columns are loaded.
    extraColumnSelectors = ConfigurableActionStructField(
        doc="Other selectors that are not used in this task, but whose columns may be needed downstream",
        default={"selector1": SnSelector, "selector2": GalaxySelector},
    )

    extraColumns = pexConfig.ListField[str](
        doc="Other catalog columns to persist to downstream tasks",
        default=["i_cModelFlux", "x", "y"],
    )

    requireProperMotion = pexConfig.Field[bool](
        doc="Only use reference catalog objects with proper motion information",
        default=False,
    )

    anyFilterMapsToThis = pexConfig.Field[str](
        doc="Any filter for the reference catalog maps to this",
        default="phot_g_mean",
    )

195 

196 

class CatalogMatchTask(pipeBase.PipelineTask):
    """Match a tract-level catalog to a reference catalog"""

    ConfigClass = CatalogMatchConfig
    _DefaultName = "analysisToolsCatalogMatch"

    def __init__(self, butler=None, initInputs=None, **kwargs):
        # ``butler`` and ``initInputs`` are accepted but not used.
        super().__init__(**kwargs)
        self.makeSubtask("matcher")

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        # Docs inherited from base class

        inputs = butlerQC.get(inputRefs)

        # Collect every column needed by this task or requested for
        # downstream use, so that only those are read from the catalog.
        columns = ["coord_ra", "coord_dec", "patch"] + self.config.extraColumns.list()
        for selectorAction in [
            self.config.selectorActions,
            self.config.sourceSelectorActions,
            self.config.extraColumnSelectors,
        ]:
            for selector in selectorAction:
                for band in self.config.bands:
                    selectorSchema = selector.getFormattedInputSchema(band=band)
                    columns += [s[0] for s in selectorSchema]

        # The catalog connection is deferred; replace the handle with the
        # loaded DataFrame restricted to the requested columns.
        dataFrame = inputs["catalog"].get(parameters={"columns": columns})
        inputs["catalog"] = dataFrame

        tract = butlerQC.quantum.dataId["tract"]

        self.refObjLoader = ReferenceObjectLoader(
            dataIds=[ref.datasetRef.dataId for ref in inputRefs.refCat],
            refCats=inputs.pop("refCat"),
            name=self.config.connections.refCat,
            log=self.log,
        )
        self.refObjLoader.config.requireProperMotion = self.config.requireProperMotion
        self.refObjLoader.config.anyFilterMapsToThis = self.config.anyFilterMapsToThis

        # Populates ``self.refCat``, which `run` reads. Popping the skymap
        # leaves only ``catalog`` in ``inputs`` for the call below.
        self.setRefCat(inputs.pop("skymap"), tract)

        outputs = self.run(**inputs)

        butlerQC.put(outputs, outputRefs)

    def run(self, catalog):
        """Prep the catalog and run the matcher.

        `setRefCat` must have been called first: the reference catalog is
        read from ``self.refCat``.

        Parameters
        ----------
        catalog : `pandas.core.frame.DataFrame`
            The target catalog, containing the columns required by the
            configured selectors.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Struct with the following attribute:

            ``matchedCatalog``
                Catalog containing the matched objects with all columns from
                the original input catalogs, with the suffix "_ref" or
                "_target" for duplicated column names, plus a "separation"
                column with the angular separation in arcseconds between
                matches (`pandas.core.frame.DataFrame`).
        """
        # Apply the selectors to the catalog
        mask = np.ones(len(catalog), dtype=bool)
        for selector in self.config.selectorActions:
            mask &= selector(catalog, bands=self.config.bands)

        for selector in self.config.sourceSelectorActions:
            mask &= selector(catalog, band=self.config.selectorBand).astype(bool)

        targetCatalog = catalog[mask]
        targetCatalog = targetCatalog.reset_index()

        if (len(targetCatalog) == 0) or (len(self.refCat) == 0):
            # Nothing to match. The index arrays are used as positional
            # indexers below, so give them an integer dtype rather than
            # numpy's default float64 for an empty array.
            matches = pipeBase.Struct(
                refMatchIndices=np.array([], dtype=int),
                targetMatchIndices=np.array([], dtype=int),
                separations=np.array([]),
            )
        else:
            # Run the matcher
            matches = self.matcher.run(self.refCat, targetCatalog)

        # Join the catalogs for the matched catalogs. Both sides are
        # re-indexed from zero so that the join pairs them row by row.
        refMatches = self.refCat.iloc[matches.refMatchIndices].reset_index()
        sourceMatches = targetCatalog.iloc[matches.targetMatchIndices].reset_index()
        matchedCat = sourceMatches.join(refMatches, lsuffix="_target", rsuffix="_ref")

        separations = pd.Series(matches.separations).rename("separation")
        matchedCat = matchedCat.join(separations)

        return pipeBase.Struct(matchedCatalog=matchedCat)

    def setRefCat(self, skymap, tract):
        """Make a reference catalog with coordinates in degrees

        The result is stored in ``self.refCat`` as a
        `pandas.core.frame.DataFrame`. ``self.refObjLoader`` must already
        be set.

        Parameters
        ----------
        skymap : `lsst.skymap.BaseSkyMap`
            The skymap used to define the tract boundaries.
        tract : `int`
            The tract corresponding to the catalog data.
        """
        # Load the reference objects in a skyCircle around the tract
        tractInfo = skymap.generateTract(tract)
        boundingCircle = tractInfo.getOuterSkyPolygon().getBoundingCircle()
        center = lsst.geom.SpherePoint(boundingCircle.getCenter())
        radius = boundingCircle.getOpeningAngle()

        epoch = Time(self.config.epoch, format="decimalyear")

        skyCircle = self.refObjLoader.loadSkyCircle(center, radius, "i", epoch=epoch)
        refCat = skyCircle.refCat

        # Convert the coordinates from radians to degrees and convert the
        # catalog to a dataframe
        refCat["coord_ra"] = (refCat["coord_ra"] * units.radian).to(units.degree).to_value()
        refCat["coord_dec"] = (refCat["coord_dec"] * units.radian).to(units.degree).to_value()
        self.refCat = refCat.asAstropy().to_pandas()

314 

315 

class CatalogMatchVisitConnections(
    pipeBase.PipelineTaskConnections,
    dimensions=("visit",),
    defaultTemplates={"targetCatalog": "sourceTable_visit", "refCatalog": "gaia_dr2_20200414"},
):
    """Butler connections for matching a visit-level catalog to a reference
    catalog.

    The ``targetCatalog`` and ``refCatalog`` templates name the two input
    datasets and are combined to name the matched output dataset.
    """

    catalog = pipeBase.connectionTypes.Input(
        doc="The visit-wide catalog to make plots from.",
        storageClass="DataFrame",
        # Template default equals the previously hard-coded dataset name;
        # using it keeps this input in step with the output name.
        name="{targetCatalog}",
        dimensions=("visit",),
        deferLoad=True,
    )

    refCat = pipeBase.connectionTypes.PrerequisiteInput(
        doc="The astrometry reference catalog to match to loaded input catalog sources.",
        # Template default equals the previously hard-coded dataset name.
        name="{refCatalog}",
        storageClass="SimpleCatalog",
        dimensions=("skypix",),
        deferLoad=True,
        multiple=True,
    )

    visitSummaryTable = pipeBase.connectionTypes.Input(
        doc="A summary table of the ccds in the visit",
        storageClass="ExposureCatalog",
        name="visitSummary",
        dimensions=("visit",),
    )

    matchedCatalog = pipeBase.connectionTypes.Output(
        doc="Catalog with matched target and reference objects with separations",
        name="{targetCatalog}_{refCatalog}_match",
        storageClass="DataFrame",
        dimensions=("visit",),
    )

352 

353 

class CatalogMatchVisitConfig(CatalogMatchConfig, pipelineConnections=CatalogMatchVisitConnections):
    """Configuration for `CatalogMatchVisitTask`.

    Overrides the flag selector and extra columns of `CatalogMatchConfig`
    and retargets the inherited selectors at the column names set in
    `setDefaults`.
    """

    selectorActions = ConfigurableActionStructField(
        doc="Which selectors to use to narrow down the data for QA plotting.",
        default={"flagSelector": VisitPlotFlagSelector},
    )

    extraColumns = pexConfig.ListField[str](
        doc="Other catalog columns to persist to downstream tasks",
        default=["psfFlux", "psfFluxErr"],
    )

    def setDefaults(self):
        # Apply any defaults set by parent classes before overriding them.
        super().setDefaults()
        # sourceSelectorActions.sourceSelector is StarSelector
        self.sourceSelectorActions.sourceSelector.vectorKey = "extendedness"
        # extraColumnSelectors.selector1 is SnSelector
        self.extraColumnSelectors.selector1.fluxType = "psfFlux"
        # extraColumnSelectors.selector2 is GalaxySelector
        self.extraColumnSelectors.selector2.vectorKey = "extendedness"

372 

373 

class CatalogMatchVisitTask(CatalogMatchTask):
    """Match a visit-level catalog to a reference catalog"""

    ConfigClass = CatalogMatchVisitConfig
    _DefaultName = "analysisToolsCatalogMatchVisit"

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        # Docs inherited from base class

        inputs = butlerQC.get(inputRefs)

        # Collect every column needed by this task or requested for
        # downstream use, so that only those are read from the catalog.
        columns = ["coord_ra", "coord_dec", "detector"] + self.config.extraColumns.list()
        for selectorAction in [
            self.config.selectorActions,
            self.config.sourceSelectorActions,
            self.config.extraColumnSelectors,
        ]:
            for selector in selectorAction:
                selectorSchema = selector.getFormattedInputSchema()
                columns += [s[0] for s in selectorSchema]

        # The catalog connection is deferred; replace the handle with the
        # loaded DataFrame restricted to the requested columns.
        dataFrame = inputs["catalog"].get(parameters={"columns": columns})
        inputs["catalog"] = dataFrame

        self.refObjLoader = ReferenceObjectLoader(
            dataIds=[ref.datasetRef.dataId for ref in inputRefs.refCat],
            refCats=inputs.pop("refCat"),
            name=self.config.connections.refCat,
            log=self.log,
        )
        self.refObjLoader.config.requireProperMotion = self.config.requireProperMotion
        self.refObjLoader.config.anyFilterMapsToThis = self.config.anyFilterMapsToThis

        # Populates ``self.refCat``, which `run` reads. Popping the summary
        # table leaves only ``catalog`` in ``inputs`` for the call below.
        self.setRefCat(inputs.pop("visitSummaryTable"))

        outputs = self.run(**inputs)

        butlerQC.put(outputs, outputRefs)

    def setRefCat(self, visitSummaryTable):
        """Make a reference catalog with coordinates in degrees

        The result is stored in ``self.refCat`` as a
        `pandas.core.frame.DataFrame`. ``self.refObjLoader`` must already
        be set.

        Parameters
        ----------
        visitSummaryTable : `lsst.afw.table.ExposureCatalog`
            The table of visit information

        Raises
        ------
        RuntimeError
            Raised if the table provides no finite detector corners, since
            no sky region can then be defined.
        """
        # Imported explicitly rather than relying on ``import lsst.geom``
        # making ``lsst.sphgeom`` available as a side effect.
        from lsst.sphgeom import ConvexPolygon

        # Get convex hull around the detectors, then get its center and radius
        corners = []
        visSum = None
        for visSum in visitSummaryTable:
            for ra, dec in zip(visSum["raCorners"], visSum["decCorners"]):
                # A non-finite coordinate cannot give a valid unit vector
                # for the hull, so such corners are skipped.
                if not (np.isfinite(ra) and np.isfinite(dec)):
                    continue
                corners.append(lsst.geom.SpherePoint(ra, dec, units=lsst.geom.degrees).getVector())

        if not corners:
            raise RuntimeError(
                "The visit summary table contains no finite detector corners; "
                "cannot define a region in which to load reference objects."
            )

        visitBoundingCircle = ConvexPolygon.convexHull(corners).getBoundingCircle()
        center = lsst.geom.SpherePoint(visitBoundingCircle.getCenter())
        radius = visitBoundingCircle.getOpeningAngle()

        # Get the observation date of the visit from the last row of the
        # table (presumably all rows share one visit date -- TODO confirm).
        obsDate = visSum.getVisitInfo().getDate()
        epoch = Time(obsDate.toPython())

        # Load the reference catalog in the skyCircle of the detectors, then
        # convert the coordinates from radians to degrees and convert the
        # catalog to a dataframe
        skyCircle = self.refObjLoader.loadSkyCircle(center, radius, "i", epoch=epoch)
        refCat = skyCircle.refCat

        refCat["coord_ra"] = (refCat["coord_ra"] * units.radian).to(units.degree).to_value()
        refCat["coord_dec"] = (refCat["coord_dec"] * units.radian).to(units.degree).to_value()
        self.refCat = refCat.asAstropy().to_pandas()