Coverage for python/lsst/analysis/tools/tasks/catalogMatch.py: 40%

140 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2022-08-18 12:39 -0700

1# This file is part of analysis_tools. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22import astropy.units as units 

23import lsst.geom 

24import lsst.pex.config as pexConfig 

25import lsst.pipe.base as pipeBase 

26import numpy as np 

27import pandas as pd 

28from astropy.coordinates import SkyCoord 

29from astropy.time import Time 

30from lsst.meas.algorithms import ReferenceObjectLoader 

31from lsst.pipe.tasks.configurableActions import ConfigurableActionStructField 

32from lsst.skymap import BaseSkyMap 

33 

34from ..actions.vector import ( 

35 CoaddPlotFlagSelector, 

36 GalaxySelector, 

37 SnSelector, 

38 StarSelector, 

39 VisitPlotFlagSelector, 

40) 

41 

# Public API of this module. The visit-level config and task are listed next
# to the tract-level ones so that star-imports of this module expose every
# public class it defines.
__all__ = [
    "CatalogMatchConfig",
    "CatalogMatchTask",
    "AstropyMatchConfig",
    "AstropyMatchTask",
    "CatalogMatchVisitConfig",
    "CatalogMatchVisitTask",
]

43 

44 

class AstropyMatchConfig(pexConfig.Config):
    """Configuration for `AstropyMatchTask`.

    Holds the matching radius and the angular unit names of the coordinate
    columns in the two catalogs handed to the matcher.
    """

    # Upper limit, in arcseconds, on the separation of an accepted match;
    # the task keeps only matches strictly closer than this.
    maxDistance = pexConfig.Field[float](
        default=1.0,
        doc="Max distance between matches in arcsec",
    )

    # Unit name handed to `astropy.units.Unit` for the reference catalog's
    # ``coord_ra``/``coord_dec`` columns.
    refCatUnits = pexConfig.Field[str](
        default="degree",
        doc="Units of the reference catalog coordinates",
    )

    # Unit name handed to `astropy.units.Unit` for the target catalog's
    # ``coord_ra``/``coord_dec`` columns.
    targetCatUnits = pexConfig.Field[str](
        default="degree",
        doc="Units of the target catalog coordinates",
    )

59 

60 

class AstropyMatchTask(pipeBase.Task):
    """Match a reference catalog to a target catalog on the sky using the
    astropy matcher `~astropy.coordinates.SkyCoord.match_to_catalog_sky`.
    """

    ConfigClass = AstropyMatchConfig

    def run(self, refCatalog, targetCatalog):
        """Find the nearest target object for every reference object.

        The match is made *from* the reference catalog *to* the target
        catalog, so each reference object appears at most once in the
        result while a target object may be matched by several reference
        objects.

        Parameters
        ----------
        refCatalog : `pandas.core.frame.DataFrame`
            The reference catalog. Must provide ``coord_ra`` and
            ``coord_dec`` columns expressed in ``config.refCatUnits``.
        targetCatalog : `pandas.core.frame.DataFrame`
            The target catalog. Must provide ``coord_ra`` and
            ``coord_dec`` columns expressed in ``config.targetCatUnits``.

        Returns
        -------
        matches : `lsst.pipe.base.Struct`
            Struct with the following attributes:

            ``refMatchIndices``
                Positional indices of the matched reference catalog
                objects (`numpy.ndarray`).
            ``targetMatchIndices``
                Positional indices of the matched target catalog objects,
                aligned with ``refMatchIndices`` (`numpy.ndarray`).
            ``separations``
                On-sky separations of the matched pairs, in arcseconds
                (`numpy.ndarray`).
        """
        refUnit = units.Unit(self.config.refCatUnits)
        targetUnit = units.Unit(self.config.targetCatUnits)

        refCoords = SkyCoord(
            ra=refCatalog["coord_ra"] * refUnit,
            dec=refCatalog["coord_dec"] * refUnit,
        )
        targetCoords = SkyCoord(
            ra=targetCatalog["coord_ra"] * targetUnit,
            dec=targetCatalog["coord_dec"] * targetUnit,
        )

        # One entry per reference object: the index of its nearest target
        # object and the on-sky distance to it. The 3D distance that astropy
        # also returns is not used.
        nearestTarget, d2d, _ = refCoords.match_to_catalog_sky(targetCoords)

        # Keep only the pairs strictly closer than the configured radius.
        goodMatches = d2d.arcsec < self.config.maxDistance

        refMatchIndices = np.flatnonzero(goodMatches)
        targetMatchIndices = nearestTarget[goodMatches]
        separations = d2d[goodMatches].arcsec

        return pipeBase.Struct(
            refMatchIndices=refMatchIndices,
            targetMatchIndices=targetMatchIndices,
            separations=separations,
        )

109 

110 

class CatalogMatchConnections(
    pipeBase.PipelineTaskConnections,
    dimensions=("tract", "skymap"),
    defaultTemplates={"targetCatalog": "objectTable_tract", "refCatalog": "gaia_dr2_20200414"},
):
    """Butler connections for the tract-level catalog match.

    The ``targetCatalog`` and ``refCatalog`` templates name the input
    target catalog and the input reference catalog, and together form the
    name of the output matched catalog.
    """

    # Loaded lazily so that only the needed columns are read.
    catalog = pipeBase.connectionTypes.Input(
        doc="The tract-wide catalog to make plots from.",
        storageClass="DataFrame",
        name="{targetCatalog}",
        dimensions=("tract", "skymap"),
        deferLoad=True,
    )

    # The dataset name follows the ``refCatalog`` template (whose default is
    # the previously hard-coded name) so that overriding the template changes
    # the input reference catalog and the output name together.
    refCat = pipeBase.connectionTypes.PrerequisiteInput(
        doc="The reference catalog to match to loaded input catalog sources.",
        name="{refCatalog}",
        storageClass="SimpleCatalog",
        dimensions=("skypix",),
        deferLoad=True,
        multiple=True,
    )

    skymap = pipeBase.connectionTypes.Input(
        doc="The skymap for the tract",
        storageClass="SkyMap",
        name=BaseSkyMap.SKYMAP_DATASET_TYPE_NAME,
        dimensions=("skymap",),
    )

    matchedCatalog = pipeBase.connectionTypes.Output(
        doc="Catalog with matched target and reference objects with separations",
        name="{targetCatalog}_{refCatalog}_match",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
    )

147 

148 

class CatalogMatchConfig(pipeBase.PipelineTaskConfig, pipelineConnections=CatalogMatchConnections):
    """Configuration for `CatalogMatchTask`."""

    # Subtask that performs the actual positional match.
    matcher = pexConfig.ConfigurableField[pipeBase.Task](
        target=AstropyMatchTask, doc="Task for matching refCat and SourceCatalog"
    )

    # Decimal year passed to the reference object loader as the epoch.
    epoch = pexConfig.Field[float](doc="Epoch to which reference objects are shifted", default=2015.0)

    # Bands for which the selectors' input columns are read from the catalog.
    bands = pexConfig.ListField[str](
        doc="All bands to persist to downstream tasks",
        default=["g", "r", "i", "z", "y"],
    )

    # Band handed to the source selectors when building the selection mask.
    selectorBand = pexConfig.Field[str](
        doc="Band to use when selecting objects, primarily for extendedness", default="i"
    )

    # Selectors called on the catalog without a band argument.
    selectorActions = ConfigurableActionStructField(
        doc="Which selectors to use to narrow down the data for QA plotting.",
        default={"flagSelector": CoaddPlotFlagSelector},
    )

    # Selectors called on the catalog with ``band=selectorBand``.
    sourceSelectorActions = ConfigurableActionStructField(
        doc="What types of sources to use.",
        default={"sourceSelector": StarSelector},
    )

    # Only the input schemas of these selectors are used by the task: their
    # columns are added to the set read from the catalog.
    extraColumnSelectors = ConfigurableActionStructField(
        doc="Other selectors that are not used in this task, but whose columns may be needed downstream",
        default={"selector1": SnSelector, "selector2": GalaxySelector},
    )

    # Additional columns read from the catalog as-is.
    extraColumns = pexConfig.ListField[str](
        doc="Other catalog columns to persist to downstream tasks",
        default=["i_cModelFlux", "x", "y"],
    )

    # Forwarded to the reference object loader's config.
    requireProperMotion = pexConfig.Field[bool](
        doc="Only use reference catalog objects with proper motion information",
        default=False,
    )

    # Forwarded to the reference object loader's config.
    anyFilterMapsToThis = pexConfig.Field[str](
        doc="Any filter for the reference catalog maps to this",
        default="phot_g_mean",
    )

195 

196 

class CatalogMatchTask(pipeBase.PipelineTask):
    """Match a tract-level catalog to a reference catalog"""

    ConfigClass = CatalogMatchConfig
    _DefaultName = "analysisToolsCatalogMatch"

    def __init__(self, butler=None, initInputs=None, **kwargs):
        # ``butler`` and ``initInputs`` are accepted but not used here.
        super().__init__(**kwargs)
        self.makeSubtask("matcher")

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        # Docs inherited from base class

        inputs = butlerQC.get(inputRefs)

        # Collect every column needed by this task or requested for
        # downstream use: the coordinates, the patch, the configured extras
        # and the inputs of every selector in every configured band.
        columns = ["coord_ra", "coord_dec", "patch"] + self.config.extraColumns.list()
        for selectorAction in [
            self.config.selectorActions,
            self.config.sourceSelectorActions,
            self.config.extraColumnSelectors,
        ]:
            for selector in selectorAction:
                for band in self.config.bands:
                    selectorSchema = selector.getFormattedInputSchema(band=band)
                    columns += [s[0] for s in selectorSchema]

        # Several selectors (or several bands of one selector) can ask for
        # the same column; request each column once, keeping first-seen order.
        columns = list(dict.fromkeys(columns))

        dataFrame = inputs["catalog"].get(parameters={"columns": columns})
        inputs["catalog"] = dataFrame

        tract = butlerQC.quantum.dataId["tract"]

        self.refObjLoader = ReferenceObjectLoader(
            dataIds=[ref.datasetRef.dataId for ref in inputRefs.refCat],
            refCats=inputs.pop("refCat"),
            log=self.log,
        )
        self.refObjLoader.config.requireProperMotion = self.config.requireProperMotion
        self.refObjLoader.config.anyFilterMapsToThis = self.config.anyFilterMapsToThis

        # Stores the reference catalog on ``self.refCat`` for use by ``run``.
        self.setRefCat(inputs.pop("skymap"), tract)

        outputs = self.run(**inputs)

        butlerQC.put(outputs, outputRefs)

    def run(self, catalog):
        """Prep the catalog and run the matcher.

        ``self.refCat`` must have been populated (see `setRefCat`) before
        this method is called.

        Parameters
        ----------
        catalog : `pandas.core.frame.DataFrame`
            The target catalog to select objects from and match.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Struct with a single attribute:

            ``matchedCatalog``
                Catalog containing the matched objects with all columns from
                the original input catalogs, with the suffix "_ref" or
                "_target" for duplicated column names, plus a "separation"
                column with the angular separation in arcseconds between
                matches (`pandas.core.frame.DataFrame`).
        """
        # Apply the selectors to the catalog
        mask = np.ones(len(catalog), dtype=bool)
        for selector in self.config.selectorActions:
            mask &= selector(catalog)

        for selector in self.config.sourceSelectorActions:
            mask &= selector(catalog, band=self.config.selectorBand).astype(bool)

        targetCatalog = catalog[mask].reset_index()

        if (len(targetCatalog) == 0) or (len(self.refCat) == 0):
            # Nothing to match. The index arrays are given an integer dtype
            # because they are used as positional indexers below.
            matches = pipeBase.Struct(
                refMatchIndices=np.array([], dtype=int),
                targetMatchIndices=np.array([], dtype=int),
                separations=np.array([]),
            )
        else:
            # Run the matcher
            matches = self.matcher.run(self.refCat, targetCatalog)

        # Join the catalogs for the matched catalogs. Both sides are re-indexed
        # from zero so that the join lines the matched pairs up row by row.
        refMatches = self.refCat.iloc[matches.refMatchIndices].reset_index()
        sourceMatches = targetCatalog.iloc[matches.targetMatchIndices].reset_index()
        matchedCat = sourceMatches.join(refMatches, lsuffix="_target", rsuffix="_ref")

        separations = pd.Series(matches.separations).rename("separation")
        matchedCat = matchedCat.join(separations)

        return pipeBase.Struct(matchedCatalog=matchedCat)

    def setRefCat(self, skymap, tract):
        """Make a reference catalog with coordinates in degrees

        The result is stored on ``self.refCat`` as a
        `pandas.core.frame.DataFrame`; nothing is returned.
        ``self.refObjLoader`` must already be set.

        Parameters
        ----------
        skymap : `lsst.skymap`
            The skymap used to define the patch boundaries.
        tract : int
            The tract corresponding to the catalog data.
        """
        # Load the reference objects in a skyCircle around the tract
        tractInfo = skymap.generateTract(tract)
        boundingCircle = tractInfo.getOuterSkyPolygon().getBoundingCircle()
        center = lsst.geom.SpherePoint(boundingCircle.getCenter())
        radius = boundingCircle.getOpeningAngle()

        epoch = Time(self.config.epoch, format="decimalyear")

        # NOTE(review): the filter name passed to the loader is fixed to "i";
        # presumably it is overridden by ``anyFilterMapsToThis`` - confirm.
        skyCircle = self.refObjLoader.loadSkyCircle(center, radius, "i", epoch=epoch)
        refCat = skyCircle.refCat

        # Convert the coordinates to RA/Dec and convert the catalog to a
        # dataframe
        refCat["coord_ra"] = (refCat["coord_ra"] * units.radian).to(units.degree).to_value()
        refCat["coord_dec"] = (refCat["coord_dec"] * units.radian).to(units.degree).to_value()
        self.refCat = refCat.asAstropy().to_pandas()

313 

314 

class CatalogMatchVisitConnections(
    pipeBase.PipelineTaskConnections,
    dimensions=("visit",),
    defaultTemplates={"targetCatalog": "sourceTable_visit", "refCatalog": "gaia_dr2_20200414"},
):
    """Butler connections for the visit-level catalog match.

    The ``targetCatalog`` and ``refCatalog`` templates name the input
    target catalog and the input reference catalog, and together form the
    name of the output matched catalog.
    """

    # The dataset name follows the ``targetCatalog`` template, whose default
    # is the previously hard-coded name, so the input stays consistent with
    # the output name when the template is overridden.
    catalog = pipeBase.connectionTypes.Input(
        doc="The visit-wide catalog to make plots from.",
        storageClass="DataFrame",
        name="{targetCatalog}",
        dimensions=("visit",),
        deferLoad=True,
    )

    # Likewise tied to the ``refCatalog`` template; the default is unchanged.
    refCat = pipeBase.connectionTypes.PrerequisiteInput(
        doc="The astrometry reference catalog to match to loaded input catalog sources.",
        name="{refCatalog}",
        storageClass="SimpleCatalog",
        dimensions=("skypix",),
        deferLoad=True,
        multiple=True,
    )

    visitSummaryTable = pipeBase.connectionTypes.Input(
        doc="A summary table of the ccds in the visit",
        storageClass="ExposureCatalog",
        name="visitSummary",
        dimensions=("visit",),
    )

    matchedCatalog = pipeBase.connectionTypes.Output(
        doc="Catalog with matched target and reference objects with separations",
        name="{targetCatalog}_{refCatalog}_match",
        storageClass="DataFrame",
        dimensions=("visit",),
    )

351 

352 

class CatalogMatchVisitConfig(CatalogMatchConfig, pipelineConnections=CatalogMatchVisitConnections):
    """Configuration for `CatalogMatchVisitTask`.

    Overrides the flag selector and the extra columns of
    `CatalogMatchConfig` and retargets the inherited selectors at the
    ``extendedness`` and ``psfFlux`` column names.
    """

    selectorActions = ConfigurableActionStructField(
        doc="Which selectors to use to narrow down the data for QA plotting.",
        default={"flagSelector": VisitPlotFlagSelector},
    )

    extraColumns = pexConfig.ListField[str](
        doc="Other catalog columns to persist to downstream tasks",
        default=["psfFlux", "psfFluxErr"],
    )

    def setDefaults(self):
        # Apply any defaults set further up the config hierarchy first.
        super().setDefaults()
        # sourceSelectorActions.sourceSelector is StarSelector
        self.sourceSelectorActions.sourceSelector.vectorKey = "extendedness"
        # extraColumnSelectors.selector1 is SnSelector
        self.extraColumnSelectors.selector1.fluxType = "psfFlux"
        # extraColumnSelectors.selector2 is GalaxySelector
        self.extraColumnSelectors.selector2.vectorKey = "extendedness"

371 

372 

class CatalogMatchVisitTask(CatalogMatchTask):
    """Match a visit-level catalog to a reference catalog"""

    ConfigClass = CatalogMatchVisitConfig
    _DefaultName = "analysisToolsCatalogMatchVisit"

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        # Docs inherited from base class

        inputs = butlerQC.get(inputRefs)

        # Collect every column needed by this task or requested for
        # downstream use. Unlike the tract-level task, the selector schemas
        # are requested without a band.
        columns = ["coord_ra", "coord_dec", "detector"] + self.config.extraColumns.list()
        for selectorAction in [
            self.config.selectorActions,
            self.config.sourceSelectorActions,
            self.config.extraColumnSelectors,
        ]:
            for selector in selectorAction:
                selectorSchema = selector.getFormattedInputSchema()
                columns += [s[0] for s in selectorSchema]

        # Several selectors can ask for the same column; request each column
        # once, keeping first-seen order.
        columns = list(dict.fromkeys(columns))

        dataFrame = inputs["catalog"].get(parameters={"columns": columns})
        inputs["catalog"] = dataFrame

        self.refObjLoader = ReferenceObjectLoader(
            dataIds=[ref.datasetRef.dataId for ref in inputRefs.refCat],
            refCats=inputs.pop("refCat"),
            log=self.log,
        )
        self.refObjLoader.config.requireProperMotion = self.config.requireProperMotion
        self.refObjLoader.config.anyFilterMapsToThis = self.config.anyFilterMapsToThis

        # Stores the reference catalog on ``self.refCat`` for use by ``run``.
        self.setRefCat(inputs.pop("visitSummaryTable"))

        outputs = self.run(**inputs)

        butlerQC.put(outputs, outputRefs)

    def setRefCat(self, visitSummaryTable):
        """Make a reference catalog with coordinates in degrees

        The result is stored on ``self.refCat`` as a
        `pandas.core.frame.DataFrame`; nothing is returned.
        ``self.refObjLoader`` must already be set.

        Parameters
        ----------
        visitSummaryTable : `lsst.afw.table.ExposureCatalog`
            The table of visit information
        """
        # Imported explicitly: this module's top-level imports do not include
        # lsst.sphgeom, so it should not be reached through the ``lsst``
        # namespace on the assumption that another import has loaded it.
        from lsst.sphgeom import ConvexPolygon

        # Get convex hull around the detectors, then get its center and radius
        corners = []
        for visSum in visitSummaryTable:
            for ra, dec in zip(visSum["raCorners"], visSum["decCorners"]):
                corners.append(lsst.geom.SpherePoint(ra, dec, units=lsst.geom.degrees).getVector())
        visitBoundingCircle = ConvexPolygon.convexHull(corners).getBoundingCircle()
        center = lsst.geom.SpherePoint(visitBoundingCircle.getCenter())
        radius = visitBoundingCircle.getOpeningAngle()

        # Get the observation date of the visit. ``visSum`` is the last row
        # of the summary table, left over from the loop above.
        obsDate = visSum.getVisitInfo().getDate()
        epoch = Time(obsDate.toPython())

        # Load the reference catalog in the skyCircle of the detectors, then
        # convert the coordinates to degrees and convert the catalog to a
        # dataframe
        # NOTE(review): the filter name passed to the loader is fixed to "i";
        # presumably it is overridden by ``anyFilterMapsToThis`` - confirm.
        skyCircle = self.refObjLoader.loadSkyCircle(center, radius, "i", epoch=epoch)
        refCat = skyCircle.refCat

        refCat["coord_ra"] = (refCat["coord_ra"] * units.radian).to(units.degree).to_value()
        refCat["coord_dec"] = (refCat["coord_dec"] * units.radian).to(units.degree).to_value()
        self.refCat = refCat.asAstropy().to_pandas()