Coverage for python/lsst/analysis/tools/tasks/catalogMatch.py: 34%

141 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-01 04:04 -0700

1# This file is part of analysis_tools. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22__all__ = ("CatalogMatchConfig", "CatalogMatchTask", "AstropyMatchConfig", "AstropyMatchTask") 

23 

24import astropy.units as units 

25import lsst.geom 

26import lsst.pex.config as pexConfig 

27import lsst.pipe.base as pipeBase 

28import numpy as np 

29import pandas as pd 

30from astropy.coordinates import SkyCoord 

31from astropy.time import Time 

32from lsst.meas.algorithms import ReferenceObjectLoader 

33from lsst.pex.config.configurableActions import ConfigurableActionStructField 

34from lsst.skymap import BaseSkyMap 

35 

36from ..actions.vector import ( 

37 CoaddPlotFlagSelector, 

38 GalaxySelector, 

39 SnSelector, 

40 StarSelector, 

41 VisitPlotFlagSelector, 

42) 

43from ..interfaces import VectorAction 

44 

45 

class AstropyMatchConfig(pexConfig.Config):
    """Configuration for `AstropyMatchTask`.

    Holds the maximum accepted match separation and the names of the
    angular units used to interpret the coordinate columns of the reference
    and target catalogs.
    """

    # Matches whose separation is not below this value (arcsec) are dropped.
    maxDistance = pexConfig.Field[float](default=1.0, doc="Max distance between matches in arcsec")

    # Unit name passed to `astropy.units.Unit` for the reference coordinates.
    refCatUnits = pexConfig.Field[str](default="degree", doc="Units of the reference catalog coordinates")

    # Unit name passed to `astropy.units.Unit` for the target coordinates.
    targetCatUnits = pexConfig.Field[str](default="degree", doc="Units of the target catalog coordinates")

59 

60 

class AstropyMatchTask(pipeBase.Task):
    """A task for running the astropy matcher `match_to_catalog_sky`
    between target and reference catalogs.
    """

    ConfigClass = AstropyMatchConfig

    def run(self, refCatalog, targetCatalog):
        """Find, for every reference object, the nearest target object.

        Parameters
        ----------
        refCatalog : `pandas.core.frame.DataFrame`
            The reference catalog. Its ``coord_ra`` and ``coord_dec`` columns
            are interpreted in units of ``config.refCatUnits``.
        targetCatalog : `pandas.core.frame.DataFrame`
            The target catalog. Its ``coord_ra`` and ``coord_dec`` columns
            are interpreted in units of ``config.targetCatUnits``.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Struct with the following fields, all of the same length:

            ``refMatchIndices`` : `numpy.ndarray`
                Positions in ``refCatalog`` of the matched reference objects.
            ``targetMatchIndices`` : `numpy.ndarray`
                Positions in ``targetCatalog`` of the matched target objects.
            ``separations`` : `numpy.ndarray`
                On-sky separation of each matched pair, in arcseconds.

        Notes
        -----
        The match is performed from the reference catalog into the target
        catalog, so a single target object may be paired with more than one
        reference object.
        """
        # astropy refuses to match against a zero-length catalog, so return
        # an empty result up front instead of raising.
        if len(refCatalog) == 0 or len(targetCatalog) == 0:
            return pipeBase.Struct(
                refMatchIndices=np.array([], dtype=int),
                targetMatchIndices=np.array([], dtype=int),
                separations=np.array([], dtype=float),
            )

        refUnit = units.Unit(self.config.refCatUnits)
        targetUnit = units.Unit(self.config.targetCatUnits)

        refCoords = SkyCoord(
            ra=refCatalog["coord_ra"].values * refUnit,
            dec=refCatalog["coord_dec"].values * refUnit,
        )
        targetCoords = SkyCoord(
            ra=targetCatalog["coord_ra"].values * targetUnit,
            dec=targetCatalog["coord_dec"].values * targetUnit,
        )

        # For each reference coordinate: index of the nearest target and the
        # on-sky distance to it. The 3D distance is not needed here.
        nearestTarget, d2d, _ = refCoords.match_to_catalog_sky(targetCoords)

        # Keep only the pairs closer than the configured limit (arcsec).
        goodMatches = d2d.arcsecond < self.config.maxDistance

        return pipeBase.Struct(
            refMatchIndices=np.flatnonzero(goodMatches),
            targetMatchIndices=nearestTarget[goodMatches],
            separations=d2d[goodMatches].arcsec,
        )

109 

110 

class CatalogMatchConnections(
    pipeBase.PipelineTaskConnections,
    defaultTemplates={"targetCatalog": "objectTable_tract", "refCatalog": "gaia_dr2_20200414"},
    dimensions=("tract", "skymap"),
):
    """Butler connections for matching a tract-level catalog to a reference
    catalog.

    The ``targetCatalog`` and ``refCatalog`` templates set the dataset type
    names of the inputs and are combined to name the output.
    """

    # Target catalog; loaded lazily so that only selected columns are read.
    catalog = pipeBase.connectionTypes.Input(
        name="{targetCatalog}",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
        deferLoad=True,
        doc="The tract-wide catalog to make plots from.",
    )

    # Reference catalog shards, one per skypix region.
    refCat = pipeBase.connectionTypes.PrerequisiteInput(
        name="{refCatalog}",
        storageClass="SimpleCatalog",
        dimensions=("skypix",),
        multiple=True,
        deferLoad=True,
        doc="The reference catalog to match to loaded input catalog sources.",
    )

    skymap = pipeBase.connectionTypes.Input(
        name=BaseSkyMap.SKYMAP_DATASET_TYPE_NAME,
        storageClass="SkyMap",
        dimensions=("skymap",),
        doc="The skymap for the tract",
    )

    matchedCatalog = pipeBase.connectionTypes.Output(
        name="{targetCatalog}_{refCatalog}_match",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
        doc="Catalog with matched target and reference objects with separations",
    )

146 

147 

class CatalogMatchConfig(pipeBase.PipelineTaskConfig, pipelineConnections=CatalogMatchConnections):
    """Configuration for `CatalogMatchTask`.

    Controls which matcher subtask is run, which rows of the target catalog
    are kept before matching, which columns are read from the target
    catalog, and how the reference catalog is loaded.
    """

    matcher = pexConfig.ConfigurableField[pipeBase.Task](
        target=AstropyMatchTask, doc="Task for matching refCat and SourceCatalog"
    )

    epoch = pexConfig.Field[float](doc="Epoch to which reference objects are shifted", default=2015.0)

    bands = pexConfig.ListField[str](
        doc="All bands to persist to downstream tasks",
        default=["u", "g", "r", "i", "z", "y"],
    )

    selectorBand = pexConfig.Field[str](
        doc="Band to use when selecting objects, primarily for extendedness", default="i"
    )

    selectorActions = ConfigurableActionStructField[VectorAction](
        doc="Which selectors to use to narrow down the data for QA plotting.",
        default={"flagSelector": CoaddPlotFlagSelector()},
    )

    sourceSelectorActions = ConfigurableActionStructField[VectorAction](
        doc="What types of sources to use.",
        default={"sourceSelector": StarSelector()},
    )

    # These selectors are never applied by the task; they only contribute
    # their input columns to the list of columns that is loaded.
    extraColumnSelectors = ConfigurableActionStructField[VectorAction](
        doc=(
            "Other selectors that are not used in this task, but whose columns "
            "may be needed downstream"
        ),
        default={"selector1": SnSelector(), "selector2": GalaxySelector()},
    )

    extraColumns = pexConfig.ListField[str](
        doc="Other catalog columns to persist to downstream tasks",
        default=["i_cModelFlux", "x", "y"],
    )

    requireProperMotion = pexConfig.Field[bool](
        doc="Only use reference catalog objects with proper motion information",
        default=False,
    )

    anyFilterMapsToThis = pexConfig.Field[str](
        doc="Any filter for the reference catalog maps to this",
        default="phot_g_mean",
    )

193 

194 

class CatalogMatchTask(pipeBase.PipelineTask):
    """Match a tract-level catalog to a reference catalog"""

    ConfigClass = CatalogMatchConfig
    _DefaultName = "analysisToolsCatalogMatch"

    def __init__(self, butler=None, initInputs=None, **kwargs):
        # ``butler`` and ``initInputs`` are accepted but not used here; only
        # the remaining keyword arguments are forwarded to the base class.
        super().__init__(**kwargs)
        self.makeSubtask("matcher")

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        # Docs inherited from base class

        inputs = butlerQC.get(inputRefs)

        # Collect every column needed by the configured selectors, for every
        # configured band, on top of the coordinates, patch and extras.
        columns = ["coord_ra", "coord_dec", "patch"] + self.config.extraColumns.list()
        for selectorAction in [
            self.config.selectorActions,
            self.config.sourceSelectorActions,
            self.config.extraColumnSelectors,
        ]:
            for selector in selectorAction:
                for band in self.config.bands:
                    selectorSchema = selector.getFormattedInputSchema(band=band)
                    columns += [s[0] for s in selectorSchema]

        # The catalog connection is deferred; read only the columns we need.
        dataFrame = inputs["catalog"].get(parameters={"columns": columns})
        inputs["catalog"] = dataFrame

        tract = butlerQC.quantum.dataId["tract"]

        self.refObjLoader = ReferenceObjectLoader(
            dataIds=[ref.datasetRef.dataId for ref in inputRefs.refCat],
            refCats=inputs.pop("refCat"),
            name=self.config.connections.refCat,
            log=self.log,
        )
        self.refObjLoader.config.requireProperMotion = self.config.requireProperMotion
        self.refObjLoader.config.anyFilterMapsToThis = self.config.anyFilterMapsToThis

        # Populates ``self.refCat``; "skymap" is popped so that only
        # "catalog" remains to be passed to ``run``.
        self.setRefCat(inputs.pop("skymap"), tract)

        outputs = self.run(**inputs)

        butlerQC.put(outputs, outputRefs)

    def run(self, catalog):
        """Prep the catalog and run the matcher.

        ``self.refCat`` must already have been set (see ``setRefCat``).

        Parameters
        ----------
        catalog : `pandas.core.frame.DataFrame`
            The target catalog to select from and match.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Struct with a single field:

            ``matchedCatalog`` : `pandas.core.frame.DataFrame`
                Catalog containing the matched objects with all columns from
                the original input catalogs, with the suffix "_ref" or
                "_target" for duplicated column names, plus a "separation"
                column with the angular separation in arcseconds between
                matches.
        """
        # Apply the selectors to the catalog
        mask = np.ones(len(catalog), dtype=bool)
        for selector in self.config.selectorActions:
            mask &= selector(catalog, bands=self.config.bands)

        for selector in self.config.sourceSelectorActions:
            mask &= selector(catalog, band=self.config.selectorBand).astype(bool)

        targetCatalog = catalog[mask]
        targetCatalog = targetCatalog.reset_index()

        if (len(targetCatalog) == 0) or (len(self.refCat) == 0):
            # Nothing to match. The index arrays are used as positional
            # indexers below, so give them an integer dtype.
            matches = pipeBase.Struct(
                refMatchIndices=np.array([], dtype=int),
                targetMatchIndices=np.array([], dtype=int),
                separations=np.array([], dtype=float),
            )
        else:
            # Run the matcher
            matches = self.matcher.run(self.refCat, targetCatalog)

        # Join the catalogs for the matched catalogs. Both sides are
        # re-indexed so that row i of each refers to the same matched pair.
        refMatches = self.refCat.iloc[matches.refMatchIndices].reset_index()
        sourceMatches = targetCatalog.iloc[matches.targetMatchIndices].reset_index()
        matchedCat = sourceMatches.join(refMatches, lsuffix="_target", rsuffix="_ref")

        separations = pd.Series(matches.separations).rename("separation")
        matchedCat = matchedCat.join(separations)

        return pipeBase.Struct(matchedCatalog=matchedCat)

    def setRefCat(self, skymap, tract):
        """Make a reference catalog with coordinates in degrees

        The result is stored in ``self.refCat`` as a
        `pandas.core.frame.DataFrame`; nothing is returned.
        ``self.refObjLoader`` must already have been set.

        Parameters
        ----------
        skymap : `lsst.skymap.BaseSkyMap`
            The skymap used to look up the geometry of the tract.
        tract : `int`
            The tract corresponding to the catalog data.
        """
        # Load the reference objects in a skyCircle around the tract
        tractInfo = skymap.generateTract(tract)
        boundingCircle = tractInfo.getOuterSkyPolygon().getBoundingCircle()
        center = lsst.geom.SpherePoint(boundingCircle.getCenter())
        radius = boundingCircle.getOpeningAngle()

        epoch = Time(self.config.epoch, format="decimalyear")

        # NOTE(review): the filter name "i" is hard-coded; presumably it is
        # overridden by ``anyFilterMapsToThis`` on the loader - confirm.
        skyCircle = self.refObjLoader.loadSkyCircle(center, radius, "i", epoch=epoch)
        refCat = skyCircle.refCat

        # Convert the coordinates from radians to degrees and convert the
        # catalog to a dataframe
        refCat["coord_ra"] = (refCat["coord_ra"] * units.radian).to(units.degree).to_value()
        refCat["coord_dec"] = (refCat["coord_dec"] * units.radian).to(units.degree).to_value()
        self.refCat = refCat.asAstropy().to_pandas()

312 

313 

class CatalogMatchVisitConnections(
    pipeBase.PipelineTaskConnections,
    dimensions=("visit",),
    defaultTemplates={"targetCatalog": "sourceTable_visit", "refCatalog": "gaia_dr2_20200414"},
):
    """Butler connections for matching a visit-level catalog to a reference
    catalog.

    The ``targetCatalog`` and ``refCatalog`` templates set the dataset type
    names of the inputs and are combined to name the output.
    """

    # Uses the ``targetCatalog`` template (default "sourceTable_visit") so
    # that the input always agrees with the name embedded in the output.
    catalog = pipeBase.connectionTypes.Input(
        doc="The visit-wide catalog to make plots from.",
        storageClass="DataFrame",
        name="{targetCatalog}",
        dimensions=("visit",),
        deferLoad=True,
    )

    # Reference catalog shards, one per skypix region.
    refCat = pipeBase.connectionTypes.PrerequisiteInput(
        doc="The astrometry reference catalog to match to loaded input catalog sources.",
        name="{refCatalog}",
        storageClass="SimpleCatalog",
        dimensions=("skypix",),
        deferLoad=True,
        multiple=True,
    )

    visitSummaryTable = pipeBase.connectionTypes.Input(
        doc="A summary table of the ccds in the visit",
        storageClass="ExposureCatalog",
        name="finalVisitSummary",
        dimensions=("visit",),
    )

    matchedCatalog = pipeBase.connectionTypes.Output(
        doc="Catalog with matched target and reference objects with separations",
        name="{targetCatalog}_{refCatalog}_match",
        storageClass="DataFrame",
        dimensions=("visit",),
    )

349 

350 

class CatalogMatchVisitConfig(CatalogMatchConfig, pipelineConnections=CatalogMatchVisitConnections):
    """Configuration for `CatalogMatchVisitTask`.

    Overrides the flag selector and the extra columns of
    `CatalogMatchConfig`, and points the inherited selectors at the
    ``extendedness`` and ``psfFlux`` columns.
    """

    selectorActions = ConfigurableActionStructField[VectorAction](
        doc="Which selectors to use to narrow down the data for QA plotting.",
        default={"flagSelector": VisitPlotFlagSelector()},
    )

    extraColumns = pexConfig.ListField[str](
        doc="Other catalog columns to persist to downstream tasks",
        default=["psfFlux", "psfFluxErr"],
    )

    def setDefaults(self):
        # Let the parent classes apply their defaults before overriding.
        super().setDefaults()
        # sourceSelectorActions.sourceSelector is StarSelector
        self.sourceSelectorActions.sourceSelector.vectorKey = "extendedness"
        # extraColumnSelectors.selector1 is SnSelector
        self.extraColumnSelectors.selector1.fluxType = "psfFlux"
        # extraColumnSelectors.selector2 is GalaxySelector
        self.extraColumnSelectors.selector2.vectorKey = "extendedness"

369 

370 

class CatalogMatchVisitTask(CatalogMatchTask):
    """Match a visit-level catalog to a reference catalog"""

    ConfigClass = CatalogMatchVisitConfig
    _DefaultName = "analysisToolsCatalogMatchVisit"

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        # Docs inherited from base class

        inputs = butlerQC.get(inputRefs)

        # Collect every column needed by the configured selectors on top of
        # the coordinates, detector and extras. No band is passed to the
        # selectors here.
        columns = ["coord_ra", "coord_dec", "detector"] + self.config.extraColumns.list()
        for selectorAction in [
            self.config.selectorActions,
            self.config.sourceSelectorActions,
            self.config.extraColumnSelectors,
        ]:
            for selector in selectorAction:
                selectorSchema = selector.getFormattedInputSchema()
                columns += [s[0] for s in selectorSchema]

        # The catalog connection is deferred; read only the columns we need.
        dataFrame = inputs["catalog"].get(parameters={"columns": columns})
        inputs["catalog"] = dataFrame

        self.refObjLoader = ReferenceObjectLoader(
            dataIds=[ref.datasetRef.dataId for ref in inputRefs.refCat],
            refCats=inputs.pop("refCat"),
            name=self.config.connections.refCat,
            log=self.log,
        )
        self.refObjLoader.config.requireProperMotion = self.config.requireProperMotion
        self.refObjLoader.config.anyFilterMapsToThis = self.config.anyFilterMapsToThis

        # Populates ``self.refCat``; "visitSummaryTable" is popped so that
        # only "catalog" remains to be passed to ``run``.
        self.setRefCat(inputs.pop("visitSummaryTable"))

        outputs = self.run(**inputs)

        butlerQC.put(outputs, outputRefs)

    def setRefCat(self, visitSummaryTable):
        """Make a reference catalog with coordinates in degrees

        The result is stored in ``self.refCat`` as a
        `pandas.core.frame.DataFrame`; nothing is returned.
        ``self.refObjLoader`` must already have been set.

        Parameters
        ----------
        visitSummaryTable : `lsst.afw.table.ExposureCatalog`
            The table of visit information

        Raises
        ------
        ValueError
            Raised if ``visitSummaryTable`` has no rows.
        """
        # Imported here because the module only imports ``lsst.geom``
        # explicitly; this avoids relying on ``lsst.sphgeom`` having been
        # loaded as a side effect of another import.
        from lsst.sphgeom import ConvexPolygon

        rows = list(visitSummaryTable)
        if not rows:
            raise ValueError("visitSummaryTable is empty; cannot define a region for the reference catalog")

        # Get convex hull around the detectors, then get its center and radius
        corners = [
            lsst.geom.SpherePoint(ra, dec, units=lsst.geom.degrees).getVector()
            for row in rows
            for ra, dec in zip(row["raCorners"], row["decCorners"])
        ]
        visitBoundingCircle = ConvexPolygon.convexHull(corners).getBoundingCircle()
        center = lsst.geom.SpherePoint(visitBoundingCircle.getCenter())
        radius = visitBoundingCircle.getOpeningAngle()

        # Get the observation date of the visit, taken from the last row of
        # the summary table.
        obsDate = rows[-1].getVisitInfo().getDate()
        epoch = Time(obsDate.toPython())

        # Load the reference catalog in the skyCircle of the detectors, then
        # convert the coordinates from radians to degrees and convert the
        # catalog to a dataframe.
        # NOTE(review): the filter name "i" is hard-coded; presumably it is
        # overridden by ``anyFilterMapsToThis`` on the loader - confirm.
        skyCircle = self.refObjLoader.loadSkyCircle(center, radius, "i", epoch=epoch)
        refCat = skyCircle.refCat

        refCat["coord_ra"] = (refCat["coord_ra"] * units.radian).to(units.degree).to_value()
        refCat["coord_dec"] = (refCat["coord_dec"] * units.radian).to(units.degree).to_value()
        self.refCat = refCat.asAstropy().to_pandas()