Coverage for python/lsst/analysis/tools/tasks/catalogMatch.py: 34%

140 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-03-30 11:37 +0000

1# This file is part of analysis_tools. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

# Names exported by ``from ... import *``. The visit-level config and task
# are listed alongside their tract-level counterparts so that both flavours
# of the matching task are reachable through the package namespace.
__all__ = (
    "CatalogMatchConfig",
    "CatalogMatchTask",
    "AstropyMatchConfig",
    "AstropyMatchTask",
    "CatalogMatchVisitConfig",
    "CatalogMatchVisitTask",
)

23 

24import astropy.units as units 

25import lsst.geom 

26import lsst.pex.config as pexConfig 

27import lsst.pipe.base as pipeBase 

28import numpy as np 

29import pandas as pd 

30from astropy.coordinates import SkyCoord 

31from astropy.time import Time 

32from lsst.meas.algorithms import ReferenceObjectLoader 

33from lsst.pex.config.configurableActions import ConfigurableActionStructField 

34from lsst.skymap import BaseSkyMap 

35 

36from ..actions.vector import ( 

37 CoaddPlotFlagSelector, 

38 GalaxySelector, 

39 SnSelector, 

40 StarSelector, 

41 VisitPlotFlagSelector, 

42) 

43 

44 

class AstropyMatchConfig(pexConfig.Config):
    """Configuration for `AstropyMatchTask`."""

    # Matching radius: a pair is kept only when its separation is strictly
    # smaller than this value.
    maxDistance = pexConfig.Field[float](default=1.0, doc="Max distance between matches in arcsec")

    # Unit names for the ``coord_ra``/``coord_dec`` columns; the matcher turns
    # each string into an `astropy.units.Unit`.
    refCatUnits = pexConfig.Field[str](default="degree", doc="Units of the reference catalog coordinates")
    targetCatUnits = pexConfig.Field[str](default="degree", doc="Units of the target catalog coordinates")

58 

59 

class AstropyMatchTask(pipeBase.Task):
    """Match a reference catalog to a target catalog on the sky with the
    astropy matcher `~astropy.coordinates.SkyCoord.match_to_catalog_sky`.
    """

    ConfigClass = AstropyMatchConfig

    def run(self, refCatalog, targetCatalog):
        """Find the closest target object for every reference object and keep
        the pairs separated by less than ``config.maxDistance`` arcseconds.

        Parameters
        ----------
        refCatalog : `pandas.core.frame.DataFrame`
            The reference catalog. Its ``coord_ra`` and ``coord_dec`` columns
            are interpreted in units of ``config.refCatUnits``.
        targetCatalog : `pandas.core.frame.DataFrame`
            The target catalog. Its ``coord_ra`` and ``coord_dec`` columns
            are interpreted in units of ``config.targetCatUnits``.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Struct containing:

            ``refMatchIndices``
                Positional indices of the matched reference catalog objects
                (`numpy.ndarray`).
            ``targetMatchIndices``
                Positional indices of the matched target catalog objects, in
                the same order as ``refMatchIndices`` (`numpy.ndarray`).
            ``separations``
                Angular separations of the matched pairs in arcseconds (the
                ``arcsec`` values of the astropy separation, not an
                `~astropy.coordinates.Angle` instance).
        """
        refUnit = units.Unit(self.config.refCatUnits)
        targetUnit = units.Unit(self.config.targetCatUnits)

        refCoords = SkyCoord(
            ra=refCatalog["coord_ra"] * refUnit,
            dec=refCatalog["coord_dec"] * refUnit,
        )
        targetCoords = SkyCoord(
            ra=targetCatalog["coord_ra"] * targetUnit,
            dec=targetCatalog["coord_dec"] * targetUnit,
        )

        # For each reference object: index of the nearest target object and
        # the on-sky separation to it. The 3D distance is not used.
        nearestTarget, d2d, _ = refCoords.match_to_catalog_sky(targetCoords)

        # Only accept pairs that are strictly closer than the matching radius.
        goodMatches = d2d.arcsec < self.config.maxDistance

        return pipeBase.Struct(
            refMatchIndices=np.flatnonzero(goodMatches),
            targetMatchIndices=nearestTarget[goodMatches],
            separations=d2d[goodMatches].arcsec,
        )

108 

109 

class CatalogMatchConnections(
    pipeBase.PipelineTaskConnections,
    dimensions=("tract", "skymap"),
    defaultTemplates={"targetCatalog": "objectTable_tract", "refCatalog": "gaia_dr2_20200414"},
):
    """Butler connections of the tract-level catalog matching task."""

    # Loading is deferred so that only the needed columns are read.
    catalog = pipeBase.connectionTypes.Input(
        name="{targetCatalog}",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
        deferLoad=True,
        doc="The tract-wide catalog to make plots from.",
    )

    # NOTE(review): the dataset name is hard-coded rather than built from the
    # ``refCatalog`` template, so overriding that template only renames the
    # output dataset -- confirm this is intended.
    refCat = pipeBase.connectionTypes.PrerequisiteInput(
        name="gaia_dr2_20200414",
        storageClass="SimpleCatalog",
        dimensions=("skypix",),
        deferLoad=True,
        multiple=True,
        doc="The reference catalog to match to loaded input catalog sources.",
    )

    skymap = pipeBase.connectionTypes.Input(
        name=BaseSkyMap.SKYMAP_DATASET_TYPE_NAME,
        storageClass="SkyMap",
        dimensions=("skymap",),
        doc="The skymap for the tract",
    )

    matchedCatalog = pipeBase.connectionTypes.Output(
        name="{targetCatalog}_{refCatalog}_match",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
        doc="Catalog with matched target and reference objects with separations",
    )

145 

146 

class CatalogMatchConfig(pipeBase.PipelineTaskConfig, pipelineConnections=CatalogMatchConnections):
    """Configuration for `CatalogMatchTask`."""

    matcher = pexConfig.ConfigurableField[pipeBase.Task](
        target=AstropyMatchTask, doc="Task for matching refCat and SourceCatalog"
    )

    epoch = pexConfig.Field[float](doc="Epoch to which reference objects are shifted", default=2015.0)

    bands = pexConfig.ListField[str](
        doc="All bands to persist to downstream tasks",
        default=["u", "g", "r", "i", "z", "y"],
    )

    selectorBand = pexConfig.Field[str](
        doc="Band to use when selecting objects, primarily for extendedness", default="i"
    )

    # Applied to the target catalog before matching.
    selectorActions = ConfigurableActionStructField(
        doc="Which selectors to use to narrow down the data for QA plotting.",
        default={"flagSelector": CoaddPlotFlagSelector},
    )

    # Applied to the target catalog before matching, in ``selectorBand``.
    sourceSelectorActions = ConfigurableActionStructField(
        doc="What types of sources to use.",
        default={"sourceSelector": StarSelector},
    )

    # Never applied by the task; only their input columns are loaded.
    extraColumnSelectors = ConfigurableActionStructField(
        doc="Other selectors that are not used in this task, but whose columns may be needed downstream",
        default={"selector1": SnSelector, "selector2": GalaxySelector},
    )

    extraColumns = pexConfig.ListField[str](
        doc="Other catalog columns to persist to downstream tasks",
        default=["i_cModelFlux", "x", "y"],
    )

    # The two fields below are copied onto the reference object loader's
    # config before the reference catalog is loaded.
    requireProperMotion = pexConfig.Field[bool](
        doc="Only use reference catalog objects with proper motion information",
        default=False,
    )

    anyFilterMapsToThis = pexConfig.Field[str](
        doc="Any filter for the reference catalog maps to this",
        default="phot_g_mean",
    )

192 

193 

class CatalogMatchTask(pipeBase.PipelineTask):
    """Match a tract-level catalog to a reference catalog.

    `runQuantum` loads the inputs and builds ``self.refCat`` through
    `setRefCat`; `run` then applies the configured selectors to the target
    catalog and matches what is left against ``self.refCat``.
    """

    ConfigClass = CatalogMatchConfig
    _DefaultName = "analysisToolsCatalogMatch"

    def __init__(self, butler=None, initInputs=None, **kwargs):
        super().__init__(**kwargs)
        self.makeSubtask("matcher")

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        # Docs inherited from base class

        inputs = butlerQC.get(inputRefs)

        # Collect every column needed by the task itself, by its selectors
        # and by the selectors that are only run downstream.
        columns = ["coord_ra", "coord_dec", "patch"] + self.config.extraColumns.list()
        for selectorAction in [
            self.config.selectorActions,
            self.config.sourceSelectorActions,
            self.config.extraColumnSelectors,
        ]:
            for selector in selectorAction:
                for band in self.config.bands:
                    selectorSchema = selector.getFormattedInputSchema(band=band)
                    columns += [s[0] for s in selectorSchema]
        # The same column can be requested by several selectors or bands;
        # ask for each one only once, keeping first-seen order.
        columns = list(dict.fromkeys(columns))

        dataFrame = inputs["catalog"].get(parameters={"columns": columns})
        inputs["catalog"] = dataFrame

        tract = butlerQC.quantum.dataId["tract"]

        self.refObjLoader = ReferenceObjectLoader(
            dataIds=[ref.datasetRef.dataId for ref in inputRefs.refCat],
            refCats=inputs.pop("refCat"),
            name=self.config.connections.refCat,
            log=self.log,
        )
        self.refObjLoader.config.requireProperMotion = self.config.requireProperMotion
        self.refObjLoader.config.anyFilterMapsToThis = self.config.anyFilterMapsToThis

        # "refCat" and "skymap" are popped so that only "catalog" is left to
        # be forwarded to run().
        self.setRefCat(inputs.pop("skymap"), tract)

        outputs = self.run(**inputs)

        butlerQC.put(outputs, outputRefs)

    def run(self, catalog):
        """Prep the catalog and run the matcher.

        ``self.refCat`` must have been set (see `setRefCat`) before this
        method is called.

        Parameters
        ----------
        catalog : `pandas.core.frame.DataFrame`
            The target catalog to select from and match.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Struct containing:

            ``matchedCatalog``
                Catalog containing the matched objects with all columns from
                the original input catalogs, with the suffix "_ref" or
                "_target" for duplicated column names, plus a "separation"
                column with the angular separation in arcseconds between
                matches (`pandas.core.frame.DataFrame`).
        """
        # Apply the selectors to the catalog
        mask = np.ones(len(catalog), dtype=bool)
        for selector in self.config.selectorActions:
            mask &= selector(catalog, bands=self.config.bands)

        for selector in self.config.sourceSelectorActions:
            mask &= selector(catalog, band=self.config.selectorBand).astype(bool)

        targetCatalog = catalog[mask].reset_index()

        if (len(targetCatalog) == 0) or (len(self.refCat) == 0):
            # Nothing to match. The index arrays are given an integer dtype
            # because they are used for positional indexing below.
            matches = pipeBase.Struct(
                refMatchIndices=np.array([], dtype=int),
                targetMatchIndices=np.array([], dtype=int),
                separations=np.array([]),
            )
        else:
            # Run the matcher
            matches = self.matcher.run(self.refCat, targetCatalog)

        # Join the catalogs for the matched catalogs. Both sides get a fresh
        # 0..N-1 index so that the index-based joins pair row i with row i.
        refMatches = self.refCat.iloc[matches.refMatchIndices].reset_index()
        sourceMatches = targetCatalog.iloc[matches.targetMatchIndices].reset_index()
        matchedCat = sourceMatches.join(refMatches, lsuffix="_target", rsuffix="_ref")

        separations = pd.Series(matches.separations).rename("separation")
        matchedCat = matchedCat.join(separations)

        return pipeBase.Struct(matchedCatalog=matchedCat)

    def setRefCat(self, skymap, tract):
        """Make a reference catalog with coordinates in degrees.

        The result is stored as a `pandas.core.frame.DataFrame` in
        ``self.refCat``. ``self.refObjLoader`` must already be set.

        Parameters
        ----------
        skymap : `lsst.skymap`
            The skymap used to define the patch boundaries.
        tract : int
            The tract corresponding to the catalog data.
        """
        # Load the reference objects in a skyCircle around the tract
        tractInfo = skymap.generateTract(tract)
        boundingCircle = tractInfo.getOuterSkyPolygon().getBoundingCircle()
        center = lsst.geom.SpherePoint(boundingCircle.getCenter())
        radius = boundingCircle.getOpeningAngle()

        epoch = Time(self.config.epoch, format="decimalyear")

        # NOTE(review): the filter name is hard-coded to "i"; presumably it is
        # remapped through ``anyFilterMapsToThis`` -- confirm.
        skyCircle = self.refObjLoader.loadSkyCircle(center, radius, "i", epoch=epoch)
        refCat = skyCircle.refCat

        # Convert the coordinates to RA/Dec and convert the catalog to a
        # dataframe
        refCat["coord_ra"] = (refCat["coord_ra"] * units.radian).to(units.degree).to_value()
        refCat["coord_dec"] = (refCat["coord_dec"] * units.radian).to(units.degree).to_value()
        self.refCat = refCat.asAstropy().to_pandas()

311 

312 

class CatalogMatchVisitConnections(
    pipeBase.PipelineTaskConnections,
    dimensions=("visit",),
    defaultTemplates={"targetCatalog": "sourceTable_visit", "refCatalog": "gaia_dr2_20200414"},
):
    """Butler connections of the visit-level catalog matching task."""

    # The name follows the ``targetCatalog`` template (default
    # "sourceTable_visit") so that the input and the output dataset names
    # cannot drift apart when the template is overridden.
    catalog = pipeBase.connectionTypes.Input(
        doc="The visit-wide catalog to make plots from.",
        storageClass="DataFrame",
        name="{targetCatalog}",
        dimensions=("visit",),
        deferLoad=True,
    )

    # NOTE(review): the dataset name is hard-coded rather than built from the
    # ``refCatalog`` template, so overriding that template only renames the
    # output dataset -- confirm this is intended.
    refCat = pipeBase.connectionTypes.PrerequisiteInput(
        doc="The astrometry reference catalog to match to loaded input catalog sources.",
        name="gaia_dr2_20200414",
        storageClass="SimpleCatalog",
        dimensions=("skypix",),
        deferLoad=True,
        multiple=True,
    )

    visitSummaryTable = pipeBase.connectionTypes.Input(
        doc="A summary table of the ccds in the visit",
        storageClass="ExposureCatalog",
        name="finalVisitSummary",
        dimensions=("visit",),
    )

    matchedCatalog = pipeBase.connectionTypes.Output(
        doc="Catalog with matched target and reference objects with separations",
        name="{targetCatalog}_{refCatalog}_match",
        storageClass="DataFrame",
        dimensions=("visit",),
    )

348 

349 

class CatalogMatchVisitConfig(CatalogMatchConfig, pipelineConnections=CatalogMatchVisitConnections):
    """Configuration for `CatalogMatchVisitTask`.

    Overrides the flag selector, the extra columns and the selector column
    names of `CatalogMatchConfig` with visit-level choices.
    """

    selectorActions = ConfigurableActionStructField(
        doc="Which selectors to use to narrow down the data for QA plotting.",
        default={"flagSelector": VisitPlotFlagSelector},
    )

    extraColumns = pexConfig.ListField[str](
        doc="Other catalog columns to persist to downstream tasks",
        default=["psfFlux", "psfFluxErr"],
    )

    def setDefaults(self):
        # Let the parent classes apply their defaults first so the
        # visit-specific values below are applied on top of them.
        super().setDefaults()
        # sourceSelectorActions.sourceSelector is StarSelector
        self.sourceSelectorActions.sourceSelector.vectorKey = "extendedness"
        # extraColumnSelectors.selector1 is SnSelector
        self.extraColumnSelectors.selector1.fluxType = "psfFlux"
        # extraColumnSelectors.selector2 is GalaxySelector
        self.extraColumnSelectors.selector2.vectorKey = "extendedness"

368 

369 

class CatalogMatchVisitTask(CatalogMatchTask):
    """Match a visit-level catalog to a reference catalog.

    Only the input loading differs from `CatalogMatchTask`: the region in
    which reference objects are loaded and the epoch are both derived from
    the visit summary table.
    """

    ConfigClass = CatalogMatchVisitConfig
    _DefaultName = "analysisToolsCatalogMatchVisit"

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        # Docs inherited from base class

        inputs = butlerQC.get(inputRefs)

        # Collect every column needed by the task itself, by its selectors
        # and by the selectors that are only run downstream.
        columns = ["coord_ra", "coord_dec", "detector"] + self.config.extraColumns.list()
        for selectorAction in [
            self.config.selectorActions,
            self.config.sourceSelectorActions,
            self.config.extraColumnSelectors,
        ]:
            for selector in selectorAction:
                selectorSchema = selector.getFormattedInputSchema()
                columns += [s[0] for s in selectorSchema]
        # The same column can be requested by several selectors; ask for
        # each one only once, keeping first-seen order.
        columns = list(dict.fromkeys(columns))

        dataFrame = inputs["catalog"].get(parameters={"columns": columns})
        inputs["catalog"] = dataFrame

        self.refObjLoader = ReferenceObjectLoader(
            dataIds=[ref.datasetRef.dataId for ref in inputRefs.refCat],
            refCats=inputs.pop("refCat"),
            name=self.config.connections.refCat,
            log=self.log,
        )
        self.refObjLoader.config.requireProperMotion = self.config.requireProperMotion
        self.refObjLoader.config.anyFilterMapsToThis = self.config.anyFilterMapsToThis

        # "refCat" and "visitSummaryTable" are popped so that only "catalog"
        # is left to be forwarded to run().
        self.setRefCat(inputs.pop("visitSummaryTable"))

        outputs = self.run(**inputs)

        butlerQC.put(outputs, outputRefs)

    def setRefCat(self, visitSummaryTable):
        """Make a reference catalog with coordinates in degrees.

        The result is stored as a `pandas.core.frame.DataFrame` in
        ``self.refCat``. ``self.refObjLoader`` must already be set.

        Parameters
        ----------
        visitSummaryTable : `lsst.afw.table.ExposureCatalog`
            The table of visit information
        """
        # Imported explicitly: the module only imports ``lsst.geom``, so
        # ``lsst.sphgeom`` was previously reachable only as a side effect of
        # another import.
        from lsst.sphgeom import ConvexPolygon

        # Get convex hull around the detectors, then get its center and radius
        corners = []
        for visSum in visitSummaryTable:
            for ra, dec in zip(visSum["raCorners"], visSum["decCorners"]):
                corners.append(lsst.geom.SpherePoint(ra, dec, units=lsst.geom.degrees).getVector())
        visitBoundingCircle = ConvexPolygon.convexHull(corners).getBoundingCircle()
        center = lsst.geom.SpherePoint(visitBoundingCircle.getCenter())
        radius = visitBoundingCircle.getOpeningAngle()

        # Get the observation date of the visit. ``visSum`` is deliberately
        # used after the loop: the last row of the table supplies the date.
        obsDate = visSum.getVisitInfo().getDate()
        epoch = Time(obsDate.toPython())

        # Load the reference catalog in the skyCircle of the detectors, then
        # convert the coordinates to degrees and convert the catalog to a
        # dataframe
        # NOTE(review): the filter name is hard-coded to "i"; presumably it is
        # remapped through ``anyFilterMapsToThis`` -- confirm.
        skyCircle = self.refObjLoader.loadSkyCircle(center, radius, "i", epoch=epoch)
        refCat = skyCircle.refCat

        refCat["coord_ra"] = (refCat["coord_ra"] * units.radian).to(units.degree).to_value()
        refCat["coord_dec"] = (refCat["coord_dec"] * units.radian).to(units.degree).to_value()
        self.refCat = refCat.asAstropy().to_pandas()