Coverage for python/lsst/pipe/tasks/diff_matched_tract_catalog.py: 48%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

90 statements  

1# This file is part of pipe_tasks. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

# Names exported by ``from <this module> import *``.
__all__ = [
    'DiffMatchedTractCatalogConfig',
    'DiffMatchedTractCatalogTask',
    'MatchedCatalogFluxesConfig',
]

25 

26import lsst.afw.geom as afwGeom 

27from lsst.meas.astrom.matcher_probabilistic import ConvertCatalogCoordinatesConfig 

28from lsst.meas.astrom.match_probabilistic_task import radec_to_xy 

29import lsst.pex.config as pexConfig 

30import lsst.pipe.base as pipeBase 

31import lsst.pipe.base.connectionTypes as cT 

32from lsst.skymap import BaseSkyMap 

33 

34import numpy as np 

35import pandas as pd 

36from typing import Set 

37 

38 

# Default values for the "{name_...}" placeholders used in the connection
# dataset type names of DiffMatchedTractCatalogConnections.
DiffMatchedTractCatalogBaseTemplates = dict(
    name_input_cat_ref="truth_summary",
    name_input_cat_target="objectTable_tract",
    name_skymap=BaseSkyMap.SKYMAP_DATASET_TYPE_NAME,
)

44 

45 

class DiffMatchedTractCatalogConnections(
    pipeBase.PipelineTaskConnections,
    dimensions=("tract", "skymap"),
    defaultTemplates=DiffMatchedTractCatalogBaseTemplates,
):
    """Butler connections for the matched tract catalog diff task.

    Declares five inputs (reference and target catalogs, the skymap, and
    the two per-catalog match tables) and one output (the merged catalog
    of matched sources). All DataFrame inputs are declared with
    ``deferLoad=True``.
    """

    # --- Inputs: the catalogs being compared -------------------------------
    cat_ref = cT.Input(
        name="{name_input_cat_ref}",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
        deferLoad=True,
        doc="Reference object catalog to match from",
    )
    cat_target = cT.Input(
        name="{name_input_cat_target}",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
        deferLoad=True,
        doc="Target object catalog to match",
    )

    # --- Input: sky geometry ------------------------------------------------
    skymap = cT.Input(
        name="{name_skymap}",
        storageClass="SkyMap",
        dimensions=("skymap",),
        doc="Input definition of geometry/bbox and projection/wcs for coadded exposures",
    )

    # --- Inputs: match tables -----------------------------------------------
    cat_match_ref = cT.Input(
        name="match_ref_{name_input_cat_ref}_{name_input_cat_target}",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
        deferLoad=True,
        doc="Reference matched catalog with indices of target matches",
    )
    cat_match_target = cT.Input(
        name="match_target_{name_input_cat_ref}_{name_input_cat_target}",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
        deferLoad=True,
        doc="Target matched catalog with indices of references matches",
    )

    # --- Output ---------------------------------------------------------------
    cat_matched = cT.Output(
        name="matched_{name_input_cat_ref}_{name_input_cat_target}",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
        doc="Catalog with reference and target columns for matched sources only",
    )

91 

92 

class MatchedCatalogFluxesConfig(pexConfig.Config):
    """Names of one reference flux column and the target flux (and flux
    error) columns associated with it.
    """

    column_ref_flux = pexConfig.Field(
        doc='Reference catalog flux column name',
        dtype=str,
    )
    columns_target_flux = pexConfig.ListField(
        doc="List of target catalog flux column names",
        dtype=str,
        # Reject lists that contain the same column name more than once.
        listCheck=lambda names: len(names) == len(set(names)),
    )
    columns_target_flux_err = pexConfig.ListField(
        doc="List of target catalog flux error column names",
        dtype=str,
        # Reject lists that contain the same column name more than once.
        listCheck=lambda names: len(names) == len(set(names)),
    )

    @property
    def columns_in_ref(self) -> Set[str]:
        """The reference catalog columns named by this config."""
        return set((self.column_ref_flux,))

    @property
    def columns_in_target(self) -> Set[str]:
        """The target catalog columns (fluxes and flux errors) named by this
        config.
        """
        fluxes = set(self.columns_target_flux)
        flux_errs = set(self.columns_target_flux_err)
        return fluxes | flux_errs

116 

117 

class DiffMatchedTractCatalogConfig(
    pipeBase.PipelineTaskConfig,
    pipelineConnections=DiffMatchedTractCatalogConnections,
):
    """Configuration for DiffMatchedTractCatalogTask.

    Besides the individual fields, the config exposes ``columns_in_ref`` and
    ``columns_in_target``: the sets of column names that must be loaded from
    the reference and target catalogs, respectively, as implied by the
    current field values.
    """

    column_matched_prefix_ref = pexConfig.Field(
        dtype=str,
        default='refcat_',
        doc='The prefix for matched columns copied from the reference catalog',
    )
    column_ref_extended = pexConfig.Field(
        dtype=str,
        default='is_pointsource',
        doc='The boolean reference table column specifying if the target is extended',
    )
    column_ref_extended_inverted = pexConfig.Field(
        dtype=bool,
        default=True,
        doc='Whether column_ref_extended specifies if the object is compact, not extended',
    )
    column_target_extended = pexConfig.Field(
        dtype=str,
        default='refExtendedness',
        doc='The target table column estimating the extendedness of the object (0 <= x <= 1)',
    )
    columns_flux = pexConfig.ConfigDictField(
        keytype=str,
        itemtype=MatchedCatalogFluxesConfig,
        doc="Configs for flux columns for each band",
    )
    columns_ref_copy = pexConfig.ListField(
        dtype=str,
        # An empty tuple rather than a set: a list field default should be an
        # ordered sequence, consistent with the other tuple defaults below.
        default=(),
        doc='Reference table columns to copy into cat_matched',
    )
    columns_target_coord_err = pexConfig.ListField(
        dtype=str,
        # Exactly two, distinct, column names are required.
        listCheck=lambda x: (len(x) == 2) and (x[0] != x[1]),
        doc='Target table coordinate columns with standard errors (sigma)',
    )
    columns_target_copy = pexConfig.ListField(
        dtype=str,
        default=('patch',),
        doc='Target table columns to copy into cat_matched',
    )
    columns_target_select_true = pexConfig.ListField(
        dtype=str,
        default=('detect_isPrimary',),
        doc='Target table columns to require to be True for selecting sources',
    )
    columns_target_select_false = pexConfig.ListField(
        dtype=str,
        default=('merge_peak_sky',),
        doc='Target table columns to require to be False for selecting sources',
    )
    coord_format = pexConfig.ConfigField(
        dtype=ConvertCatalogCoordinatesConfig,
        doc="Configuration for coordinate conversion",
    )

    @property
    def columns_in_ref(self) -> Set[str]:
        """Reference catalog columns required by this configuration.

        Returns
        -------
        columns : `set` [`str`]
            The two reference coordinate columns, ``column_ref_extended``,
            every entry of ``columns_ref_copy`` and the reference columns of
            each per-band flux config.
        """
        columns = {
            self.coord_format.column_ref_coord1,
            self.coord_format.column_ref_coord2,
            self.column_ref_extended,
        }
        columns.update(self.columns_ref_copy)
        for config_flux in self.columns_flux.values():
            columns.update(config_flux.columns_in_ref)
        return columns

    @property
    def columns_in_target(self) -> Set[str]:
        """Target catalog columns required by this configuration.

        Returns
        -------
        columns : `set` [`str`]
            The two target coordinate columns, ``column_target_extended``,
            the values of ``coord_format.coords_ref_to_convert`` (if set),
            the coordinate error, selection and copy columns, and the target
            columns of each per-band flux config.
        """
        columns = {
            self.coord_format.column_target_coord1,
            self.coord_format.column_target_coord2,
            self.column_target_extended,
        }
        if self.coord_format.coords_ref_to_convert is not None:
            columns.update(self.coord_format.coords_ref_to_convert.values())
        for columns_list in (
            self.columns_target_coord_err,
            self.columns_target_select_false,
            self.columns_target_select_true,
            self.columns_target_copy,
        ):
            columns.update(columns_list)
        for config_flux in self.columns_flux.values():
            columns.update(config_flux.columns_in_target)
        return columns

211 

212 

class DiffMatchedTractCatalogTask(pipeBase.PipelineTask):
    """Load subsets of matched catalogs and output a merged catalog of matched sources.
    """
    ConfigClass = DiffMatchedTractCatalogConfig
    _DefaultName = "DiffMatchedTractCatalog"

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        """Load only the required columns of each input, call `run` and
        write its outputs.

        Parameters
        ----------
        butlerQC
            Quantum context used to get the inputs and put the outputs.
        inputRefs
            References to the input datasets.
        outputRefs
            References to the output datasets.
        """
        inputs = butlerQC.get(inputRefs)
        skymap = inputs.pop("skymap")

        # The catalog inputs are deferred-load handles, so only the columns
        # implied by the config are actually read.
        outputs = self.run(
            catalog_ref=inputs['cat_ref'].get(parameters={'columns': self.config.columns_in_ref}),
            catalog_target=inputs['cat_target'].get(parameters={'columns': self.config.columns_in_target}),
            catalog_match_ref=inputs['cat_match_ref'].get(
                parameters={'columns': ['match_candidate', 'match_row']},
            ),
            catalog_match_target=inputs['cat_match_target'].get(
                parameters={'columns': ['match_row']},
            ),
            wcs=skymap[butlerQC.quantum.dataId["tract"]].wcs,
        )
        butlerQC.put(outputs, outputRefs)

    def run(
        self,
        catalog_ref: pd.DataFrame,
        catalog_target: pd.DataFrame,
        catalog_match_ref: pd.DataFrame,
        catalog_match_target: pd.DataFrame,
        wcs: afwGeom.SkyWcs = None,
    ) -> pipeBase.Struct:
        """Load matched reference and target (measured) catalogs, measure summary statistics (TBD) and output
        a combined matched catalog with columns from both inputs.

        Parameters
        ----------
        catalog_ref : `pandas.DataFrame`
            A reference catalog to diff objects/sources from.
        catalog_target : `pandas.DataFrame`
            A target catalog to diff reference objects/sources to.
        catalog_match_ref : `pandas.DataFrame`
            A catalog with match indices of target sources and selection flags
            for each reference source. Must contain a ``match_row`` column;
            negative values mark unmatched reference rows.
        catalog_match_target : `pandas.DataFrame`
            A catalog with selection flags for each target source. If it has
            a ``match_candidate`` column, that is used as the initial target
            selection; otherwise all targets start selected.
        wcs : `lsst.afw.geom.SkyWcs`, optional
            A coordinate system to convert catalog positions to sky coordinates,
            if necessary.

        Returns
        -------
        retStruct : `lsst.pipe.base.Struct`
            A struct with a single ``cat_matched`` attribute: a
            `pandas.DataFrame` with one row per matched reference object,
            holding the target columns followed by the reference columns
            renamed with ``config.column_matched_prefix_ref``.
        """
        config = self.config

        # Add additional selection criteria for target sources beyond those for matching
        # (not recommended, but can be done anyway).
        # The flag array is copied so the in-place ``&=`` below cannot write
        # through to the caller's catalog_match_target.
        if 'match_candidate' in catalog_match_target.columns:
            select_target = catalog_match_target['match_candidate'].values.copy()
        else:
            select_target = np.ones(len(catalog_match_target), dtype=bool)
        for column in config.columns_target_select_true:
            select_target &= catalog_target[column].values
        for column in config.columns_target_select_false:
            select_target &= ~catalog_target[column].values

        ref, target = config.coord_format.format_catalogs(
            catalog_ref=catalog_ref, catalog_target=catalog_target,
            select_ref=None, select_target=select_target, wcs=wcs, radec_to_xy_func=radec_to_xy,
            return_converted_columns=config.coord_format.coords_ref_to_convert is not None,
        )
        cat_ref = ref.catalog
        cat_target = target.catalog

        # match_row holds, per reference row, the positional row of its target
        # match; negative values mean "no match".
        match_row = catalog_match_ref['match_row'].values
        matched_ref = match_row >= 0
        matched_row = match_row[matched_ref]

        # Create a matched table, preserving the target catalog's named index (if it has one)
        cat_left = cat_target.iloc[matched_row]
        has_index_left = cat_left.index.name is not None
        # reset_index() (without drop) keeps the reference index as a column.
        cat_right = cat_ref[matched_ref].reset_index()
        # Prefix the reference columns before concatenating, using the
        # configured prefix instead of a hard-coded one.
        prefix = config.column_matched_prefix_ref
        cat_right.columns = [f'{prefix}{col}' for col in cat_right.columns]
        # axis must be passed by keyword: pandas >= 2.0 rejects it positionally.
        cat_matched = pd.concat((cat_left.reset_index(drop=True), cat_right), axis=1)
        if has_index_left:
            cat_matched.index = cat_left.index

        retStruct = pipeBase.Struct(cat_matched=cat_matched)
        return retStruct