lsst.pipe.tasks g0f82ab2f21+b4c33f426f
diff_matched_tract_catalog.py
Go to the documentation of this file.
1# This file is part of pipe_tasks.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
21
# Public names exported by ``from <module> import *``.
__all__ = [
    'DiffMatchedTractCatalogConfig', 'DiffMatchedTractCatalogTask', 'MatchedCatalogFluxesConfig',
]
25
import lsst.afw.geom as afwGeom
from lsst.meas.astrom.matcher_probabilistic import ConvertCatalogCoordinatesConfig
from lsst.meas.astrom.match_probabilistic_task import radec_to_xy
import lsst.pex.config as pexConfig
import lsst.pipe.base as pipeBase
import lsst.pipe.base.connectionTypes as cT
from lsst.skymap import BaseSkyMap

import numpy as np
import pandas as pd
from typing import Set
37
38
# Default values for the ``{name_...}`` placeholders used in the connection
# dataset-type names of this task.
DiffMatchedTractCatalogBaseTemplates = {
    "name_input_cat_ref": "truth_summary",
    "name_input_cat_target": "objectTable_tract",
    "name_skymap": BaseSkyMap.SKYMAP_DATASET_TYPE_NAME,
}
44
45
class DiffMatchedTractCatalogConnections(
    pipeBase.PipelineTaskConnections,
    dimensions=("tract", "skymap"),
    defaultTemplates=DiffMatchedTractCatalogBaseTemplates,
):
    """Butler connections for the matched-catalog diff task.

    All catalog inputs are per-(tract, skymap) DataFrames loaded lazily
    (``deferLoad=True``); the skymap is loaded eagerly. The single output is
    the merged catalog of matched reference/target rows.
    """
    cat_ref = cT.Input(
        doc="Reference object catalog to match from",
        name="{name_input_cat_ref}",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
        deferLoad=True,
    )
    cat_target = cT.Input(
        doc="Target object catalog to match",
        name="{name_input_cat_target}",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
        deferLoad=True,
    )
    skymap = cT.Input(
        doc="Input definition of geometry/bbox and projection/wcs for coadded exposures",
        name="{name_skymap}",
        storageClass="SkyMap",
        dimensions=("skymap",),
    )
    cat_match_ref = cT.Input(
        doc="Reference matched catalog with indices of target matches",
        name="match_ref_{name_input_cat_ref}_{name_input_cat_target}",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
        deferLoad=True,
    )
    cat_match_target = cT.Input(
        doc="Target matched catalog with indices of references matches",
        name="match_target_{name_input_cat_ref}_{name_input_cat_target}",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
        deferLoad=True,
    )
    cat_matched = cT.Output(
        doc="Catalog with reference and target columns for matched sources only",
        name="matched_{name_input_cat_ref}_{name_input_cat_target}",
        storageClass="DataFrame",
        dimensions=("tract", "skymap"),
    )
91
92
class MatchedCatalogFluxesConfig(pexConfig.Config):
    """Names of the flux columns to read from the reference and target catalogs.

    One reference flux column is paired with lists of target flux and target
    flux-error columns; both lists must not contain duplicates.
    """
    column_ref_flux = pexConfig.Field(
        dtype=str,
        doc='Reference catalog flux column name',
    )
    columns_target_flux = pexConfig.ListField(
        dtype=str,
        # Reject lists containing repeated column names.
        listCheck=lambda x: len(set(x)) == len(x),
        doc="List of target catalog flux column names",
    )
    columns_target_flux_err = pexConfig.ListField(
        dtype=str,
        # Reject lists containing repeated column names.
        listCheck=lambda x: len(set(x)) == len(x),
        doc="List of target catalog flux error column names",
    )

    @property
    def columns_in_ref(self) -> Set[str]:
        """The set of reference catalog columns named by this config."""
        return {self.column_ref_flux}

    @property
    def columns_in_target(self) -> Set[str]:
        """The set of target catalog columns (fluxes and flux errors) named by this config."""
        return set(self.columns_target_flux).union(set(self.columns_target_flux_err))
116
117
class DiffMatchedTractCatalogConfig(
    pipeBase.PipelineTaskConfig,
    pipelineConnections=DiffMatchedTractCatalogConnections,
):
    """Configuration for DiffMatchedTractCatalogTask.

    Besides the individual column-name fields, the ``columns_in_ref`` and
    ``columns_in_target`` properties collect every column name referenced by
    this config so that only those columns need to be read from each catalog.
    """
    column_matched_prefix_ref = pexConfig.Field(
        dtype=str,
        default='refcat_',
        doc='The prefix for matched columns copied from the reference catalog',
    )
    column_ref_extended = pexConfig.Field(
        dtype=str,
        default='is_pointsource',
        doc='The boolean reference table column specifying if the target is extended',
    )
    column_ref_extended_inverted = pexConfig.Field(
        dtype=bool,
        default=True,
        doc='Whether column_ref_extended specifies if the object is compact, not extended',
    )
    column_target_extended = pexConfig.Field(
        dtype=str,
        default='refExtendedness',
        doc='The target table column estimating the extendedness of the object (0 <= x <= 1)',
    )

    @property
    def columns_in_ref(self) -> Set[str]:
        """The set of all reference catalog column names referenced by this config.

        Combines the two reference coordinate columns, the extendedness
        column, the columns to copy and the reference columns of every
        per-band flux config.
        """
        columns_all = [
            self.coord_format.column_ref_coord1,
            self.coord_format.column_ref_coord2,
            self.column_ref_extended,
        ]
        columns_all.extend(self.columns_ref_copy)
        for config_flux in self.columns_flux.values():
            columns_all.extend(config_flux.columns_in_ref)

        return set(columns_all)

    @property
    def columns_in_target(self) -> Set[str]:
        """The set of all target catalog column names referenced by this config.

        Combines the two target coordinate columns, the extendedness column,
        the values of ``coord_format.coords_ref_to_convert`` (when set), the
        coordinate error, selection and copy columns, and the target columns
        of every per-band flux config.
        """
        columns_all = [
            self.coord_format.column_target_coord1,
            self.coord_format.column_target_coord2,
            self.column_target_extended,
        ]
        if self.coord_format.coords_ref_to_convert is not None:
            columns_all.extend(self.coord_format.coords_ref_to_convert.values())
        for columns in (
            self.columns_target_coord_err,
            self.columns_target_select_false,
            self.columns_target_select_true,
            self.columns_target_copy,
        ):
            columns_all.extend(columns)
        for config_flux in self.columns_flux.values():
            columns_all.extend(config_flux.columns_in_target)
        return set(columns_all)

    columns_flux = pexConfig.ConfigDictField(
        keytype=str,
        itemtype=MatchedCatalogFluxesConfig,
        doc="Configs for flux columns for each band",
    )
    columns_ref_copy = pexConfig.ListField(
        dtype=str,
        default=set(),
        doc='Reference table columns to copy into cat_matched',
    )
    columns_target_coord_err = pexConfig.ListField(
        dtype=str,
        # Exactly two distinct column names are required.
        listCheck=lambda x: (len(x) == 2) and (x[0] != x[1]),
        doc='Target table coordinate columns with standard errors (sigma)',
    )
    columns_target_copy = pexConfig.ListField(
        dtype=str,
        default=('patch',),
        doc='Target table columns to copy into cat_matched',
    )
    columns_target_select_true = pexConfig.ListField(
        dtype=str,
        default=('detect_isPrimary',),
        doc='Target table columns to require to be True for selecting sources',
    )
    columns_target_select_false = pexConfig.ListField(
        dtype=str,
        default=('merge_peak_sky',),
        doc='Target table columns to require to be False for selecting sources',
    )
    coord_format = pexConfig.ConfigField(
        dtype=ConvertCatalogCoordinatesConfig,
        doc="Configuration for coordinate conversion",
    )
211
212
class DiffMatchedTractCatalogTask(pipeBase.PipelineTask):
    """Load subsets of matched catalogs and output a merged catalog of matched sources.
    """
    ConfigClass = DiffMatchedTractCatalogConfig
    _DefaultName = "DiffMatchedTractCatalog"

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        """Load the inputs for one quantum, call `run` and store its outputs.

        Only the columns referenced by the config are requested from the
        reference and target catalogs, and only the match columns from the
        two match catalogs. The WCS passed to `run` is that of the quantum's
        tract in the loaded skymap.
        """
        inputs = butlerQC.get(inputRefs)
        skymap = inputs.pop("skymap")

        outputs = self.run(
            catalog_ref=inputs['cat_ref'].get(parameters={'columns': self.config.columns_in_ref}),
            catalog_target=inputs['cat_target'].get(parameters={'columns': self.config.columns_in_target}),
            catalog_match_ref=inputs['cat_match_ref'].get(
                parameters={'columns': ['match_candidate', 'match_row']},
            ),
            catalog_match_target=inputs['cat_match_target'].get(
                parameters={'columns': ['match_row']},
            ),
            wcs=skymap[butlerQC.quantum.dataId["tract"]].wcs,
        )
        butlerQC.put(outputs, outputRefs)

    def run(
        self,
        catalog_ref: pd.DataFrame,
        catalog_target: pd.DataFrame,
        catalog_match_ref: pd.DataFrame,
        catalog_match_target: pd.DataFrame,
        wcs: afwGeom.SkyWcs = None,
    ) -> pipeBase.Struct:
        """Load matched reference and target (measured) catalogs, measure summary statistics (TBD) and output
        a combined matched catalog with columns from both inputs.

        Parameters
        ----------
        catalog_ref : `pandas.DataFrame`
            A reference catalog to diff objects/sources from.
        catalog_target : `pandas.DataFrame`
            A target catalog to diff reference objects/sources to.
        catalog_match_ref : `pandas.DataFrame`
            A catalog with a ``match_row`` column giving, for each reference
            source, the positional row of its matched target source, or a
            negative value if it is unmatched.
        catalog_match_target : `pandas.DataFrame`
            A catalog with one row per target source. If it has a
            ``match_candidate`` column, that column seeds the target
            selection; otherwise all targets start selected.
        wcs : `lsst.afw.geom.SkyWcs`
            A coordinate system to convert catalog positions to sky coordinates,
            if necessary.

        Returns
        -------
        retStruct : `lsst.pipe.base.Struct`
            A struct with a ``cat_matched`` attribute: one row per matched
            reference source, holding the target columns followed by the
            reference columns (including the reference index) renamed with
            the ``column_matched_prefix_ref`` prefix.
        """
        config = self.config

        # Add additional selection criteria for target sources beyond those for matching
        # (not recommended, but can be done anyway)
        if 'match_candidate' in catalog_match_target.columns:
            # Copy so the in-place ``&=`` below cannot write through to the caller's DataFrame.
            select_target = catalog_match_target['match_candidate'].values.copy()
        else:
            select_target = np.ones(len(catalog_match_target), dtype=bool)
        for column in config.columns_target_select_true:
            select_target &= catalog_target[column].values
        for column in config.columns_target_select_false:
            select_target &= ~catalog_target[column].values

        ref, target = config.coord_format.format_catalogs(
            catalog_ref=catalog_ref, catalog_target=catalog_target,
            select_ref=None, select_target=select_target, wcs=wcs, radec_to_xy_func=radec_to_xy,
            return_converted_columns=config.coord_format.coords_ref_to_convert is not None,
        )
        cat_ref = ref.catalog
        cat_target = target.catalog

        # Rows of the reference catalog that have a match, and the target row each one points to.
        match_row = catalog_match_ref['match_row'].values
        matched_ref = match_row >= 0
        matched_row = match_row[matched_ref]

        # Create a matched table, preserving the target catalog's named index (if it has one)
        cat_left = cat_target.iloc[matched_row]
        has_index_left = cat_left.index.name is not None
        # reset_index() (without drop) keeps the reference index as a regular column.
        cat_right = cat_ref[matched_ref].reset_index()
        # Prefix the reference columns before concatenating rather than editing
        # the merged column index in place afterwards.
        prefix = config.column_matched_prefix_ref
        cat_right.columns = [f'{prefix}{col}' for col in cat_right.columns]
        # ``axis`` must be passed by keyword; pandas 2 no longer accepts it positionally.
        cat_matched = pd.concat((cat_left.reset_index(drop=True), cat_right), axis=1)
        if has_index_left:
            cat_matched.index = cat_left.index

        retStruct = pipeBase.Struct(cat_matched=cat_matched)
        return retStruct
pipeBase.Struct run(self, pd.DataFrame catalog_ref, pd.DataFrame catalog_target, pd.DataFrame catalog_match_ref, pd.DataFrame catalog_match_target, afwGeom.SkyWcs wcs=None)