lsst.pipe.tasks gf10b05e212+10497e73ae
mergeMeasurements.py
Go to the documentation of this file.
1#!/usr/bin/env python
2#
3# LSST Data Management System
4# Copyright 2008-2015 AURA/LSST.
5#
6# This product includes software developed by the
7# LSST Project (http://www.lsst.org/).
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the LSST License Statement and
20# the GNU General Public License along with this program. If not,
21# see <https://www.lsstcorp.org/LegalNotices/>.
22#
23import numpy
24import warnings
25
26import lsst.afw.table as afwTable
27import lsst.pex.config as pexConfig
28import lsst.pipe.base as pipeBase
29
30from lsst.pipe.base import PipelineTaskConnections, PipelineTaskConfig
31import lsst.pipe.base.connectionTypes as cT
32
33
class MergeMeasurementsConnections(PipelineTaskConnections,
                                   dimensions=("skymap", "tract", "patch"),
                                   defaultTemplates={"inputCoaddName": "deep",
                                                     "outputCoaddName": "deep"}):
    """Connections declared for merging per-band measurement catalogs.

    The quantum dimensions are ``("skymap", "tract", "patch")``; the
    ``catalogs`` input additionally carries a ``band`` dimension and is
    declared with ``multiple=True``.  Dataset names are built from the
    ``inputCoaddName`` / ``outputCoaddName`` templates, both of which
    default to ``"deep"``.
    """
    # Init-input: schema of the per-band ``*Coadd_meas`` catalogs read below.
    inputSchema = cT.InitInput(
        doc="Schema for the input measurement catalogs.",
        name="{inputCoaddName}Coadd_meas_schema",
        storageClass="SourceCatalog",
    )
    # Init-output: schema of the merged ``*Coadd_ref`` catalog written below.
    outputSchema = cT.InitOutput(
        doc="Schema for the output merged measurement catalog.",
        name="{outputCoaddName}Coadd_ref_schema",
        storageClass="SourceCatalog",
    )
    catalogs = cT.Input(
        doc="Input catalogs to merge.",
        name="{inputCoaddName}Coadd_meas",
        multiple=True,
        storageClass="SourceCatalog",
        dimensions=["band", "skymap", "tract", "patch"],
    )
    mergedCatalog = cT.Output(
        doc="Output merged catalog.",
        name="{outputCoaddName}Coadd_ref",
        storageClass="SourceCatalog",
        dimensions=["skymap", "tract", "patch"],
    )
61
62
class MergeMeasurementsConfig(PipelineTaskConfig, pipelineConnections=MergeMeasurementsConnections):
    """Configuration parameters for the MergeMeasurementsTask.

    ``priorityList`` has an empty default but `validate` rejects an empty
    list, so it must be set explicitly.
    """

    pseudoFilterList = pexConfig.ListField(
        doc="Names of filters which may have no associated detection\n"
            "(N.b. should include MergeDetectionsConfig.skyFilterName)",
        dtype=str,
        default=["sky"],
    )
    snName = pexConfig.Field(
        doc="Name of flux measurement for calculating the S/N when choosing the reference band.",
        dtype=str,
        default="base_PsfFlux",
    )
    minSN = pexConfig.Field(
        doc="If the S/N from the priority band is below this value (and the S/N "
            "is larger than minSNDiff compared to the priority band), use the band with "
            "the largest S/N as the reference band.",
        dtype=float,
        default=10.,
    )
    minSNDiff = pexConfig.Field(
        doc="If the difference in S/N between another band and the priority band is larger "
            "than this value (and the S/N in the priority band is less than minSN) "
            "use the band with the largest S/N as the reference band",
        dtype=float,
        default=3.,
    )
    flags = pexConfig.ListField(
        doc="Require that these flags, if available, are not set",
        dtype=str,
        default=[
            "base_PixelFlags_flag_interpolatedCenter",
            "base_PsfFlux_flag",
            "ext_photometryKron_KronFlux_flag",
            "modelfit_CModel_flag",
        ],
    )
    priorityList = pexConfig.ListField(
        doc="Priority-ordered list of filter bands for the merge.",
        dtype=str,
        default=[],
    )
    coaddName = pexConfig.Field(
        doc="Name of coadd",
        dtype=str,
        default="deep",
    )

    def validate(self):
        """Run the base-class validation, then require a non-empty priority list.

        Raises
        ------
        RuntimeError
            Raised if ``priorityList`` is empty.
        """
        super().validate()
        if not self.priorityList:
            raise RuntimeError("No priority list provided")
115
116
117
123
124
class MergeMeasurementsTask(pipeBase.PipelineTask):
    """Merge measurements from multiple bands.

    For every source, one of the per-band measurement records is selected
    as the reference and copied into a single merged catalog; a
    ``merge_measurement_<band>`` flag records which band was chosen.

    Parameters
    ----------
    butler : `None`
        Compatibility parameter. Should always be `None`; passing anything
        else only triggers a `FutureWarning`.
    schema : `lsst.afw.table.Schema`, optional
        The schema of the detection catalogs used as input to this task.
    initInputs : `dict`, optional
        Dictionary holding the input schema catalog under the key
        ``inputSchema``. If given, its ``.schema`` overrides ``schema``.
    **kwargs
        Forwarded to the `lsst.pipe.base.PipelineTask` constructor.

    Raises
    ------
    ValueError
        Raised if neither ``schema`` nor ``initInputs`` provides a schema.
    """
    _DefaultName = "mergeCoaddMeasurements"
    ConfigClass = MergeMeasurementsConfig

    inputDataset = "meas"
    outputDataset = "ref"

    def __init__(self, butler=None, schema=None, initInputs=None, **kwargs):
        super().__init__(**kwargs)

        if butler is not None:
            warnings.warn("The 'butler' parameter is no longer used and can be safely removed.",
                          category=FutureWarning, stacklevel=2)

        if initInputs is not None:
            schema = initInputs['inputSchema'].schema

        if schema is None:
            raise ValueError("No input schema or initInputs['inputSchema'] provided.")

        inputSchema = schema

        # The output schema starts from the input schema; the per-band
        # merge_measurement flags are appended below.
        self.schemaMapper = afwTable.SchemaMapper(inputSchema, True)
        self.schemaMapper.addMinimalSchema(inputSchema, True)

        # Keys of the flux measurement used to compute the S/N.
        self.instFluxKey = inputSchema.find(self.config.snName + "_instFlux").getKey()
        self.instFluxErrKey = inputSchema.find(self.config.snName + "_instFluxErr").getKey()
        self.fluxFlagKey = inputSchema.find(self.config.snName + "_flag").getKey()

        # Per band: the input merge_peak/merge_footprint keys and the new
        # output flag marking that band as the chosen reference.
        self.flagKeys = {}
        for band in self.config.priorityList:
            outputKey = self.schemaMapper.editOutputSchema().addField(
                "merge_measurement_%s" % band,
                type="Flag",
                doc="Flag field set if the measurements here are from the %s filter" % band
            )
            peakKey = inputSchema.find("merge_peak_%s" % band).key
            footprintKey = inputSchema.find("merge_footprint_%s" % band).key
            self.flagKeys[band] = pipeBase.Struct(peak=peakKey, footprint=footprintKey, output=outputKey)
        self.schema = self.schemaMapper.getOutputSchema()

        # Pseudo-filters missing from the schema are logged and skipped.
        self.pseudoFilterKeys = []
        for filt in self.config.pseudoFilterList:
            try:
                self.pseudoFilterKeys.append(self.schema.find("merge_peak_%s" % filt).getKey())
            except Exception as e:
                self.log.warning("merge_peak is not set for pseudo-filter %s: %s", filt, e)

        # Configured flags missing from the schema are logged and skipped.
        self.badFlags = {}
        for flag in self.config.flags:
            try:
                self.badFlags[flag] = self.schema.find(flag).getKey()
            except KeyError as exc:
                self.log.warning("Can't find flag %s in schema: %s", flag, exc)
        self.outputSchema = afwTable.SourceCatalog(self.schema)

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        """Fetch the inputs, key the catalogs by band, run, and store the outputs.

        Parameters
        ----------
        butlerQC
            Quantum context used to ``get`` the inputs and ``put`` the outputs.
        inputRefs
            Input references; ``inputRefs.catalogs`` supplies the data IDs
            from which each catalog's ``band`` is read.
        outputRefs
            Output references passed to ``butlerQC.put``.
        """
        inputs = butlerQC.get(inputRefs)
        dataIds = (ref.dataId for ref in inputRefs.catalogs)
        # `run` expects a mapping from band name to catalog.
        catalogDict = {dataId['band']: cat for dataId, cat in zip(dataIds, inputs['catalogs'])}
        inputs['catalogs'] = catalogDict
        outputs = self.run(**inputs)
        butlerQC.put(outputs, outputRefs)

    def run(self, catalogs):
        """Merge measurement catalogs to create a single reference catalog
        for forced photometry.

        For parent sources, we choose the first band in
        ``config.priorityList`` for which the merge_footprint flag for that
        band is True. For child sources, the logic is the same, except that
        we use the merge_peak flags. If the S/N in that priority band is
        below ``config.minSN`` and another band exceeds it by more than
        ``config.minSNDiff``, the band with the largest S/N is used instead.

        Parameters
        ----------
        catalogs : `dict`
            The catalogs to be merged, keyed by band name. Bands not listed
            in ``config.priorityList`` are ignored.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Struct with a single attribute ``mergedCatalog``, the merged
            `lsst.afw.table.SourceCatalog`.

        Raises
        ------
        ValueError
            Raised if no input catalog belongs to a band in the priority
            list, if the source IDs of the catalogs do not match, if no
            valid reference record is found for a source, or if the
            catalog sizes differ.
        """
        # Put catalogs, filters in priority order
        bands = [band for band in self.config.priorityList if band in catalogs]
        if not bands:
            # Without this guard the code below fails with a bare IndexError.
            raise ValueError("Error in inputs to MergeCoaddMeasurements: no input catalog for any "
                             "band in the priority list")
        orderedCatalogs = [catalogs[band] for band in bands]
        orderedKeys = [self.flagKeys[band] for band in bands]

        mergedCatalog = afwTable.SourceCatalog(self.schema)
        mergedCatalog.reserve(len(orderedCatalogs[0]))

        idKey = orderedCatalogs[0].table.getIdKey()
        for catalog in orderedCatalogs[1:]:
            if numpy.any(orderedCatalogs[0].get(idKey) != catalog.get(idKey)):
                raise ValueError("Error in inputs to MergeCoaddMeasurements: source IDs do not match")

        # This first zip iterates over all the catalogs simultaneously, yielding a sequence of one
        # record for each band, in priority order.
        for orderedRecords in zip(*orderedCatalogs):

            maxSNRecord = None
            maxSNFlagKeys = None
            maxSN = 0.
            priorityRecord = None
            priorityFlagKeys = None
            prioritySN = 0.
            hasPseudoFilter = False

            # Now we iterate over those record-band pairs, keeping track of the priority and the
            # largest S/N band.
            for inputRecord, flagKeys in zip(orderedRecords, orderedKeys):
                parent = (inputRecord.getParent() == 0 and inputRecord.get(flagKeys.footprint))
                child = (inputRecord.getParent() != 0 and inputRecord.get(flagKeys.peak))

                if not (parent or child):
                    # Not detected in this band: check whether it is a pseudo-filter object,
                    # in which case this band's record is taken and the search stops.
                    for pseudoFilterKey in self.pseudoFilterKeys:
                        if inputRecord.get(pseudoFilterKey):
                            hasPseudoFilter = True
                            priorityRecord = inputRecord
                            priorityFlagKeys = flagKeys
                            break
                    if hasPseudoFilter:
                        break

                # Iterating the dict yields the flag names, so this lookup is by field name.
                isBad = any(inputRecord.get(flag) for flag in self.badFlags)
                if isBad or inputRecord.get(self.fluxFlagKey) or inputRecord.get(self.instFluxErrKey) == 0:
                    sn = 0.
                else:
                    sn = inputRecord.get(self.instFluxKey)/inputRecord.get(self.instFluxErrKey)
                if numpy.isnan(sn) or sn < 0.:
                    sn = 0.
                if (parent or child) and priorityRecord is None:
                    priorityRecord = inputRecord
                    priorityFlagKeys = flagKeys
                    prioritySN = sn
                if sn > maxSN:
                    maxSNRecord = inputRecord
                    maxSNFlagKeys = flagKeys
                    maxSN = sn

            # If the priority band has a low S/N we would like to choose the band with the highest S/N as
            # the reference band instead.  However, we only want to choose the highest S/N band if it is
            # significantly better than the priority band.  Therefore, to choose a band other than the
            # priority, we require that the priority S/N is below the minimum threshold and that the
            # difference between the priority and highest S/N is larger than the difference threshold.
            #
            # For pseudo-filter objects we always use the record in which the pseudo-filter
            # flag was found.
            bestRecord = None
            bestFlagKeys = None
            if hasPseudoFilter:
                bestRecord = priorityRecord
                bestFlagKeys = priorityFlagKeys
            elif (prioritySN < self.config.minSN and (maxSN - prioritySN) > self.config.minSNDiff
                  and maxSNRecord is not None):
                bestRecord = maxSNRecord
                bestFlagKeys = maxSNFlagKeys
            elif priorityRecord is not None:
                bestRecord = priorityRecord
                bestFlagKeys = priorityFlagKeys

            if bestRecord is not None and bestFlagKeys is not None:
                outputRecord = mergedCatalog.addNew()
                outputRecord.assign(bestRecord, self.schemaMapper)
                outputRecord.set(bestFlagKeys.output, True)
            else:  # if we didn't find any records
                raise ValueError("Error in inputs to MergeCoaddMeasurements: no valid reference for %s" %
                                 inputRecord.getId())

        # more checking for sane inputs, since zip silently iterates over the smallest sequence
        for inputCatalog in orderedCatalogs:
            if len(mergedCatalog) != len(inputCatalog):
                # Report the two sizes actually being compared.
                raise ValueError("Mismatch between catalog sizes: %s != %s" %
                                 (len(mergedCatalog), len(inputCatalog)))

        return pipeBase.Struct(
            mergedCatalog=mergedCatalog
        )