# NOTE: extraction residue — this file was captured from a coverage.py HTML report
# (python/lsst/pipe/tasks/mergeMeasurements.py, 80% line coverage); the report's
# hot-key legend has been removed.
1#!/usr/bin/env python
2#
3# LSST Data Management System
4# Copyright 2008-2015 AURA/LSST.
5#
6# This product includes software developed by the
7# LSST Project (http://www.lsst.org/).
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the LSST License Statement and
20# the GNU General Public License along with this program. If not,
21# see <https://www.lsstcorp.org/LegalNotices/>.
22#
23import numpy
25from .multiBandUtils import (MergeSourcesRunner, _makeGetSchemaCatalogs, makeMergeArgumentParser,
26 getInputSchema, getShortFilterName, readCatalog)
29import lsst.afw.table as afwTable
30import lsst.pex.config as pexConfig
31import lsst.pipe.base as pipeBase
33from lsst.pipe.base import PipelineTaskConnections, PipelineTaskConfig
34import lsst.pipe.base.connectionTypes as cT
class MergeMeasurementsConnections(PipelineTaskConnections,
                                   dimensions=("skymap", "tract", "patch"),
                                   defaultTemplates={"inputCoaddName": "deep",
                                                     "outputCoaddName": "deep"}):
    """Butler connections for MergeMeasurementsTask.

    One per-band measurement catalog per patch comes in; a single merged
    reference catalog per patch goes out.
    """
    inputSchema = cT.InitInput(
        # Fixed: previously a copy-paste of outputSchema's doc ("output merged
        # measurement catalog"), but this is the schema of the per-band *input*
        # measurement catalogs.
        doc="Schema of the input measurement catalogs.",
        name="{inputCoaddName}Coadd_meas_schema",
        storageClass="SourceCatalog",
    )
    outputSchema = cT.InitOutput(
        doc="Schema for the output merged measurement catalog.",
        name="{outputCoaddName}Coadd_ref_schema",
        storageClass="SourceCatalog",
    )
    catalogs = cT.Input(
        doc="Input catalogs to merge.",
        name="{inputCoaddName}Coadd_meas",
        # One catalog per band, hence the "band" dimension plus multiple=True.
        multiple=True,
        storageClass="SourceCatalog",
        dimensions=["band", "skymap", "tract", "patch"],
    )
    mergedCatalog = cT.Output(
        doc="Output merged catalog.",
        name="{outputCoaddName}Coadd_ref",
        storageClass="SourceCatalog",
        dimensions=["skymap", "tract", "patch"],
    )
class MergeMeasurementsConfig(PipelineTaskConfig, pipelineConnections=MergeMeasurementsConnections):
    """!
    @anchor MergeMeasurementsConfig_

    @brief Configuration parameters for the MergeMeasurementsTask

    Controls how the per-band measurement catalogs are combined into a single
    reference catalog: which band is preferred (priorityList), how the
    reference band may be overridden based on S/N (snName, minSN, minSNDiff),
    and which flags disqualify a measurement (flags).
    """
    # Bands (e.g. "sky") that carry no real detections; a source flagged in one
    # of these is accepted from the first priority band without any S/N test.
    pseudoFilterList = pexConfig.ListField(
        dtype=str,
        default=["sky"],
        doc="Names of filters which may have no associated detection\n"
        "(N.b. should include MergeDetectionsConfig.skyFilterName)"
    )
    # Prefix of the instFlux/instFluxErr/flag fields used for the S/N ratio.
    snName = pexConfig.Field(
        dtype=str,
        default="base_PsfFlux",
        doc="Name of flux measurement for calculating the S/N when choosing the reference band."
    )
    minSN = pexConfig.Field(
        dtype=float,
        default=10.,
        doc="If the S/N from the priority band is below this value (and the S/N "
        "is larger than minSNDiff compared to the priority band), use the band with "
        "the largest S/N as the reference band."
    )
    minSNDiff = pexConfig.Field(
        dtype=float,
        default=3.,
        doc="If the difference in S/N between another band and the priority band is larger "
        "than this value (and the S/N in the priority band is less than minSN) "
        "use the band with the largest S/N as the reference band"
    )
    # Measurements with any of these flags set get S/N treated as zero, so they
    # are never promoted to reference band on S/N grounds.
    flags = pexConfig.ListField(
        dtype=str,
        doc="Require that these flags, if available, are not set",
        default=["base_PixelFlags_flag_interpolatedCenter", "base_PsfFlux_flag",
                 "ext_photometryKron_KronFlux_flag", "modelfit_CModel_flag", ]
    )
    priorityList = pexConfig.ListField(
        dtype=str,
        default=[],
        doc="Priority-ordered list of bands for the merge."
    )
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )

    def validate(self):
        """!Check the configuration; a non-empty priorityList is required."""
        super().validate()
        if len(self.priorityList) == 0:
            raise RuntimeError("No priority list provided")
120## @addtogroup LSST_task_documentation
121## @{
122## @page MergeMeasurementsTask
123## @ref MergeMeasurementsTask_ "MergeMeasurementsTask"
124## @copybrief MergeMeasurementsTask
125## @}
class MergeMeasurementsTask(pipeBase.PipelineTask, pipeBase.CmdLineTask):
    r"""!
    @anchor MergeMeasurementsTask_

    @brief Merge measurements from multiple bands

    @section pipe_tasks_multiBand_Contents Contents

      - @ref pipe_tasks_multiBand_MergeMeasurementsTask_Purpose
      - @ref pipe_tasks_multiBand_MergeMeasurementsTask_Initialize
      - @ref pipe_tasks_multiBand_MergeMeasurementsTask_Run
      - @ref pipe_tasks_multiBand_MergeMeasurementsTask_Config
      - @ref pipe_tasks_multiBand_MergeMeasurementsTask_Debug
      - @ref pipe_tasks_multiband_MergeMeasurementsTask_Example

    @section pipe_tasks_multiBand_MergeMeasurementsTask_Purpose Description

    Command-line task that merges measurements from multiple bands.

    Combines consistent (i.e. with the same peaks and footprints) catalogs of sources from multiple filter
    bands to construct a unified catalog that is suitable for driving forced photometry. Every source is
    required to have centroid, shape and flux measurements in each band.

    @par Inputs:
        deepCoadd_meas{tract,patch,filter}: SourceCatalog
    @par Outputs:
        deepCoadd_ref{tract,patch}: SourceCatalog
    @par Data Unit:
        tract, patch

    MergeMeasurementsTask subclasses @ref CmdLineTask_ "CmdLineTask".

    @section pipe_tasks_multiBand_MergeMeasurementsTask_Initialize Task initialization

    @copydoc \_\_init\_\_

    @section pipe_tasks_multiBand_MergeMeasurementsTask_Run Invoking the Task

    @copydoc run

    @section pipe_tasks_multiBand_MergeMeasurementsTask_Config Configuration parameters

    See @ref MergeMeasurementsConfig_

    @section pipe_tasks_multiBand_MergeMeasurementsTask_Debug Debug variables

    The @link lsst.pipe.base.cmdLineTask.CmdLineTask command line task@endlink interface supports a
    flag @c -d to import @b debug.py from your @c PYTHONPATH; see @ref baseDebug for more about @b debug.py
    files.

    MergeMeasurementsTask has no debug variables.

    @section pipe_tasks_multiband_MergeMeasurementsTask_Example A complete example
    of using MergeMeasurementsTask

    MergeMeasurementsTask is meant to be run after deblending & measuring sources in every band.
    The purpose of the task is to generate a catalog of sources suitable for driving forced photometry in
    coadds and individual exposures.
    Command-line usage of MergeMeasurementsTask expects a data reference to the coadds to be processed. A list
    of the available optional arguments can be obtained by calling mergeCoaddMeasurements.py with the `--help`
    command line argument:
    @code
    mergeCoaddMeasurements.py --help
    @endcode

    To demonstrate usage of the DetectCoaddSourcesTask in the larger context of multi-band processing, we
    will process HSC data in the [ci_hsc](https://github.com/lsst/ci_hsc) package. Assuming one has finished
    step 7 at @ref pipeTasks_multiBand, one may merge the catalogs generated after deblending and measuring
    as follows:
    @code
    mergeCoaddMeasurements.py $CI_HSC_DIR/DATA --id patch=5,4 tract=0 filter=HSC-I^HSC-R
    @endcode
    This will merge the HSC-I & HSC-R band catalogs. The results are written in
    `$CI_HSC_DIR/DATA/deepCoadd-results/`.
    """
    _DefaultName = "mergeCoaddMeasurements"
    ConfigClass = MergeMeasurementsConfig
    RunnerClass = MergeSourcesRunner
    # Gen2 dataset suffixes: read <coaddName>Coadd_meas, write <coaddName>Coadd_ref.
    inputDataset = "meas"
    outputDataset = "ref"
    getSchemaCatalogs = _makeGetSchemaCatalogs("ref")

    @classmethod
    def _makeArgumentParser(cls):
        """!Return a suitable ArgumentParser for the Gen2 command-line driver."""
        return makeMergeArgumentParser(cls._DefaultName, cls.inputDataset)

    def getInputSchema(self, butler=None, schema=None):
        """!Obtain the input schema, reading it from the butler if not supplied."""
        return getInputSchema(self, butler, schema)

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        """!Gen3 entry point: fetch inputs, re-key catalogs by band, run, and write outputs."""
        inputs = butlerQC.get(inputRefs)
        # run() wants a {band: catalog} dict; the butler delivers a parallel
        # list of refs and catalogs, so zip them together by position.
        dataIds = (ref.dataId for ref in inputRefs.catalogs)
        catalogDict = {dataId['band']: cat for dataId, cat in zip(dataIds, inputs['catalogs'])}
        inputs['catalogs'] = catalogDict
        outputs = self.run(**inputs)
        butlerQC.put(outputs, outputRefs)

    def __init__(self, butler=None, schema=None, initInputs=None, **kwargs):
        """!
        Initialize the task.

        @param[in] schema: the schema of the detection catalogs used as input to this one
        @param[in] butler: a butler used to read the input schema from disk, if schema is None
        @param[in] initInputs: Gen3 init inputs; if provided, the input schema is taken from
            initInputs['inputSchema'] and butler/schema are ignored

        The task will set its own self.schema attribute to the schema of the output merged catalog.
        """
        super().__init__(**kwargs)

        if initInputs is not None:
            inputSchema = initInputs['inputSchema'].schema
        else:
            inputSchema = self.getInputSchema(butler=butler, schema=schema)
        self.schemaMapper = afwTable.SchemaMapper(inputSchema, True)
        self.schemaMapper.addMinimalSchema(inputSchema, True)
        # Keys used to compute the S/N that may override the priority band.
        self.instFluxKey = inputSchema.find(self.config.snName + "_instFlux").getKey()
        self.instFluxErrKey = inputSchema.find(self.config.snName + "_instFluxErr").getKey()
        self.fluxFlagKey = inputSchema.find(self.config.snName + "_flag").getKey()

        # For each priority band: the input merge_peak/merge_footprint keys and
        # a new output flag marking which band supplied the merged record.
        self.flagKeys = {}
        for band in self.config.priorityList:
            short = getShortFilterName(band)
            outputKey = self.schemaMapper.editOutputSchema().addField(
                "merge_measurement_%s" % short,
                type="Flag",
                doc="Flag field set if the measurements here are from the %s filter" % band
            )
            peakKey = inputSchema.find("merge_peak_%s" % short).key
            footprintKey = inputSchema.find("merge_footprint_%s" % short).key
            self.flagKeys[band] = pipeBase.Struct(peak=peakKey, footprint=footprintKey, output=outputKey)
        self.schema = self.schemaMapper.getOutputSchema()

        # Pseudo filters (e.g. "sky") may legitimately be absent from the schema,
        # so a lookup failure is only a warning.
        self.pseudoFilterKeys = []
        for filt in self.config.pseudoFilterList:
            try:
                self.pseudoFilterKeys.append(self.schema.find("merge_peak_%s" % filt).getKey())
            except Exception as e:
                self.log.warn("merge_peak is not set for pseudo-filter %s: %s" % (filt, e))

        # Flags that zero out a measurement's S/N; missing flags are tolerated.
        self.badFlags = {}
        for flag in self.config.flags:
            try:
                self.badFlags[flag] = self.schema.find(flag).getKey()
            except KeyError as exc:
                self.log.warn("Can't find flag %s in schema: %s" % (flag, exc,))
        self.outputSchema = afwTable.SourceCatalog(self.schema)

    def runDataRef(self, patchRefList):
        """!
        @brief Merge coadd sources from multiple bands. Calls @ref `run`.
        @param[in] patchRefList list of data references for each filter
        """
        catalogs = dict(readCatalog(self, patchRef) for patchRef in patchRefList)
        mergedCatalog = self.run(catalogs).mergedCatalog
        self.write(patchRefList[0], mergedCatalog)

    def run(self, catalogs):
        """!
        Merge measurement catalogs to create a single reference catalog for forced photometry

        @param[in] catalogs: dict of {band: catalog} to be merged; the catalogs must be
            row-aligned (same source IDs in the same order)
        @return a pipeBase.Struct with a `mergedCatalog` attribute
        @throw ValueError if no catalog matches the priority list, if the input IDs
            disagree, or if no valid reference band is found for a source

        For parent sources, we choose the first band in config.priorityList for which the
        merge_footprint flag for that band is is True.

        For child sources, the logic is the same, except that we use the merge_peak flags.
        """
        # Put catalogs, filters in priority order
        orderedCatalogs = [catalogs[band] for band in self.config.priorityList if band in catalogs.keys()]
        orderedKeys = [self.flagKeys[band] for band in self.config.priorityList if band in catalogs.keys()]
        # Fixed: guard against an empty selection, which previously surfaced as a
        # bare IndexError on orderedCatalogs[0] below.
        if not orderedCatalogs:
            raise ValueError("Error in inputs to MergeCoaddMeasurements: no input catalogs "
                             "match the priority list %s" % (list(self.config.priorityList),))

        mergedCatalog = afwTable.SourceCatalog(self.schema)
        mergedCatalog.reserve(len(orderedCatalogs[0]))

        idKey = orderedCatalogs[0].table.getIdKey()
        for catalog in orderedCatalogs[1:]:
            if numpy.any(orderedCatalogs[0].get(idKey) != catalog.get(idKey)):
                raise ValueError("Error in inputs to MergeCoaddMeasurements: source IDs do not match")

        # This first zip iterates over all the catalogs simultaneously, yielding a sequence of one
        # record for each band, in priority order.
        for orderedRecords in zip(*orderedCatalogs):

            maxSNRecord = None
            maxSNFlagKeys = None
            maxSN = 0.
            priorityRecord = None
            priorityFlagKeys = None
            prioritySN = 0.
            hasPseudoFilter = False

            # Now we iterate over those record-band pairs, keeping track of the priority and the
            # largest S/N band.
            for inputRecord, flagKeys in zip(orderedRecords, orderedKeys):
                parent = (inputRecord.getParent() == 0 and inputRecord.get(flagKeys.footprint))
                child = (inputRecord.getParent() != 0 and inputRecord.get(flagKeys.peak))

                if not (parent or child):
                    # Not detected in this band; accept it anyway if it carries a
                    # pseudo-filter (e.g. sky-object) peak flag.
                    for pseudoFilterKey in self.pseudoFilterKeys:
                        if inputRecord.get(pseudoFilterKey):
                            hasPseudoFilter = True
                            priorityRecord = inputRecord
                            priorityFlagKeys = flagKeys
                            break
                    if hasPseudoFilter:
                        break

                # A bad flag, a flux-measurement failure, or a zero error all
                # disqualify this band's S/N (treated as zero).
                isBad = any(inputRecord.get(flag) for flag in self.badFlags)
                if isBad or inputRecord.get(self.fluxFlagKey) or inputRecord.get(self.instFluxErrKey) == 0:
                    sn = 0.
                else:
                    sn = inputRecord.get(self.instFluxKey)/inputRecord.get(self.instFluxErrKey)
                if numpy.isnan(sn) or sn < 0.:
                    sn = 0.
                if (parent or child) and priorityRecord is None:
                    priorityRecord = inputRecord
                    priorityFlagKeys = flagKeys
                    prioritySN = sn
                if sn > maxSN:
                    maxSNRecord = inputRecord
                    maxSNFlagKeys = flagKeys
                    maxSN = sn

            # If the priority band has a low S/N we would like to choose the band with the highest S/N as
            # the reference band instead. However, we only want to choose the highest S/N band if it is
            # significantly better than the priority band. Therefore, to choose a band other than the
            # priority, we require that the priority S/N is below the minimum threshold and that the
            # difference between the priority and highest S/N is larger than the difference threshold.
            #
            # For pseudo code objects we always choose the first band in the priority list.
            bestRecord = None
            bestFlagKeys = None
            if hasPseudoFilter:
                bestRecord = priorityRecord
                bestFlagKeys = priorityFlagKeys
            elif (prioritySN < self.config.minSN and (maxSN - prioritySN) > self.config.minSNDiff
                    and maxSNRecord is not None):
                bestRecord = maxSNRecord
                bestFlagKeys = maxSNFlagKeys
            elif priorityRecord is not None:
                bestRecord = priorityRecord
                bestFlagKeys = priorityFlagKeys

            if bestRecord is not None and bestFlagKeys is not None:
                outputRecord = mergedCatalog.addNew()
                outputRecord.assign(bestRecord, self.schemaMapper)
                outputRecord.set(bestFlagKeys.output, True)
            else:  # if we didn't find any records
                raise ValueError("Error in inputs to MergeCoaddMeasurements: no valid reference for %s" %
                                 inputRecord.getId())

        # more checking for sane inputs, since zip silently iterates over the smallest sequence
        for inputCatalog in orderedCatalogs:
            if len(mergedCatalog) != len(inputCatalog):
                # Fixed: the message previously reported len(orderedCatalogs)
                # (the number of catalogs) instead of the mismatched catalog's length.
                raise ValueError("Mismatch between catalog sizes: %s != %s" %
                                 (len(mergedCatalog), len(inputCatalog)))

        return pipeBase.Struct(
            mergedCatalog=mergedCatalog
        )

    def write(self, patchRef, catalog):
        """!
        @brief Write the output.

        @param[in] patchRef data reference for patch
        @param[in] catalog catalog

        We write as the dataset provided by the 'outputDataset'
        class variable.
        """
        patchRef.put(catalog, self.config.coaddName + "Coadd_" + self.outputDataset)
        # since the filter isn't actually part of the data ID for the dataset we're saving,
        # it's confusing to see it in the log message, even if the butler simply ignores it.
        mergeDataId = patchRef.dataId.copy()
        del mergeDataId["filter"]
        self.log.info("Wrote merged catalog: %s" % (mergeDataId,))

    def writeMetadata(self, dataRefList):
        """!
        @brief No metadata to write, and not sure how to write it for a list of dataRefs.
        """
        pass