lsst.pipe.tasks  16.0-37-gdefe6db9
mergeMeasurements.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 #
3 # LSST Data Management System
4 # Copyright 2008-2015 AURA/LSST.
5 #
6 # This product includes software developed by the
7 # LSST Project (http://www.lsst.org/).
8 #
9 # This program is free software: you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation, either version 3 of the License, or
12 # (at your option) any later version.
13 #
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
18 #
19 # You should have received a copy of the LSST License Statement and
20 # the GNU General Public License along with this program. If not,
21 # see <https://www.lsstcorp.org/LegalNotices/>.
22 #
23 import numpy
24 
25 from .multiBandUtils import (MergeSourcesRunner, _makeGetSchemaCatalogs, makeMergeArgumentParser,
26  getInputSchema, getShortFilterName, readCatalog)
27 
28 import lsst.afw.table as afwTable
29 
30 from lsst.pex.config import Config, Field, ListField
31 from lsst.pipe.base import CmdLineTask, Struct
32 
33 
class MergeMeasurementsConfig(Config):
    """!
    @anchor MergeMeasurementsConfig_

    @brief Configuration parameters for the MergeMeasurementsTask
    """
    # Filters that are allowed to have no detection of their own (e.g. sky objects).
    pseudoFilterList = ListField(dtype=str, default=["sky"],
                                 doc="Names of filters which may have no associated detection\n"
                                 "(N.b. should include MergeDetectionsConfig.skyFilterName)")
    # Prefix of the flux measurement; "_instFlux", "_instFluxErr" and "_flag" are appended by the task.
    snName = Field(dtype=str, default="base_PsfFlux",
                   doc="Name of flux measurement for calculating the S/N when choosing the reference band.")
    minSN = Field(dtype=float, default=10.,
                  doc="If the S/N from the priority band is below this value (and the S/N "
                  "is larger than minSNDiff compared to the priority band), use the band with "
                  "the largest S/N as the reference band.")
    minSNDiff = Field(dtype=float, default=3.,
                      doc="If the difference in S/N between another band and the priority band is larger "
                      "than this value (and the S/N in the priority band is less than minSN) "
                      "use the band with the largest S/N as the reference band")
    flags = ListField(dtype=str, doc="Require that these flags, if available, are not set",
                      default=["base_PixelFlags_flag_interpolatedCenter", "base_PsfFlux_flag",
                               "ext_photometryKron_KronFlux_flag", "modelfit_CModel_flag", ])
    priorityList = ListField(dtype=str, default=[],
                             doc="Priority-ordered list of bands for the merge.")
    coaddName = Field(dtype=str, default="deep", doc="Name of coadd")

    def validate(self):
        """!
        @brief Validate the configuration.

        Runs the base-class validation and then requires a non-empty priorityList,
        because the merge has no default band ordering.

        @throw RuntimeError if priorityList is empty
        """
        Config.validate(self)
        if len(self.priorityList) == 0:
            raise RuntimeError("No priority list provided")
64 
65 
66 
72 
73 
class MergeMeasurementsTask(CmdLineTask):
    r"""!
    @anchor MergeMeasurementsTask_

    @brief Merge measurements from multiple bands

    @section pipe_tasks_multiBand_Contents Contents

      - @ref pipe_tasks_multiBand_MergeMeasurementsTask_Purpose
      - @ref pipe_tasks_multiBand_MergeMeasurementsTask_Initialize
      - @ref pipe_tasks_multiBand_MergeMeasurementsTask_Run
      - @ref pipe_tasks_multiBand_MergeMeasurementsTask_Config
      - @ref pipe_tasks_multiBand_MergeMeasurementsTask_Debug
      - @ref pipe_tasks_multiband_MergeMeasurementsTask_Example

    @section pipe_tasks_multiBand_MergeMeasurementsTask_Purpose Description

    Command-line task that merges measurements from multiple bands.

    Combines consistent (i.e. with the same peaks and footprints) catalogs of sources from multiple filter
    bands to construct a unified catalog that is suitable for driving forced photometry. Every source is
    required to have centroid, shape and flux measurements in each band.

    @par Inputs:
        deepCoadd_meas{tract,patch,filter}: SourceCatalog
    @par Outputs:
        deepCoadd_ref{tract,patch}: SourceCatalog
    @par Data Unit:
        tract, patch

    MergeMeasurementsTask subclasses @ref CmdLineTask_ "CmdLineTask".

    @section pipe_tasks_multiBand_MergeMeasurementsTask_Initialize Task initialization

    @copydoc \_\_init\_\_

    @section pipe_tasks_multiBand_MergeMeasurementsTask_Run Invoking the Task

    @copydoc run

    @section pipe_tasks_multiBand_MergeMeasurementsTask_Config Configuration parameters

    See @ref MergeMeasurementsConfig_

    @section pipe_tasks_multiBand_MergeMeasurementsTask_Debug Debug variables

    The @link lsst.pipe.base.cmdLineTask.CmdLineTask command line task@endlink interface supports a
    flag @c -d to import @b debug.py from your @c PYTHONPATH; see @ref baseDebug for more about @b debug.py
    files.

    MergeMeasurementsTask has no debug variables.

    @section pipe_tasks_multiband_MergeMeasurementsTask_Example A complete example
    of using MergeMeasurementsTask

    MergeMeasurementsTask is meant to be run after deblending & measuring sources in every band.
    The purpose of the task is to generate a catalog of sources suitable for driving forced photometry in
    coadds and individual exposures.
    Command-line usage of MergeMeasurementsTask expects a data reference to the coadds to be processed. A list
    of the available optional arguments can be obtained by calling mergeCoaddMeasurements.py with the `--help`
    command line argument:
    @code
    mergeCoaddMeasurements.py --help
    @endcode

    To demonstrate usage of the MergeMeasurementsTask in the larger context of multi-band processing, we
    will process HSC data in the [ci_hsc](https://github.com/lsst/ci_hsc) package. Assuming one has finished
    step 7 at @ref pipeTasks_multiBand, one may merge the catalogs generated after deblending and measuring
    as follows:
    @code
    mergeCoaddMeasurements.py $CI_HSC_DIR/DATA --id patch=5,4 tract=0 filter=HSC-I^HSC-R
    @endcode
    This will merge the HSC-I & HSC-R band catalogs. The results are written in
    `$CI_HSC_DIR/DATA/deepCoadd-results/`.
    """
    _DefaultName = "mergeCoaddMeasurements"
    ConfigClass = MergeMeasurementsConfig
    RunnerClass = MergeSourcesRunner
    inputDataset = "meas"
    outputDataset = "ref"
    getSchemaCatalogs = _makeGetSchemaCatalogs("ref")

    @classmethod
    def _makeArgumentParser(cls):
        """!
        @brief Create the argument parser for this task.

        Delegates to makeMergeArgumentParser, passing the task's default name and input dataset.
        """
        return makeMergeArgumentParser(cls._DefaultName, cls.inputDataset)

    def getInputSchema(self, butler=None, schema=None):
        """!
        @brief Obtain the input schema, delegating to multiBandUtils.getInputSchema.

        @param[in] butler: butler forwarded to the helper
        @param[in] schema: schema forwarded to the helper
        """
        return getInputSchema(self, butler, schema)

    def __init__(self, butler=None, schema=None, **kwargs):
        """!
        Initialize the task.

        @param[in] schema: the schema of the detection catalogs used as input to this one
        @param[in] butler: a butler used to read the input schema from disk, if schema is None

        The task will set its own self.schema attribute to the schema of the output merged catalog.
        """
        CmdLineTask.__init__(self, **kwargs)
        inputSchema = self.getInputSchema(butler=butler, schema=schema)
        self.schemaMapper = afwTable.SchemaMapper(inputSchema, True)
        self.schemaMapper.addMinimalSchema(inputSchema, True)

        # Keys of the flux measurement (config.snName) used to compute the S/N in run().
        self.instFluxKey = inputSchema.find(self.config.snName + "_instFlux").getKey()
        self.instFluxErrKey = inputSchema.find(self.config.snName + "_instFluxErr").getKey()
        self.fluxFlagKey = inputSchema.find(self.config.snName + "_flag").getKey()

        # For every band: the input merge_peak/merge_footprint flags and a new output flag
        # recording that the band was chosen as the reference.
        self.flagKeys = {}
        for band in self.config.priorityList:
            short = getShortFilterName(band)
            outputKey = self.schemaMapper.editOutputSchema().addField(
                "merge_measurement_%s" % short,
                type="Flag",
                doc="Flag field set if the measurements here are from the %s filter" % band
            )
            peakKey = inputSchema.find("merge_peak_%s" % short).getKey()
            footprintKey = inputSchema.find("merge_footprint_%s" % short).getKey()
            self.flagKeys[band] = Struct(peak=peakKey, footprint=footprintKey, output=outputKey)
        self.schema = self.schemaMapper.getOutputSchema()

        # Must exist before the loop below appends to it; run() iterates over it even when
        # no pseudo-filter flag could be found.
        self.pseudoFilterKeys = []
        for filt in self.config.pseudoFilterList:
            try:
                self.pseudoFilterKeys.append(self.schema.find("merge_peak_%s" % filt).getKey())
            except Exception as e:
                self.log.warn("merge_peak is not set for pseudo-filter %s: %s" % (filt, e))

        # Flags that disqualify a band's S/N; flags absent from the schema are skipped with a warning.
        self.badFlags = {}
        for flag in self.config.flags:
            try:
                self.badFlags[flag] = self.schema.find(flag).getKey()
            except KeyError as exc:
                self.log.warn("Can't find flag %s in schema: %s" % (flag, exc,))

    def runDataRef(self, patchRefList):
        """!
        @brief Merge coadd sources from multiple bands. Calls @ref `run`.
        @param[in] patchRefList list of data references for each filter
        """
        catalogs = dict(readCatalog(self, patchRef) for patchRef in patchRefList)
        mergedCatalog = self.run(catalogs)
        self.write(patchRefList[0], mergedCatalog)

    def run(self, catalogs):
        """!
        Merge measurement catalogs to create a single reference catalog for forced photometry

        @param[in] catalogs: the catalogs to be merged, as a dict keyed by band name

        @return the merged SourceCatalog, with one record per input source

        @throw ValueError if no catalog matches config.priorityList, if the source IDs or sizes of
               the input catalogs differ, or if no valid reference band is found for a source

        For parent sources, we choose the first band in config.priorityList for which the
        merge_footprint flag for that band is True.

        For child sources, the logic is the same, except that we use the merge_peak flags.
        """
        # Put catalogs, filters in priority order
        bands = [band for band in self.config.priorityList if band in catalogs]
        if not bands:
            # Fail with a clear message instead of an IndexError on orderedCatalogs[0] below.
            raise ValueError("Error in inputs to MergeCoaddMeasurements: "
                             "no input catalogs match config.priorityList")
        orderedCatalogs = [catalogs[band] for band in bands]
        orderedKeys = [self.flagKeys[band] for band in bands]

        mergedCatalog = afwTable.SourceCatalog(self.schema)
        mergedCatalog.reserve(len(orderedCatalogs[0]))

        # All bands must describe the same sources in the same order.
        idKey = orderedCatalogs[0].table.getIdKey()
        referenceIds = orderedCatalogs[0].get(idKey)
        for catalog in orderedCatalogs[1:]:
            if numpy.any(referenceIds != catalog.get(idKey)):
                raise ValueError("Error in inputs to MergeCoaddMeasurements: source IDs do not match")

        # This first zip iterates over all the catalogs simultaneously, yielding a sequence of one
        # record for each band, in priority order.
        for orderedRecords in zip(*orderedCatalogs):

            maxSNRecord = None
            maxSNFlagKeys = None
            maxSN = 0.
            priorityRecord = None
            priorityFlagKeys = None
            prioritySN = 0.
            hasPseudoFilter = False

            # Now we iterate over those record-band pairs, keeping track of the priority and the
            # largest S/N band.
            for inputRecord, flagKeys in zip(orderedRecords, orderedKeys):
                parent = (inputRecord.getParent() == 0 and inputRecord.get(flagKeys.footprint))
                child = (inputRecord.getParent() != 0 and inputRecord.get(flagKeys.peak))

                if not (parent or child):
                    # Not detected in this band: accept it only if a pseudo-filter flag is set,
                    # in which case this band is used and no further bands are examined.
                    for pseudoFilterKey in self.pseudoFilterKeys:
                        if inputRecord.get(pseudoFilterKey):
                            hasPseudoFilter = True
                            priorityRecord = inputRecord
                            priorityFlagKeys = flagKeys
                            break
                    if hasPseudoFilter:
                        break

                # NOTE(review): this iterates the flag *names* (the dict keys), not the cached Key
                # objects stored as values -- confirm that name-based lookup is what is intended.
                isBad = any(inputRecord.get(flag) for flag in self.badFlags)
                if isBad or inputRecord.get(self.fluxFlagKey) or inputRecord.get(self.instFluxErrKey) == 0:
                    sn = 0.
                else:
                    sn = inputRecord.get(self.instFluxKey)/inputRecord.get(self.instFluxErrKey)
                if numpy.isnan(sn) or sn < 0.:
                    sn = 0.
                if (parent or child) and priorityRecord is None:
                    priorityRecord = inputRecord
                    priorityFlagKeys = flagKeys
                    prioritySN = sn
                if sn > maxSN:
                    maxSNRecord = inputRecord
                    maxSNFlagKeys = flagKeys
                    maxSN = sn

            # If the priority band has a low S/N we would like to choose the band with the highest S/N as
            # the reference band instead. However, we only want to choose the highest S/N band if it is
            # significantly better than the priority band. Therefore, to choose a band other than the
            # priority, we require that the priority S/N is below the minimum threshold and that the
            # difference between the priority and highest S/N is larger than the difference threshold.
            #
            # For pseudo-filter objects we always choose the first band in the priority list.
            bestRecord = None
            bestFlagKeys = None
            if hasPseudoFilter:
                bestRecord = priorityRecord
                bestFlagKeys = priorityFlagKeys
            elif (prioritySN < self.config.minSN and (maxSN - prioritySN) > self.config.minSNDiff and
                  maxSNRecord is not None):
                bestRecord = maxSNRecord
                bestFlagKeys = maxSNFlagKeys
            elif priorityRecord is not None:
                bestRecord = priorityRecord
                bestFlagKeys = priorityFlagKeys

            if bestRecord is not None and bestFlagKeys is not None:
                outputRecord = mergedCatalog.addNew()
                outputRecord.assign(bestRecord, self.schemaMapper)
                outputRecord.set(bestFlagKeys.output, True)
            else:  # if we didn't find any records
                # The IDs were verified to match across bands, so any band's record identifies the source.
                raise ValueError("Error in inputs to MergeCoaddMeasurements: no valid reference for %s" %
                                 orderedRecords[0].getId())

        # more checking for sane inputs, since zip silently iterates over the smallest sequence
        for inputCatalog in orderedCatalogs:
            if len(mergedCatalog) != len(inputCatalog):
                raise ValueError("Mismatch between catalog sizes: %s != %s" %
                                 (len(mergedCatalog), len(inputCatalog)))

        return mergedCatalog

    def write(self, patchRef, catalog):
        """!
        @brief Write the output.

        @param[in]  patchRef   data reference for patch
        @param[in]  catalog    catalog

        We write as the dataset provided by the 'outputDataset'
        class variable.
        """
        patchRef.put(catalog, self.config.coaddName + "Coadd_" + self.outputDataset)
        # since the filter isn't actually part of the data ID for the dataset we're saving,
        # it's confusing to see it in the log message, even if the butler simply ignores it.
        mergeDataId = patchRef.dataId.copy()
        # The catalog has already been written; a data ID without "filter" must not turn
        # a cosmetic log clean-up into a failure.
        mergeDataId.pop("filter", None)
        self.log.info("Wrote merged catalog: %s" % (mergeDataId,))

    def writeMetadata(self, dataRefList):
        """!
        @brief No metadata to write, and not sure how to write it for a list of dataRefs.
        """
        pass
def runDataRef(self, patchRefList)
Merge coadd sources from multiple bands.
def makeMergeArgumentParser(name, dataset)
Create a suitable ArgumentParser.
def readCatalog(task, patchRef)
Read input catalog.
Merge measurements from multiple bands.
def run(self, catalogs)
Merge measurement catalogs to create a single reference catalog for forced photometry.
def __init__(self, butler=None, schema=None, kwargs)
Initialize the task.
Configuration parameters for the MergeMeasurementsTask.
def write(self, patchRef, catalog)
Write the output.
def writeMetadata(self, dataRefList)
No metadata to write, and not sure how to write it for a list of dataRefs.