lsst.pipe.tasks  16.0-65-g12857137+1
mergeMeasurements.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 #
3 # LSST Data Management System
4 # Copyright 2008-2015 AURA/LSST.
5 #
6 # This product includes software developed by the
7 # LSST Project (http://www.lsst.org/).
8 #
9 # This program is free software: you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation, either version 3 of the License, or
12 # (at your option) any later version.
13 #
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
18 #
19 # You should have received a copy of the LSST License Statement and
20 # the GNU General Public License along with this program. If not,
21 # see <https://www.lsstcorp.org/LegalNotices/>.
22 #
23 import numpy
24 
25 from .multiBandUtils import (MergeSourcesRunner, _makeGetSchemaCatalogs, makeMergeArgumentParser,
26  getInputSchema, getShortFilterName, readCatalog)
27 
28 import lsst.afw.table as afwTable
29 import lsst.pex.config as pexConfig
30 import lsst.pipe.base as pipeBase
31 
32 
class MergeMeasurementsConfig(pipeBase.PipelineTaskConfig):
    """!
    @anchor MergeMeasurementsConfig_

    @brief Configuration parameters for the MergeMeasurementsTask
    """
    # Gen 3 options
    inputSchema = pipeBase.InitInputDatasetField(
        doc="Schema for the input measurement catalogs.",
        name="",
        nameTemplate="{inputCoaddName}Coadd_meas_schema",
        storageClass="SourceCatalog",
    )
    outputSchema = pipeBase.InitOutputDatasetField(
        doc="Schema for the output merged measurement catalog.",
        name="",
        nameTemplate="{outputCoaddName}Coadd_ref_schema",
        storageClass="SourceCatalog",
    )
    # One input catalog per band (scalar=False); dimensions include AbstractFilter.
    catalogs = pipeBase.InputDatasetField(
        doc="Input catalogs to merge.",
        name="",
        nameTemplate="{inputCoaddName}Coadd_meas",
        scalar=False,
        storageClass="SourceCatalog",
        dimensions=["AbstractFilter", "SkyMap", "Tract", "Patch"],
    )
    # A single merged output per patch (scalar=True); no filter dimension.
    mergedCatalog = pipeBase.OutputDatasetField(
        doc="Output merged catalog.",
        name="",
        nameTemplate="{outputCoaddName}Coadd_ref",
        scalar=True,
        storageClass="SourceCatalog",
        dimensions=["SkyMap", "Tract", "Patch"],
    )
    # Task configuration options
    pseudoFilterList = pexConfig.ListField(
        dtype=str,
        default=["sky"],
        doc="Names of filters which may have no associated detection\n"
            "(N.b. should include MergeDetectionsConfig.skyFilterName)"
    )
    snName = pexConfig.Field(
        dtype=str,
        default="base_PsfFlux",
        doc="Name of flux measurement for calculating the S/N when choosing the reference band."
    )
    minSN = pexConfig.Field(
        dtype=float,
        default=10.,
        doc="If the S/N from the priority band is below this value (and the S/N "
            "is larger than minSNDiff compared to the priority band), use the band with "
            "the largest S/N as the reference band."
    )
    minSNDiff = pexConfig.Field(
        dtype=float,
        default=3.,
        doc="If the difference in S/N between another band and the priority band is larger "
            "than this value (and the S/N in the priority band is less than minSN) "
            "use the band with the largest S/N as the reference band"
    )
    flags = pexConfig.ListField(
        dtype=str,
        doc="Require that these flags, if available, are not set",
        default=["base_PixelFlags_flag_interpolatedCenter", "base_PsfFlux_flag",
                 "ext_photometryKron_KronFlux_flag", "modelfit_CModel_flag", ]
    )
    priorityList = pexConfig.ListField(
        dtype=str,
        default=[],
        doc="Priority-ordered list of bands for the merge."
    )
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="Name of coadd"
    )

    def validate(self):
        # A non-empty priorityList is required: it drives reference-band selection in run().
        super().validate()
        if len(self.priorityList) == 0:
            raise RuntimeError("No priority list provided")

    def setDefaults(self):
        super().setDefaults()
        # Substitute the coadd names into the Gen 3 dataset-name templates declared above.
        self.formatTemplateNames({"inputCoaddName": "deep",
                                  "outputCoaddName": "deep"})
        self.quantum.dimensions = ("SkyMap", "Tract", "Patch")
121 
122 
128 
129 
class MergeMeasurementsTask(pipeBase.PipelineTask, pipeBase.CmdLineTask):
    r"""!
    @anchor MergeMeasurementsTask_

    @brief Merge measurements from multiple bands

    @section pipe_tasks_multiBand_Contents Contents

      - @ref pipe_tasks_multiBand_MergeMeasurementsTask_Purpose
      - @ref pipe_tasks_multiBand_MergeMeasurementsTask_Initialize
      - @ref pipe_tasks_multiBand_MergeMeasurementsTask_Run
      - @ref pipe_tasks_multiBand_MergeMeasurementsTask_Config
      - @ref pipe_tasks_multiBand_MergeMeasurementsTask_Debug
      - @ref pipe_tasks_multiband_MergeMeasurementsTask_Example

    @section pipe_tasks_multiBand_MergeMeasurementsTask_Purpose Description

    Command-line task that merges measurements from multiple bands.

    Combines consistent (i.e. with the same peaks and footprints) catalogs of sources from multiple filter
    bands to construct a unified catalog that is suitable for driving forced photometry. Every source is
    required to have centroid, shape and flux measurements in each band.

    @par Inputs:
        deepCoadd_meas{tract,patch,filter}: SourceCatalog
    @par Outputs:
        deepCoadd_ref{tract,patch}: SourceCatalog
    @par Data Unit:
        tract, patch

    MergeMeasurementsTask subclasses @ref CmdLineTask_ "CmdLineTask".

    @section pipe_tasks_multiBand_MergeMeasurementsTask_Initialize Task initialization

    @copydoc \_\_init\_\_

    @section pipe_tasks_multiBand_MergeMeasurementsTask_Run Invoking the Task

    @copydoc run

    @section pipe_tasks_multiBand_MergeMeasurementsTask_Config Configuration parameters

    See @ref MergeMeasurementsConfig_

    @section pipe_tasks_multiBand_MergeMeasurementsTask_Debug Debug variables

    The @link lsst.pipe.base.cmdLineTask.CmdLineTask command line task@endlink interface supports a
    flag @c -d to import @b debug.py from your @c PYTHONPATH; see @ref baseDebug for more about @b debug.py
    files.

    MergeMeasurementsTask has no debug variables.

    @section pipe_tasks_multiband_MergeMeasurementsTask_Example A complete example
    of using MergeMeasurementsTask

    MergeMeasurementsTask is meant to be run after deblending & measuring sources in every band.
    The purpose of the task is to generate a catalog of sources suitable for driving forced photometry in
    coadds and individual exposures.
    Command-line usage of MergeMeasurementsTask expects a data reference to the coadds to be processed. A list
    of the available optional arguments can be obtained by calling mergeCoaddMeasurements.py with the `--help`
    command line argument:
    @code
    mergeCoaddMeasurements.py --help
    @endcode

    To demonstrate usage of the MergeMeasurementsTask in the larger context of multi-band processing, we
    will process HSC data in the [ci_hsc](https://github.com/lsst/ci_hsc) package. Assuming one has finished
    step 7 at @ref pipeTasks_multiBand, one may merge the catalogs generated after deblending and measuring
    as follows:
    @code
    mergeCoaddMeasurements.py $CI_HSC_DIR/DATA --id patch=5,4 tract=0 filter=HSC-I^HSC-R
    @endcode
    This will merge the HSC-I & HSC-R band catalogs. The results are written in
    `$CI_HSC_DIR/DATA/deepCoadd-results/`.
    """
    # Gen 2 command-line wiring.
    _DefaultName = "mergeCoaddMeasurements"  # default label for config/metadata
    ConfigClass = MergeMeasurementsConfig
    # Runner that groups the per-filter patch references into one task invocation.
    RunnerClass = MergeSourcesRunner
    inputDataset = "meas"    # per-band input dataset suffix ({coaddName}Coadd_meas)
    outputDataset = "ref"    # merged output dataset suffix ({coaddName}Coadd_ref)
    # Gen 2 schema-catalog mapping for the "ref" output dataset.
    getSchemaCatalogs = _makeGetSchemaCatalogs("ref")
212  @classmethod
213  def _makeArgumentParser(cls):
215 
216  def getInputSchema(self, butler=None, schema=None):
217  return getInputSchema(self, butler, schema)
218 
220  return {"outputSchema": afwTable.SourceCatalog(self.schema), }
221 
222  def adaptArgsAndRun(self, inputData, inputDataIds, outputDataIds, butler):
223  catalogDict = {dataId['abstract_filter']: cat for dataId, cat in zip(inputDataIds['catalogs'],
224  inputData['catalogs'])}
225  inputData['catalogs'] = catalogDict
226 
227  return super().adaptArgsAndRun(inputData, inputDataIds, outputDataIds, butler)
228 
229  def __init__(self, butler=None, schema=None, initInputs=None, **kwargs):
230  """!
231  Initialize the task.
232 
233  @param[in] schema: the schema of the detection catalogs used as input to this one
234  @param[in] butler: a butler used to read the input schema from disk, if schema is None
235 
236  The task will set its own self.schema attribute to the schema of the output merged catalog.
237  """
238  super().__init__(**kwargs)
239 
240  if initInputs is not None:
241  inputSchema = initInputs['inputSchema'].schema
242  else:
243  inputSchema = self.getInputSchema(butler=butler, schema=schema)
244  self.schemaMapper = afwTable.SchemaMapper(inputSchema, True)
245  self.schemaMapper.addMinimalSchema(inputSchema, True)
246  self.instFluxKey = inputSchema.find(self.config.snName + "_instFlux").getKey()
247  self.instFluxErrKey = inputSchema.find(self.config.snName + "_instFluxErr").getKey()
248  self.fluxFlagKey = inputSchema.find(self.config.snName + "_flag").getKey()
249 
250  self.flagKeys = {}
251  for band in self.config.priorityList:
252  short = getShortFilterName(band)
253  outputKey = self.schemaMapper.editOutputSchema().addField(
254  "merge_measurement_%s" % short,
255  type="Flag",
256  doc="Flag field set if the measurements here are from the %s filter" % band
257  )
258  peakKey = inputSchema.find("merge_peak_%s" % short).key
259  footprintKey = inputSchema.find("merge_footprint_%s" % short).key
260  self.flagKeys[band] = pipeBase.Struct(peak=peakKey, footprint=footprintKey, output=outputKey)
261  self.schema = self.schemaMapper.getOutputSchema()
262 
264  for filt in self.config.pseudoFilterList:
265  try:
266  self.pseudoFilterKeys.append(self.schema.find("merge_peak_%s" % filt).getKey())
267  except Exception as e:
268  self.log.warn("merge_peak is not set for pseudo-filter %s: %s" % (filt, e))
269 
270  self.badFlags = {}
271  for flag in self.config.flags:
272  try:
273  self.badFlags[flag] = self.schema.find(flag).getKey()
274  except KeyError as exc:
275  self.log.warn("Can't find flag %s in schema: %s" % (flag, exc,))
276 
277  def runDataRef(self, patchRefList):
278  """!
279  @brief Merge coadd sources from multiple bands. Calls @ref `run`.
280  @param[in] patchRefList list of data references for each filter
281  """
282  catalogs = dict(readCatalog(self, patchRef) for patchRef in patchRefList)
283  mergedCatalog = self.run(catalogs).mergedCatalog
284  self.write(patchRefList[0], mergedCatalog)
285 
286  def run(self, catalogs):
287  """!
288  Merge measurement catalogs to create a single reference catalog for forced photometry
289 
290  @param[in] catalogs: the catalogs to be merged
291 
292  For parent sources, we choose the first band in config.priorityList for which the
293  merge_footprint flag for that band is is True.
294 
295  For child sources, the logic is the same, except that we use the merge_peak flags.
296  """
297  # Put catalogs, filters in priority order
298  orderedCatalogs = [catalogs[band] for band in self.config.priorityList if band in catalogs.keys()]
299  orderedKeys = [self.flagKeys[band] for band in self.config.priorityList if band in catalogs.keys()]
300 
301  mergedCatalog = afwTable.SourceCatalog(self.schema)
302  mergedCatalog.reserve(len(orderedCatalogs[0]))
303 
304  idKey = orderedCatalogs[0].table.getIdKey()
305  for catalog in orderedCatalogs[1:]:
306  if numpy.any(orderedCatalogs[0].get(idKey) != catalog.get(idKey)):
307  raise ValueError("Error in inputs to MergeCoaddMeasurements: source IDs do not match")
308 
309  # This first zip iterates over all the catalogs simultaneously, yielding a sequence of one
310  # record for each band, in priority order.
311  for orderedRecords in zip(*orderedCatalogs):
312 
313  maxSNRecord = None
314  maxSNFlagKeys = None
315  maxSN = 0.
316  priorityRecord = None
317  priorityFlagKeys = None
318  prioritySN = 0.
319  hasPseudoFilter = False
320 
321  # Now we iterate over those record-band pairs, keeping track of the priority and the
322  # largest S/N band.
323  for inputRecord, flagKeys in zip(orderedRecords, orderedKeys):
324  parent = (inputRecord.getParent() == 0 and inputRecord.get(flagKeys.footprint))
325  child = (inputRecord.getParent() != 0 and inputRecord.get(flagKeys.peak))
326 
327  if not (parent or child):
328  for pseudoFilterKey in self.pseudoFilterKeys:
329  if inputRecord.get(pseudoFilterKey):
330  hasPseudoFilter = True
331  priorityRecord = inputRecord
332  priorityFlagKeys = flagKeys
333  break
334  if hasPseudoFilter:
335  break
336 
337  isBad = any(inputRecord.get(flag) for flag in self.badFlags)
338  if isBad or inputRecord.get(self.fluxFlagKey) or inputRecord.get(self.instFluxErrKey) == 0:
339  sn = 0.
340  else:
341  sn = inputRecord.get(self.instFluxKey)/inputRecord.get(self.instFluxErrKey)
342  if numpy.isnan(sn) or sn < 0.:
343  sn = 0.
344  if (parent or child) and priorityRecord is None:
345  priorityRecord = inputRecord
346  priorityFlagKeys = flagKeys
347  prioritySN = sn
348  if sn > maxSN:
349  maxSNRecord = inputRecord
350  maxSNFlagKeys = flagKeys
351  maxSN = sn
352 
353  # If the priority band has a low S/N we would like to choose the band with the highest S/N as
354  # the reference band instead. However, we only want to choose the highest S/N band if it is
355  # significantly better than the priority band. Therefore, to choose a band other than the
356  # priority, we require that the priority S/N is below the minimum threshold and that the
357  # difference between the priority and highest S/N is larger than the difference threshold.
358  #
359  # For pseudo code objects we always choose the first band in the priority list.
360  bestRecord = None
361  bestFlagKeys = None
362  if hasPseudoFilter:
363  bestRecord = priorityRecord
364  bestFlagKeys = priorityFlagKeys
365  elif (prioritySN < self.config.minSN and (maxSN - prioritySN) > self.config.minSNDiff and
366  maxSNRecord is not None):
367  bestRecord = maxSNRecord
368  bestFlagKeys = maxSNFlagKeys
369  elif priorityRecord is not None:
370  bestRecord = priorityRecord
371  bestFlagKeys = priorityFlagKeys
372 
373  if bestRecord is not None and bestFlagKeys is not None:
374  outputRecord = mergedCatalog.addNew()
375  outputRecord.assign(bestRecord, self.schemaMapper)
376  outputRecord.set(bestFlagKeys.output, True)
377  else: # if we didn't find any records
378  raise ValueError("Error in inputs to MergeCoaddMeasurements: no valid reference for %s" %
379  inputRecord.getId())
380 
381  # more checking for sane inputs, since zip silently iterates over the smallest sequence
382  for inputCatalog in orderedCatalogs:
383  if len(mergedCatalog) != len(inputCatalog):
384  raise ValueError("Mismatch between catalog sizes: %s != %s" %
385  (len(mergedCatalog), len(orderedCatalogs)))
386 
387  return pipeBase.Struct(
388  mergedCatalog=mergedCatalog
389  )
390 
391  def write(self, patchRef, catalog):
392  """!
393  @brief Write the output.
394 
395  @param[in] patchRef data reference for patch
396  @param[in] catalog catalog
397 
398  We write as the dataset provided by the 'outputDataset'
399  class variable.
400  """
401  patchRef.put(catalog, self.config.coaddName + "Coadd_" + self.outputDataset)
402  # since the filter isn't actually part of the data ID for the dataset we're saving,
403  # it's confusing to see it in the log message, even if the butler simply ignores it.
404  mergeDataId = patchRef.dataId.copy()
405  del mergeDataId["filter"]
406  self.log.info("Wrote merged catalog: %s" % (mergeDataId,))
407 
408  def writeMetadata(self, dataRefList):
409  """!
410  @brief No metadata to write, and not sure how to write it for a list of dataRefs.
411  """
412  pass
def runDataRef(self, patchRefList)
Merge coadd sources from multiple bands.
def makeMergeArgumentParser(name, dataset)
Create a suitable ArgumentParser.
def readCatalog(task, patchRef)
Read input catalog.
def run(self, catalogs)
Merge measurement catalogs to create a single reference catalog for forced photometry.
Configuration parameters for the MergeMeasurementsTask.
def write(self, patchRef, catalog)
Write the output.
def adaptArgsAndRun(self, inputData, inputDataIds, outputDataIds, butler)
def __init__(self, butler=None, schema=None, initInputs=None, **kwargs)
Initialize the task.
def writeMetadata(self, dataRefList)
No metadata to write, and not sure how to write it for a list of dataRefs.