lsst.pipe.tasks  16.0-29-gdc8abbdf+2
mergeDetections.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 #
3 # LSST Data Management System
4 # Copyright 2008-2015 AURA/LSST.
5 #
6 # This product includes software developed by the
7 # LSST Project (http://www.lsst.org/).
8 #
9 # This program is free software: you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation, either version 3 of the License, or
12 # (at your option) any later version.
13 #
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
18 #
19 # You should have received a copy of the LSST License Statement and
20 # the GNU General Public License along with this program. If not,
21 # see <https://www.lsstcorp.org/LegalNotices/>.
22 #
23 
24 from .multiBandUtils import (CullPeaksConfig, MergeSourcesRunner, _makeMakeIdFactory, makeMergeArgumentParser,
25  getInputSchema, getShortFilterName, readCatalog)
26 
27 
28 import lsst.afw.detection as afwDetect
29 import lsst.afw.image as afwImage
30 import lsst.afw.table as afwTable
31 
32 from lsst.meas.algorithms import SkyObjectsTask
33 from lsst.pex.config import Config, Field, ListField, ConfigurableField, ConfigField
34 from lsst.pipe.base import CmdLineTask
35 from lsst.pipe.tasks.coaddBase import getSkyInfo
36 
37 
class MergeDetectionsConfig(Config):
    """!
    @anchor MergeDetectionsConfig_

    @brief Configuration parameters for the MergeDetectionsTask.

    Distances are given in arcseconds; the task converts them to pixels
    using the pixel scale of the tract WCS before merging.
    """
    # --- Peak matching distances -------------------------------------------
    minNewPeak = Field(
        dtype=float,
        default=1,
        doc="Minimum distance from closest peak to create a new one (in arcsec).",
    )

    maxSamePeak = Field(
        dtype=float,
        default=0.3,
        doc=("When adding new catalogs to the merge, all peaks less than this distance "
             " (in arcsec) to an existing peak will be flagged as detected in that catalog."),
    )

    # --- Peak culling --------------------------------------------------------
    cullPeaks = ConfigField(
        dtype=CullPeaksConfig,
        doc="Configuration for how to cull peaks.",
    )

    # --- Sky objects ---------------------------------------------------------
    skyFilterName = Field(
        dtype=str,
        default="sky",
        doc=("Name of `filter' used to label sky objects (e.g. flag merge_peak_sky is set)\n"
             "(N.b. should be in MergeMeasurementsConfig.pseudoFilterList)"),
    )
    skyObjects = ConfigurableField(
        target=SkyObjectsTask,
        doc="Generate sky objects",
    )

    # --- Merge ordering and coadd selection ----------------------------------
    priorityList = ListField(
        dtype=str,
        default=[],
        doc="Priority-ordered list of bands for the merge.",
    )
    coaddName = Field(
        dtype=str,
        default="deep",
        doc="Name of coadd",
    )

    def setDefaults(self):
        """!
        @brief Apply the base-class defaults, then restrict the sky-object avoidance mask.
        """
        Config.setDefaults(self)
        # The mask built for sky-object placement only carries the DETECTED plane,
        # so that is the only plane that can be avoided.
        self.skyObjects.avoidMask = ["DETECTED"]

    def validate(self):
        """!
        @brief Run the base-class validation and require a non-empty priority list.

        Raises RuntimeError if priorityList contains no entries.
        """
        Config.validate(self)
        if not len(self.priorityList):
            raise RuntimeError("No priority list provided")
68 
69 
70 class MergeDetectionsTask(CmdLineTask):
71  r"""!
72  @anchor MergeDetectionsTask_
73 
74  @brief Merge coadd detections from multiple bands.
75 
76  @section pipe_tasks_multiBand_Contents Contents
77 
78  - @ref pipe_tasks_multiBand_MergeDetectionsTask_Purpose
79  - @ref pipe_tasks_multiBand_MergeDetectionsTask_Init
80  - @ref pipe_tasks_multiBand_MergeDetectionsTask_Run
81  - @ref pipe_tasks_multiBand_MergeDetectionsTask_Config
82  - @ref pipe_tasks_multiBand_MergeDetectionsTask_Debug
83  - @ref pipe_tasks_multiband_MergeDetectionsTask_Example
84 
85  @section pipe_tasks_multiBand_MergeDetectionsTask_Purpose Description
86 
87  Command-line task that merges sources detected in coadds of exposures obtained with different filters.
88 
89  To perform photometry consistently across coadds in multiple filter bands, we create a master catalog of
90  sources from all bands by merging the sources (peaks & footprints) detected in each coadd, while keeping
91  track of which band each source originates in.
92 
93  The catalog merge is performed by @ref getMergedSourceCatalog. Spurious peaks detected around bright
94  objects are culled as described in @ref CullPeaksConfig_.
95 
96  @par Inputs:
97  deepCoadd_det{tract,patch,filter}: SourceCatalog (only parent Footprints)
98  @par Outputs:
99  deepCoadd_mergeDet{tract,patch}: SourceCatalog (only parent Footprints)
100  @par Data Unit:
101  tract, patch
102 
103  @section pipe_tasks_multiBand_MergeDetectionsTask_Init Task initialisation
104 
105  @copydoc \_\_init\_\_
106 
107  @section pipe_tasks_multiBand_MergeDetectionsTask_Run Invoking the Task
108 
109  @copydoc run
110 
111  @section pipe_tasks_multiBand_MergeDetectionsTask_Config Configuration parameters
112 
113  See @ref MergeDetectionsConfig_
114 
115  @section pipe_tasks_multiBand_MergeDetectionsTask_Debug Debug variables
116 
117  The @link lsst.pipe.base.cmdLineTask.CmdLineTask command line task@endlink interface supports a flag @c -d
118  to import @b debug.py from your @c PYTHONPATH; see @ref baseDebug for more about @b debug.py files.
119 
120  MergeDetectionsTask has no debug variables.
121 
122  @section pipe_tasks_multiband_MergeDetectionsTask_Example A complete example of using MergeDetectionsTask
123 
124  MergeDetectionsTask is meant to be run after detecting sources in coadds generated for the chosen subset
125  of the available bands.
126  The purpose of the task is to merge sources (peaks & footprints) detected in the coadds generated from the
127  chosen subset of filters.
128  Subsequent tasks in the multi-band processing procedure will deblend the generated master list of sources
129  and, eventually, perform forced photometry.
130  Command-line usage of MergeDetectionsTask expects data references for all the coadds to be processed.
131  A list of the available optional arguments can be obtained by calling mergeCoaddDetections.py with the
132  `--help` command line argument:
133  @code
134  mergeCoaddDetections.py --help
135  @endcode
136 
137 To demonstrate usage of the MergeDetectionsTask in the larger context of multi-band processing, we
138  will process HSC data in the [ci_hsc](https://github.com/lsst/ci_hsc) package. Assuming one has finished
139  step 5 at @ref pipeTasks_multiBand, one may merge the catalogs of sources from each coadd as follows:
140  @code
141  mergeCoaddDetections.py $CI_HSC_DIR/DATA --id patch=5,4 tract=0 filter=HSC-I^HSC-R
142  @endcode
143  This will merge the HSC-I & -R band parent source catalogs and write the results to
144  `$CI_HSC_DIR/DATA/deepCoadd-results/merged/0/5,4/mergeDet-0-5,4.fits`.
145 
146  The next step in the multi-band processing procedure is
147  @ref MeasureMergedCoaddSourcesTask_ "MeasureMergedCoaddSourcesTask"
148  """
149  ConfigClass = MergeDetectionsConfig
150  RunnerClass = MergeSourcesRunner
151  _DefaultName = "mergeCoaddDetections"
152  inputDataset = "det"
153  outputDataset = "mergeDet"
154  makeIdFactory = _makeMakeIdFactory("MergedCoaddId")
155 
156  @classmethod
157  def _makeArgumentParser(cls):
159 
160  def getInputSchema(self, butler=None, schema=None):
161  return getInputSchema(self, butler, schema)
162 
163  def __init__(self, butler=None, schema=None, **kwargs):
164  """!
165  @brief Initialize the merge detections task.
166 
167  A @ref FootprintMergeList_ "FootprintMergeList" will be used to
168  merge the source catalogs.
169 
170  @param[in] schema the schema of the detection catalogs used as input to this one
171  @param[in] butler a butler used to read the input schema from disk, if schema is None
172  @param[in] **kwargs keyword arguments to be passed to CmdLineTask.__init__
173 
174  The task will set its own self.schema attribute to the schema of the output merged catalog.
175  """
176  CmdLineTask.__init__(self, **kwargs)
177  self.makeSubtask("skyObjects")
178  self.schema = self.getInputSchema(butler=butler, schema=schema)
179 
180  filterNames = [getShortFilterName(name) for name in self.config.priorityList]
181  filterNames += [self.config.skyFilterName]
182  self.merged = afwDetect.FootprintMergeList(self.schema, filterNames)
183 
184  def runDataRef(self, patchRefList):
185  catalogs = dict(readCatalog(self, patchRef) for patchRef in patchRefList)
186  mergeCatalog = self.run(catalogs, patchRefList[0])
187  self.write(patchRefList[0], mergeCatalog)
188 
189  def run(self, catalogs, patchRef):
190  r"""!
191  @brief Merge multiple catalogs.
192 
193  After ordering the catalogs and filters in priority order,
194  @ref getMergedSourceCatalog of the @ref FootprintMergeList_ "FootprintMergeList" created by
195  @ref \_\_init\_\_ is used to perform the actual merging. Finally, @ref cullPeaks is used to remove
196  garbage peaks detected around bright objects.
197 
198  @param[in] catalogs
199  @param[in] patchRef
200  @param[out] mergedList
201  """
202 
203  # Convert distance to tract coordinate
204  skyInfo = getSkyInfo(coaddName=self.config.coaddName, patchRef=patchRef)
205  tractWcs = skyInfo.wcs
206  peakDistance = self.config.minNewPeak / tractWcs.getPixelScale().asArcseconds()
207  samePeakDistance = self.config.maxSamePeak / tractWcs.getPixelScale().asArcseconds()
208 
209  # Put catalogs, filters in priority order
210  orderedCatalogs = [catalogs[band] for band in self.config.priorityList if band in catalogs.keys()]
211  orderedBands = [getShortFilterName(band) for band in self.config.priorityList
212  if band in catalogs.keys()]
213 
214  mergedList = self.merged.getMergedSourceCatalog(orderedCatalogs, orderedBands, peakDistance,
215  self.schema, self.makeIdFactory(patchRef),
216  samePeakDistance)
217 
218  #
219  # Add extra sources that correspond to blank sky
220  #
221  skySeed = patchRef.get(self.config.coaddName + "MergedCoaddId")
222  skySourceFootprints = self.getSkySourceFootprints(mergedList, skyInfo, skySeed)
223  if skySourceFootprints:
224  key = mergedList.schema.find("merge_footprint_%s" % self.config.skyFilterName).key
225  for foot in skySourceFootprints:
226  s = mergedList.addNew()
227  s.setFootprint(foot)
228  s.set(key, True)
229 
230  # Sort Peaks from brightest to faintest
231  for record in mergedList:
232  record.getFootprint().sortPeaks()
233  self.log.info("Merged to %d sources" % len(mergedList))
234  # Attempt to remove garbage peaks
235  self.cullPeaks(mergedList)
236  return mergedList
237 
238  def cullPeaks(self, catalog):
239  """!
240  @brief Attempt to remove garbage peaks (mostly on the outskirts of large blends).
241 
242  @param[in] catalog Source catalog
243  """
244  keys = [item.key for item in self.merged.getPeakSchema().extract("merge_peak_*").values()]
245  assert len(keys) > 0, "Error finding flags that associate peaks with their detection bands."
246  totalPeaks = 0
247  culledPeaks = 0
248  for parentSource in catalog:
249  # Make a list copy so we can clear the attached PeakCatalog and append the ones we're keeping
250  # to it (which is easier than deleting as we iterate).
251  keptPeaks = parentSource.getFootprint().getPeaks()
252  oldPeaks = list(keptPeaks)
253  keptPeaks.clear()
254  familySize = len(oldPeaks)
255  totalPeaks += familySize
256  for rank, peak in enumerate(oldPeaks):
257  if ((rank < self.config.cullPeaks.rankSufficient) or
258  (sum([peak.get(k) for k in keys]) >= self.config.cullPeaks.nBandsSufficient) or
259  (rank < self.config.cullPeaks.rankConsidered and
260  rank < self.config.cullPeaks.rankNormalizedConsidered * familySize)):
261  keptPeaks.append(peak)
262  else:
263  culledPeaks += 1
264  self.log.info("Culled %d of %d peaks" % (culledPeaks, totalPeaks))
265 
266  def getSchemaCatalogs(self):
267  """!
268  Return a dict of empty catalogs for each catalog dataset produced by this task.
269 
270  @param[out] dictionary of empty catalogs
271  """
272  mergeDet = afwTable.SourceCatalog(self.schema)
273  peak = afwDetect.PeakCatalog(self.merged.getPeakSchema())
274  return {self.config.coaddName + "Coadd_mergeDet": mergeDet,
275  self.config.coaddName + "Coadd_peak": peak}
276 
277  def getSkySourceFootprints(self, mergedList, skyInfo, seed):
278  """!
279  @brief Return a list of Footprints of sky objects which don't overlap with anything in mergedList
280 
281  @param mergedList The merged Footprints from all the input bands
282  @param skyInfo A description of the patch
283  @param seed Seed for the random number generator
284  """
285  mask = afwImage.Mask(skyInfo.patchInfo.getOuterBBox())
286  detected = mask.getPlaneBitMask("DETECTED")
287  for s in mergedList:
288  s.getFootprint().spans.setMask(mask, detected)
289 
290  footprints = self.skyObjects.run(mask, seed)
291  if not footprints:
292  return footprints
293 
294  # Need to convert the peak catalog's schema so we can set the "merge_peak_<skyFilterName>" flags
295  schema = self.merged.getPeakSchema()
296  mergeKey = schema.find("merge_peak_%s" % self.config.skyFilterName).key
297  converted = []
298  for oldFoot in footprints:
299  assert len(oldFoot.getPeaks()) == 1, "Should be a single peak only"
300  peak = oldFoot.getPeaks()[0]
301  newFoot = afwDetect.Footprint(oldFoot.spans, schema)
302  newFoot.addPeak(peak.getFx(), peak.getFy(), peak.getPeakValue())
303  newFoot.getPeaks()[0].set(mergeKey, True)
304  converted.append(newFoot)
305 
306  return converted
307 
308  def write(self, patchRef, catalog):
309  """!
310  @brief Write the output.
311 
312  @param[in] patchRef data reference for patch
313  @param[in] catalog catalog
314 
315  We write as the dataset provided by the 'outputDataset'
316  class variable.
317  """
318  patchRef.put(catalog, self.config.coaddName + "Coadd_" + self.outputDataset)
319  # since the filter isn't actually part of the data ID for the dataset we're saving,
320  # it's confusing to see it in the log message, even if the butler simply ignores it.
321  mergeDataId = patchRef.dataId.copy()
322  del mergeDataId["filter"]
323  self.log.info("Wrote merged catalog: %s" % (mergeDataId,))
324 
325  def writeMetadata(self, dataRefList):
326  """!
327  @brief No metadata to write, and not sure how to write it for a list of dataRefs.
328  """
329  pass
def getSchemaCatalogs(self)
Return a dict of empty catalogs for each catalog dataset produced by this task.
def makeMergeArgumentParser(name, dataset)
Create a suitable ArgumentParser.
Merge coadd detections from multiple bands.
def readCatalog(task, patchRef)
Read input catalog.
def run(self, catalogs, patchRef)
Merge multiple catalogs.
def __init__(self, butler=None, schema=None, **kwargs)
Initialize the merge detections task.
Configuration parameters for the MergeDetectionsTask.
def getSkyInfo(coaddName, patchRef)
Return the SkyMap, tract and patch information, wcs, and outer bbox of the patch to be coadded...
Definition: coaddBase.py:253
def cullPeaks(self, catalog)
Attempt to remove garbage peaks (mostly on the outskirts of large blends).
def write(self, patchRef, catalog)
Write the output.
def getInputSchema(self, butler=None, schema=None)
def writeMetadata(self, dataRefList)
No metadata to write, and not sure how to write it for a list of dataRefs.
def getSkySourceFootprints(self, mergedList, skyInfo, seed)
Return a list of Footprints of sky objects which don't overlap with anything in mergedList.