lsst.pipe.tasks  16.0-49-g42e581f7+8
mergeDetections.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 #
3 # LSST Data Management System
4 # Copyright 2008-2015 AURA/LSST.
5 #
6 # This product includes software developed by the
7 # LSST Project (http://www.lsst.org/).
8 #
9 # This program is free software: you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation, either version 3 of the License, or
12 # (at your option) any later version.
13 #
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
18 #
19 # You should have received a copy of the LSST License Statement and
20 # the GNU General Public License along with this program. If not,
21 # see <https://www.lsstcorp.org/LegalNotices/>.
22 #
23 
24 from .multiBandUtils import (CullPeaksConfig, MergeSourcesRunner, _makeMakeIdFactory, makeMergeArgumentParser,
25  getInputSchema, getShortFilterName, readCatalog)
26 
27 
28 import lsst.afw.detection as afwDetect
29 import lsst.afw.image as afwImage
30 import lsst.afw.table as afwTable
31 
32 from lsst.meas.algorithms import SkyObjectsTask
33 from lsst.pex.config import Config, Field, ListField, ConfigurableField, ConfigField
34 from lsst.pipe.base import (CmdLineTask, PipelineTask, PipelineTaskConfig, InitOutputDatasetField,
35  InputDatasetField, InitInputDatasetField, OutputDatasetField, Struct)
36 from lsst.pipe.tasks.coaddBase import getSkyInfo
37 
38 
class MergeDetectionsConfig(PipelineTaskConfig):
    """!
    @anchor MergeDetectionsConfig_

    @brief Configuration parameters for the MergeDetectionsTask.
    """
    # Angular tolerances (arcsec) governing when a peak in a newly-added band
    # is matched to an existing merged peak versus creating a new source.
    minNewPeak = Field(dtype=float, default=1,
                       doc="Minimum distance from closest peak to create a new one (in arcsec).")

    maxSamePeak = Field(dtype=float, default=0.3,
                        doc="When adding new catalogs to the merge, all peaks less than this distance "
                        " (in arcsec) to an existing peak will be flagged as detected in that catalog.")
    # Controls for discarding likely-spurious peaks (see cullPeaks() in the task).
    cullPeaks = ConfigField(dtype=CullPeaksConfig, doc="Configuration for how to cull peaks.")

    # Sky objects are synthetic blank-sky sources; they carry this pseudo-filter
    # name instead of a real band name.
    skyFilterName = Field(dtype=str, default="sky",
                          doc="Name of `filter' used to label sky objects (e.g. flag merge_peak_sky is set)\n"
                          "(N.b. should be in MergeMeasurementsConfig.pseudoFilterList)")
    skyObjects = ConfigurableField(target=SkyObjectsTask, doc="Generate sky objects")
    priorityList = ListField(dtype=str, default=[],
                             doc="Priority-ordered list of bands for the merge.")
    coaddName = Field(dtype=str, default="deep", doc="Name of coadd")

    # PipelineTask dataset declarations.  The {inputCoaddName}/{outputCoaddName}
    # template values are filled in by setDefaults() below.
    schema = InitInputDatasetField(
        doc="Schema of the input detection catalog",
        name="",
        nameTemplate="{inputCoaddName}Coadd_det_schema",
        storageClass="SourceCatalog"
    )

    outputSchema = InitOutputDatasetField(
        doc="Schema of the merged detection catalog",
        nameTemplate="{outputCoaddName}Coadd_mergeDet_schema",
        storageClass="SourceCatalog"
    )

    catalogs = InputDatasetField(
        doc="Detection Catalogs to be merged",
        nameTemplate="{inputCoaddName}Coadd_det",
        storageClass="SourceCatalog",
        dimensions=("Tract", "Patch", "SkyMap", "AbstractFilter")
    )

    skyMap = InputDatasetField(
        doc="SkyMap to be used in merging",
        nameTemplate="{inputCoaddName}Coadd_skyMap",
        storageClass="SkyMap",
        dimensions=("SkyMap",),
        scalar=True
    )

    outputCatalog = OutputDatasetField(
        doc="Merged Detection catalog",
        nameTemplate="{outputCoaddName}Coadd_mergeDet",
        storageClass="SourceCatalog",
        dimensions=("Tract", "Patch", "SkyMap"),
        scalar=True
    )

    def setDefaults(self):
        """Fill in dataset-name templates, sky-object mask planes, and quantum dimensions."""
        Config.setDefaults(self)
        self.formatTemplateNames({"inputCoaddName": 'deep', "outputCoaddName": "deep"})
        self.skyObjects.avoidMask = ["DETECTED"]  # Nothing else is available in our custom mask
        self.quantum.dimensions = ("Tract", "Patch", "SkyMap")

    def validate(self):
        """Raise RuntimeError if the configuration is unusable (empty priorityList)."""
        super().validate()
        if len(self.priorityList) == 0:
            raise RuntimeError("No priority list provided")
107 
108 
109 class MergeDetectionsTask(PipelineTask, CmdLineTask):
110  r"""!
111  @anchor MergeDetectionsTask_
112 
113  @brief Merge coadd detections from multiple bands.
114 
115  @section pipe_tasks_multiBand_Contents Contents
116 
117  - @ref pipe_tasks_multiBand_MergeDetectionsTask_Purpose
118  - @ref pipe_tasks_multiBand_MergeDetectionsTask_Init
119  - @ref pipe_tasks_multiBand_MergeDetectionsTask_Run
120  - @ref pipe_tasks_multiBand_MergeDetectionsTask_Config
121  - @ref pipe_tasks_multiBand_MergeDetectionsTask_Debug
122  - @ref pipe_tasks_multiband_MergeDetectionsTask_Example
123 
124  @section pipe_tasks_multiBand_MergeDetectionsTask_Purpose Description
125 
126  Command-line task that merges sources detected in coadds of exposures obtained with different filters.
127 
128  To perform photometry consistently across coadds in multiple filter bands, we create a master catalog of
129  sources from all bands by merging the sources (peaks & footprints) detected in each coadd, while keeping
130  track of which band each source originates in.
131 
132  The catalog merge is performed by @ref getMergedSourceCatalog. Spurious peaks detected around bright
133  objects are culled as described in @ref CullPeaksConfig_.
134 
135  @par Inputs:
136  deepCoadd_det{tract,patch,filter}: SourceCatalog (only parent Footprints)
137  @par Outputs:
138  deepCoadd_mergeDet{tract,patch}: SourceCatalog (only parent Footprints)
139  @par Data Unit:
140  tract, patch
141 
142  @section pipe_tasks_multiBand_MergeDetectionsTask_Init Task initialisation
143 
144  @copydoc \_\_init\_\_
145 
146  @section pipe_tasks_multiBand_MergeDetectionsTask_Run Invoking the Task
147 
148  @copydoc run
149 
150  @section pipe_tasks_multiBand_MergeDetectionsTask_Config Configuration parameters
151 
152  See @ref MergeDetectionsConfig_
153 
154  @section pipe_tasks_multiBand_MergeDetectionsTask_Debug Debug variables
155 
156  The @link lsst.pipe.base.cmdLineTask.CmdLineTask command line task@endlink interface supports a flag @c -d
157  to import @b debug.py from your @c PYTHONPATH; see @ref baseDebug for more about @b debug.py files.
158 
159  MergeDetectionsTask has no debug variables.
160 
161  @section pipe_tasks_multiband_MergeDetectionsTask_Example A complete example of using MergeDetectionsTask
162 
163  MergeDetectionsTask is meant to be run after detecting sources in coadds generated for the chosen subset
164  of the available bands.
165  The purpose of the task is to merge sources (peaks & footprints) detected in the coadds generated from the
166  chosen subset of filters.
167  Subsequent tasks in the multi-band processing procedure will deblend the generated master list of sources
168  and, eventually, perform forced photometry.
169  Command-line usage of MergeDetectionsTask expects data references for all the coadds to be processed.
170  A list of the available optional arguments can be obtained by calling mergeCoaddDetections.py with the
171  `--help` command line argument:
172  @code
173  mergeCoaddDetections.py --help
174  @endcode
175 
176  To demonstrate usage of the MergeDetectionsTask in the larger context of multi-band processing, we
177  will process HSC data in the [ci_hsc](https://github.com/lsst/ci_hsc) package. Assuming one has finished
178  step 5 at @ref pipeTasks_multiBand, one may merge the catalogs of sources from each coadd as follows:
179  @code
180  mergeCoaddDetections.py $CI_HSC_DIR/DATA --id patch=5,4 tract=0 filter=HSC-I^HSC-R
181  @endcode
182  This will merge the HSC-I & -R band parent source catalogs and write the results to
183  `$CI_HSC_DIR/DATA/deepCoadd-results/merged/0/5,4/mergeDet-0-5,4.fits`.
184 
185  The next step in the multi-band processing procedure is
186  @ref MeasureMergedCoaddSourcesTask_ "MeasureMergedCoaddSourcesTask"
187  """
    # Configuration class paired with this task.
    ConfigClass = MergeDetectionsConfig
    # Runner that groups per-band data references into a single task invocation.
    RunnerClass = MergeSourcesRunner
    _DefaultName = "mergeCoaddDetections"
    # Short dataset suffixes for the per-band input and merged output catalogs.
    inputDataset = "det"
    outputDataset = "mergeDet"
    # Factory for source IDs, seeded from the "MergedCoaddId" dataset.
    makeIdFactory = _makeMakeIdFactory("MergedCoaddId")
194 
195  @classmethod
196  def _makeArgumentParser(cls):
198 
199  def getInputSchema(self, butler=None, schema=None):
200  return getInputSchema(self, butler, schema)
201 
202  def __init__(self, butler=None, schema=None, initInputs=None, **kwargs):
203  # Make PipelineTask-only wording less transitional after cmdlineTask is removed
204  """!
205  @brief Initialize the merge detections task.
206 
207  A @ref FootprintMergeList_ "FootprintMergeList" will be used to
208  merge the source catalogs.
209 
210  @param[in] schema the schema of the detection catalogs used as input to this one
211  @param[in] butler a butler used to read the input schema from disk, if schema is None
212  @param[in] initInputs This a PipelineTask-only argument that holds all inputs passed in
213  through the PipelineTask middleware
214  @param[in] **kwargs keyword arguments to be passed to CmdLineTask.__init__
215 
216  The task will set its own self.schema attribute to the schema of the output merged catalog.
217  """
218  super().__init__(**kwargs)
219  if initInputs is not None:
220  schema = initInputs['schema'].schema
221 
222  self.makeSubtask("skyObjects")
223  self.schema = self.getInputSchema(butler=butler, schema=schema)
224 
225  filterNames = [getShortFilterName(name) for name in self.config.priorityList]
226  filterNames += [self.config.skyFilterName]
227  self.merged = afwDetect.FootprintMergeList(self.schema, filterNames)
228 
230  return {"outputSchema": afwTable.SourceCatalog(self.schema)}
231 
232  def runDataRef(self, patchRefList):
233  catalogs = dict(readCatalog(self, patchRef) for patchRef in patchRefList)
234  skyInfo = getSkyInfo(coaddName=self.config.coaddName, patchRef=patchRefList[0])
235  idFactory = self.makeIdFactory(patchRefList[0])
236  skySeed = patchRefList[0].get(self.config.coaddName + "MergedCoaddId")
237  mergeCatalogStruct = self.run(catalogs, skyInfo, idFactory, skySeed)
238  self.write(patchRefList[0], mergeCatalogStruct.outputCatalog)
239 
    def adaptArgsAndRun(self, inputData, inputDataIds, outputDataIds, butler):
        """!
        @brief Adapt PipelineTask middleware inputs into the arguments of run() and invoke it.

        @param[in] inputData dict of input datasets, keyed by config dataset-field name
        @param[in] inputDataIds dict of input data IDs, parallel to inputData
        @param[in] outputDataIds dict of output data IDs (unused here)
        @param[in] butler Gen3 butler (unused here)
        @return the Struct returned by run()
        """
        # FINDME: DM-15843 needs to come back and address this function with final solution
        # Placeholder seed and ID factory until DM-15843 supplies deterministic IDs.
        inputData["skySeed"] = 0
        inputData["idFactory"] = afwTable.IdFactory.makeSimple()
        # Re-key the input catalogs by band name, as run() expects.
        catalogDict = {dataId['abstract_filter']: cat for dataId, cat in zip(inputDataIds['catalogs'],
                       inputData['catalogs'])}
        inputData['catalogs'] = catalogDict
        skyMap = inputData.pop('skyMap')
        # Can use the first dataId to find the tract and patch being worked on
        tractNumber = inputDataIds['catalogs'][0]['tract']
        tractInfo = skyMap[tractNumber]
        patchInfo = tractInfo.getPatchInfo(inputDataIds['catalogs'][0]['patch'])
        # Assemble the skyInfo Struct that run() consumes (cf. getSkyInfo in runDataRef).
        skyInfo = Struct(
            skyMap=skyMap,
            tractInfo=tractInfo,
            patchInfo=patchInfo,
            wcs=tractInfo.getWcs(),
            bbox=patchInfo.getOuterBBox()
        )
        inputData['skyInfo'] = skyInfo
        return self.run(**inputData)
261 
262  def run(self, catalogs, skyInfo, idFactory, skySeed):
263  r"""!
264  @brief Merge multiple catalogs.
265 
266  After ordering the catalogs and filters in priority order,
267  @ref getMergedSourceCatalog of the @ref FootprintMergeList_ "FootprintMergeList" created by
268  @ref \_\_init\_\_ is used to perform the actual merging. Finally, @ref cullPeaks is used to remove
269  garbage peaks detected around bright objects.
270 
271  @param[in] catalogs
272  @param[in] patchRef
273  @param[out] mergedList
274  """
275 
276  # Convert distance to tract coordinate
277  tractWcs = skyInfo.wcs
278  peakDistance = self.config.minNewPeak / tractWcs.getPixelScale().asArcseconds()
279  samePeakDistance = self.config.maxSamePeak / tractWcs.getPixelScale().asArcseconds()
280 
281  # Put catalogs, filters in priority order
282  orderedCatalogs = [catalogs[band] for band in self.config.priorityList if band in catalogs.keys()]
283  orderedBands = [getShortFilterName(band) for band in self.config.priorityList
284  if band in catalogs.keys()]
285 
286  mergedList = self.merged.getMergedSourceCatalog(orderedCatalogs, orderedBands, peakDistance,
287  self.schema, idFactory,
288  samePeakDistance)
289 
290  #
291  # Add extra sources that correspond to blank sky
292  #
293  skySourceFootprints = self.getSkySourceFootprints(mergedList, skyInfo, skySeed)
294  if skySourceFootprints:
295  key = mergedList.schema.find("merge_footprint_%s" % self.config.skyFilterName).key
296  for foot in skySourceFootprints:
297  s = mergedList.addNew()
298  s.setFootprint(foot)
299  s.set(key, True)
300 
301  # Sort Peaks from brightest to faintest
302  for record in mergedList:
303  record.getFootprint().sortPeaks()
304  self.log.info("Merged to %d sources" % len(mergedList))
305  # Attempt to remove garbage peaks
306  self.cullPeaks(mergedList)
307  return Struct(outputCatalog=mergedList)
308 
309  def cullPeaks(self, catalog):
310  """!
311  @brief Attempt to remove garbage peaks (mostly on the outskirts of large blends).
312 
313  @param[in] catalog Source catalog
314  """
315  keys = [item.key for item in self.merged.getPeakSchema().extract("merge_peak_*").values()]
316  assert len(keys) > 0, "Error finding flags that associate peaks with their detection bands."
317  totalPeaks = 0
318  culledPeaks = 0
319  for parentSource in catalog:
320  # Make a list copy so we can clear the attached PeakCatalog and append the ones we're keeping
321  # to it (which is easier than deleting as we iterate).
322  keptPeaks = parentSource.getFootprint().getPeaks()
323  oldPeaks = list(keptPeaks)
324  keptPeaks.clear()
325  familySize = len(oldPeaks)
326  totalPeaks += familySize
327  for rank, peak in enumerate(oldPeaks):
328  if ((rank < self.config.cullPeaks.rankSufficient) or
329  (sum([peak.get(k) for k in keys]) >= self.config.cullPeaks.nBandsSufficient) or
330  (rank < self.config.cullPeaks.rankConsidered and
331  rank < self.config.cullPeaks.rankNormalizedConsidered * familySize)):
332  keptPeaks.append(peak)
333  else:
334  culledPeaks += 1
335  self.log.info("Culled %d of %d peaks" % (culledPeaks, totalPeaks))
336 
337  def getSchemaCatalogs(self):
338  """!
339  Return a dict of empty catalogs for each catalog dataset produced by this task.
340 
341  @param[out] dictionary of empty catalogs
342  """
343  mergeDet = afwTable.SourceCatalog(self.schema)
344  peak = afwDetect.PeakCatalog(self.merged.getPeakSchema())
345  return {self.config.coaddName + "Coadd_mergeDet": mergeDet,
346  self.config.coaddName + "Coadd_peak": peak}
347 
348  def getSkySourceFootprints(self, mergedList, skyInfo, seed):
349  """!
350  @brief Return a list of Footprints of sky objects which don't overlap with anything in mergedList
351 
352  @param mergedList The merged Footprints from all the input bands
353  @param skyInfo A description of the patch
354  @param seed Seed for the random number generator
355  """
356  mask = afwImage.Mask(skyInfo.patchInfo.getOuterBBox())
357  detected = mask.getPlaneBitMask("DETECTED")
358  for s in mergedList:
359  s.getFootprint().spans.setMask(mask, detected)
360 
361  footprints = self.skyObjects.run(mask, seed)
362  if not footprints:
363  return footprints
364 
365  # Need to convert the peak catalog's schema so we can set the "merge_peak_<skyFilterName>" flags
366  schema = self.merged.getPeakSchema()
367  mergeKey = schema.find("merge_peak_%s" % self.config.skyFilterName).key
368  converted = []
369  for oldFoot in footprints:
370  assert len(oldFoot.getPeaks()) == 1, "Should be a single peak only"
371  peak = oldFoot.getPeaks()[0]
372  newFoot = afwDetect.Footprint(oldFoot.spans, schema)
373  newFoot.addPeak(peak.getFx(), peak.getFy(), peak.getPeakValue())
374  newFoot.getPeaks()[0].set(mergeKey, True)
375  converted.append(newFoot)
376 
377  return converted
378 
379  def write(self, patchRef, catalog):
380  """!
381  @brief Write the output.
382 
383  @param[in] patchRef data reference for patch
384  @param[in] catalog catalog
385 
386  We write as the dataset provided by the 'outputDataset'
387  class variable.
388  """
389  patchRef.put(catalog, self.config.coaddName + "Coadd_" + self.outputDataset)
390  # since the filter isn't actually part of the data ID for the dataset we're saving,
391  # it's confusing to see it in the log message, even if the butler simply ignores it.
392  mergeDataId = patchRef.dataId.copy()
393  del mergeDataId["filter"]
394  self.log.info("Wrote merged catalog: %s" % (mergeDataId,))
395 
    def writeMetadata(self, dataRefList):
        """!
        @brief No metadata to write, and not sure how to write it for a list of dataRefs.
        """
        # Deliberate no-op.
        pass
def getSchemaCatalogs(self)
Return a dict of empty catalogs for each catalog dataset produced by this task.
def makeMergeArgumentParser(name, dataset)
Create a suitable ArgumentParser.
Merge coadd detections from multiple bands.
def readCatalog(task, patchRef)
Read input catalog.
def adaptArgsAndRun(self, inputData, inputDataIds, outputDataIds, butler)
def __init__(self, butler=None, schema=None, initInputs=None, **kwargs)
Initialize the merge detections task.
def run(self, catalogs, skyInfo, idFactory, skySeed)
Merge multiple catalogs.
Configuration parameters for the MergeDetectionsTask.
def getSkyInfo(coaddName, patchRef)
Return the SkyMap, tract and patch information, wcs, and outer bbox of the patch to be coadded...
Definition: coaddBase.py:231
def cullPeaks(self, catalog)
Attempt to remove garbage peaks (mostly on the outskirts of large blends).
def write(self, patchRef, catalog)
Write the output.
def getInputSchema(self, butler=None, schema=None)
def writeMetadata(self, dataRefList)
No metadata to write, and not sure how to write it for a list of dataRefs.
def getSkySourceFootprints(self, mergedList, skyInfo, seed)
Return a list of Footprints of sky objects which don't overlap with anything in mergedList.