lsst.pipe.tasks  21.0.0-20-g55224fe4+1b10d9d6c3
mergeDetections.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 #
3 # LSST Data Management System
4 # Copyright 2008-2015 AURA/LSST.
5 #
6 # This product includes software developed by the
7 # LSST Project (http://www.lsst.org/).
8 #
9 # This program is free software: you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation, either version 3 of the License, or
12 # (at your option) any later version.
13 #
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
18 #
19 # You should have received a copy of the LSST License Statement and
20 # the GNU General Public License along with this program. If not,
21 # see <https://www.lsstcorp.org/LegalNotices/>.
22 #
23 
from .multiBandUtils import (CullPeaksConfig, MergeSourcesRunner, _makeMakeIdFactory, makeMergeArgumentParser,
                             getInputSchema, getShortFilterName, readCatalog)


import lsst.afw.detection as afwDetect
import lsst.afw.image as afwImage
import lsst.afw.table as afwTable

from lsst.meas.algorithms import SkyObjectsTask
from lsst.skymap import BaseSkyMap
from lsst.pex.config import Config, Field, ListField, ConfigurableField, ConfigField
from lsst.pipe.base import (CmdLineTask, PipelineTask, PipelineTaskConfig, Struct,
                            PipelineTaskConnections)
import lsst.pipe.base.connectionTypes as cT
from lsst.pipe.tasks.coaddBase import getSkyInfo
39 
40 
class MergeDetectionsConnections(
    PipelineTaskConnections,
    dimensions=("tract", "patch", "skymap"),
    defaultTemplates={"inputCoaddName": "deep", "outputCoaddName": "deep"}
):
    """Butler connections used by the detection-merging task.

    Declares the init-time schema datasets, the per-band detection catalogs
    and sky map that are read, and the merged catalog that is written.
    Dataset names are templated on ``inputCoaddName`` / ``outputCoaddName``.
    """

    # ---- init-time datasets (schemas) ----
    schema = cT.InitInput(
        name="{inputCoaddName}Coadd_det_schema",
        storageClass="SourceCatalog",
        doc="Schema of the input detection catalog",
    )

    outputSchema = cT.InitOutput(
        name="{outputCoaddName}Coadd_mergeDet_schema",
        storageClass="SourceCatalog",
        doc="Schema of the merged detection catalog",
    )

    outputPeakSchema = cT.InitOutput(
        name="{outputCoaddName}Coadd_peak_schema",
        storageClass="PeakCatalog",
        doc="Output schema of the Footprint peak catalog",
    )

    # ---- run-time inputs ----
    # One catalog per band for the tract/patch being processed.
    catalogs = cT.Input(
        name="{inputCoaddName}Coadd_det",
        storageClass="SourceCatalog",
        dimensions=("tract", "patch", "skymap", "band"),
        multiple=True,
        doc="Detection Catalogs to be merged",
    )

    skyMap = cT.Input(
        name=BaseSkyMap.SKYMAP_DATASET_TYPE_NAME,
        storageClass="SkyMap",
        dimensions=("skymap",),
        doc="SkyMap to be used in merging",
    )

    # ---- run-time output ----
    outputCatalog = cT.Output(
        name="{outputCoaddName}Coadd_mergeDet",
        storageClass="SourceCatalog",
        dimensions=("tract", "patch", "skymap"),
        doc="Merged Detection catalog",
    )
83 
84 
class MergeDetectionsConfig(PipelineTaskConfig, pipelineConnections=MergeDetectionsConnections):
    """!
    @anchor MergeDetectionsConfig_

    @brief Configuration parameters for the MergeDetectionsTask.
    """
    minNewPeak = Field(dtype=float, default=1,
                       doc="Minimum distance from closest peak to create a new one (in arcsec).")

    maxSamePeak = Field(dtype=float, default=0.3,
                        doc="When adding new catalogs to the merge, all peaks less than this distance "
                        " (in arcsec) to an existing peak will be flagged as detected in that catalog.")
    cullPeaks = ConfigField(dtype=CullPeaksConfig, doc="Configuration for how to cull peaks.")

    skyFilterName = Field(dtype=str, default="sky",
                          doc="Name of `filter' used to label sky objects (e.g. flag merge_peak_sky is set)\n"
                          "(N.b. should be in MergeMeasurementsConfig.pseudoFilterList)")
    skyObjects = ConfigurableField(target=SkyObjectsTask, doc="Generate sky objects")
    priorityList = ListField(dtype=str, default=[],
                             doc="Priority-ordered list of bands for the merge.")
    coaddName = Field(dtype=str, default="deep", doc="Name of coadd")

    def setDefaults(self):
        """!
        @brief Apply the base-class defaults, then restrict sky-object placement to avoid only
        the DETECTED mask plane.
        """
        # Go through the MRO (as validate() does) so that defaults set by any intermediate
        # base class are not skipped.
        super().setDefaults()
        self.skyObjects.avoidMask = ["DETECTED"]  # Nothing else is available in our custom mask

    def validate(self):
        """!
        @brief Run the base-class validation and require a non-empty priorityList.

        @throw RuntimeError if priorityList is empty or unset
        """
        super().validate()
        # Truthiness also covers an unset (None) list, which len() would reject with a TypeError.
        if not self.priorityList:
            raise RuntimeError("No priority list provided")
115 
116 
class MergeDetectionsTask(PipelineTask, CmdLineTask):
    r"""!
    @anchor MergeDetectionsTask_

    @brief Merge coadd detections from multiple bands.

    @section pipe_tasks_multiBand_Contents Contents

      - @ref pipe_tasks_multiBand_MergeDetectionsTask_Purpose
      - @ref pipe_tasks_multiBand_MergeDetectionsTask_Init
      - @ref pipe_tasks_multiBand_MergeDetectionsTask_Run
      - @ref pipe_tasks_multiBand_MergeDetectionsTask_Config
      - @ref pipe_tasks_multiBand_MergeDetectionsTask_Debug
      - @ref pipe_tasks_multiband_MergeDetectionsTask_Example

    @section pipe_tasks_multiBand_MergeDetectionsTask_Purpose Description

    Command-line task that merges sources detected in coadds of exposures obtained with different filters.

    To perform photometry consistently across coadds in multiple filter bands, we create a master catalog of
    sources from all bands by merging the sources (peaks & footprints) detected in each coadd, while keeping
    track of which band each source originates in.

    The catalog merge is performed by @ref getMergedSourceCatalog. Spurious peaks detected around bright
    objects are culled as described in @ref CullPeaksConfig_.

      @par Inputs:
        deepCoadd_det{tract,patch,filter}: SourceCatalog (only parent Footprints)
      @par Outputs:
        deepCoadd_mergeDet{tract,patch}: SourceCatalog (only parent Footprints)
      @par Data Unit:
        tract, patch

    @section pipe_tasks_multiBand_MergeDetectionsTask_Init Task initialisation

    @copydoc \_\_init\_\_

    @section pipe_tasks_multiBand_MergeDetectionsTask_Run Invoking the Task

    @copydoc run

    @section pipe_tasks_multiBand_MergeDetectionsTask_Config Configuration parameters

    See @ref MergeDetectionsConfig_

    @section pipe_tasks_multiBand_MergeDetectionsTask_Debug Debug variables

    The @link lsst.pipe.base.cmdLineTask.CmdLineTask command line task@endlink interface supports a flag @c -d
    to import @b debug.py from your @c PYTHONPATH; see @ref baseDebug for more about @b debug.py files.

    MergeDetectionsTask has no debug variables.

    @section pipe_tasks_multiband_MergeDetectionsTask_Example A complete example of using MergeDetectionsTask

    MergeDetectionsTask is meant to be run after detecting sources in coadds generated for the chosen subset
    of the available bands.
    The purpose of the task is to merge sources (peaks & footprints) detected in the coadds generated from the
    chosen subset of filters.
    Subsequent tasks in the multi-band processing procedure will deblend the generated master list of sources
    and, eventually, perform forced photometry.
    Command-line usage of MergeDetectionsTask expects data references for all the coadds to be processed.
    A list of the available optional arguments can be obtained by calling mergeCoaddDetections.py with the
    `--help` command line argument:
    @code
    mergeCoaddDetections.py --help
    @endcode

    To demonstrate usage of the MergeDetectionsTask in the larger context of multi-band processing, we
    will process HSC data in the [ci_hsc](https://github.com/lsst/ci_hsc) package. Assuming one has finished
    step 5 at @ref pipeTasks_multiBand, one may merge the catalogs of sources from each coadd as follows:
    @code
    mergeCoaddDetections.py $CI_HSC_DIR/DATA --id patch=5,4 tract=0 filter=HSC-I^HSC-R
    @endcode
    This will merge the HSC-I & -R band parent source catalogs and write the results to
    `$CI_HSC_DIR/DATA/deepCoadd-results/merged/0/5,4/mergeDet-0-5,4.fits`.

    The next step in the multi-band processing procedure is
    @ref MeasureMergedCoaddSourcesTask_ "MeasureMergedCoaddSourcesTask"
    """
    ConfigClass = MergeDetectionsConfig
    RunnerClass = MergeSourcesRunner
    _DefaultName = "mergeCoaddDetections"
    inputDataset = "det"
    outputDataset = "mergeDet"
    makeIdFactory = _makeMakeIdFactory("MergedCoaddId")

    @classmethod
    def _makeArgumentParser(cls):
        """!
        @brief Create the command-line argument parser for this task's name and input dataset.
        """
        return makeMergeArgumentParser(cls._DefaultName, cls.inputDataset)

    def getInputSchema(self, butler=None, schema=None):
        """!
        @brief Obtain the input schema by delegating to multiBandUtils.getInputSchema.

        @param[in] butler  butler forwarded to the helper (may be None)
        @param[in] schema  schema forwarded to the helper (may be None)
        """
        return getInputSchema(self, butler, schema)

    def __init__(self, butler=None, schema=None, initInputs=None, **kwargs):
        """!
        @brief Initialize the merge detections task.

        A @ref FootprintMergeList_ "FootprintMergeList" will be used to
        merge the source catalogs.

        @param[in] schema     the schema of the detection catalogs used as input to this one
        @param[in] butler     a butler used to read the input schema from disk, if schema is None
        @param[in] initInputs This is a PipelineTask-only argument that holds all inputs passed in
                              through the PipelineTask middleware; when given, its 'schema' entry
                              overrides the schema argument
        @param[in] **kwargs   keyword arguments forwarded to the base-class constructors

        The task will set its own self.schema attribute to the schema of the output merged catalog.
        """
        # Make PipelineTask-only wording less transitional after cmdlineTask is removed
        super().__init__(**kwargs)
        if initInputs is not None:
            schema = initInputs['schema'].schema

        self.makeSubtask("skyObjects")
        self.schema = self.getInputSchema(butler=butler, schema=schema)

        # The sky pseudo-filter is appended so the merge list also carries flags for sky objects.
        filterNames = [getShortFilterName(name) for name in self.config.priorityList]
        filterNames += [self.config.skyFilterName]
        self.merged = afwDetect.FootprintMergeList(self.schema, filterNames)
        # Empty catalogs carrying the output schemas; these attribute names match the
        # InitOutput connections declared for the task.
        self.outputSchema = afwTable.SourceCatalog(self.schema)
        self.outputPeakSchema = afwDetect.PeakCatalog(self.merged.getPeakSchema())

    def runDataRef(self, patchRefList):
        """!
        @brief Read the catalogs for a list of data references, merge them and write the result.

        @param[in] patchRefList  list of data references to the catalogs to merge; the first entry
                                 alone is used to look up the sky info, build the ID factory, fetch
                                 the sky-object seed and write the output, so all entries are
                                 expected to refer to the same patch
        """
        catalogs = dict(readCatalog(self, patchRef) for patchRef in patchRefList)
        firstRef = patchRefList[0]
        skyInfo = getSkyInfo(coaddName=self.config.coaddName, patchRef=firstRef)
        idFactory = self.makeIdFactory(firstRef)
        skySeed = firstRef.get(self.config.coaddName + "MergedCoaddId")
        mergeCatalogStruct = self.run(catalogs, skyInfo, idFactory, skySeed)
        self.write(firstRef, mergeCatalogStruct.outputCatalog)

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        """!
        @brief Assemble the arguments of @ref run from a quantum, run the merge and store the outputs.

        @param[in] butlerQC    quantum-context butler used to get the inputs and put the outputs
        @param[in] inputRefs   references to the input datasets
        @param[in] outputRefs  references to the output datasets
        """
        inputs = butlerQC.get(inputRefs)
        # The packed tract+patch ID serves both as the sky-object seed and as the base of source IDs.
        packedId, maxBits = butlerQC.quantum.dataId.pack("tract_patch", returnMaxBits=True)
        inputs["skySeed"] = packedId
        inputs["idFactory"] = afwTable.IdFactory.makeSource(packedId, 64 - maxBits)
        # run() expects the catalogs keyed by band rather than as a flat list.
        inputs['catalogs'] = {ref.dataId['band']: cat
                              for ref, cat in zip(inputRefs.catalogs, inputs['catalogs'])}
        skyMap = inputs.pop('skyMap')
        # Can use the first dataId to find the tract and patch being worked on
        firstDataId = inputRefs.catalogs[0].dataId
        tractInfo = skyMap[firstDataId['tract']]
        patchInfo = tractInfo.getPatchInfo(firstDataId['patch'])
        inputs['skyInfo'] = Struct(
            skyMap=skyMap,
            tractInfo=tractInfo,
            patchInfo=patchInfo,
            wcs=tractInfo.getWcs(),
            bbox=patchInfo.getOuterBBox()
        )

        outputs = self.run(**inputs)
        butlerQC.put(outputs, outputRefs)

    def run(self, catalogs, skyInfo, idFactory, skySeed):
        r"""!
        @brief Merge multiple catalogs.

        After ordering the catalogs and filters in priority order,
        @ref getMergedSourceCatalog of the @ref FootprintMergeList_ "FootprintMergeList" created by
        @ref \_\_init\_\_ is used to perform the actual merging. Sky-object footprints are then appended,
        the peaks of every footprint are sorted, and finally @ref cullPeaks is used to remove
        garbage peaks detected around bright objects.

        @param[in] catalogs   mapping from band name (as listed in config.priorityList) to the
                              detection catalog for that band; bands missing from the mapping are skipped
        @param[in] skyInfo    description of the patch; its `wcs` and `patchInfo` attributes are used
        @param[in] idFactory  ID factory passed to the merge
        @param[in] skySeed    seed for the sky-object random number generator
        @return a Struct whose `outputCatalog` field holds the merged catalog
        """

        # Convert distance to tract coordinate
        pixelScale = skyInfo.wcs.getPixelScale().asArcseconds()
        peakDistance = self.config.minNewPeak / pixelScale
        samePeakDistance = self.config.maxSamePeak / pixelScale

        # Put catalogs, filters in priority order
        presentBands = [band for band in self.config.priorityList if band in catalogs]
        orderedCatalogs = [catalogs[band] for band in presentBands]
        orderedBands = [getShortFilterName(band) for band in presentBands]

        mergedList = self.merged.getMergedSourceCatalog(orderedCatalogs, orderedBands, peakDistance,
                                                        self.schema, idFactory,
                                                        samePeakDistance)

        #
        # Add extra sources that correspond to blank sky
        #
        skySourceFootprints = self.getSkySourceFootprints(mergedList, skyInfo, skySeed)
        if skySourceFootprints:
            key = mergedList.schema.find("merge_footprint_%s" % self.config.skyFilterName).key
            for foot in skySourceFootprints:
                s = mergedList.addNew()
                s.setFootprint(foot)
                s.set(key, True)

        # Sort Peaks from brightest to faintest
        for record in mergedList:
            record.getFootprint().sortPeaks()
        self.log.info("Merged to %d sources", len(mergedList))
        # Attempt to remove garbage peaks
        self.cullPeaks(mergedList)
        return Struct(outputCatalog=mergedList)

    def cullPeaks(self, catalog):
        """!
        @brief Attempt to remove garbage peaks (mostly on the outskirts of large blends).

        Within each footprint a peak is kept when any of the following holds, where `rank` is the
        peak's position in the footprint's peak list and `familySize` the number of peaks in it:
          - rank < config.cullPeaks.rankSufficient
          - the number of its set merge_peak_* flags is >= config.cullPeaks.nBandsSufficient
          - rank < config.cullPeaks.rankConsidered and
            rank < config.cullPeaks.rankNormalizedConsidered * familySize

        @param[in,out] catalog  Source catalog; the peak lists of its footprints are modified in place
        """
        keys = [item.key for item in self.merged.getPeakSchema().extract("merge_peak_*").values()]
        assert len(keys) > 0, "Error finding flags that associate peaks with their detection bands."
        cullConfig = self.config.cullPeaks
        totalPeaks = 0
        culledPeaks = 0
        for parentSource in catalog:
            # Make a list copy so we can clear the attached PeakCatalog and append the ones we're keeping
            # to it (which is easier than deleting as we iterate).
            keptPeaks = parentSource.getFootprint().getPeaks()
            oldPeaks = list(keptPeaks)
            keptPeaks.clear()
            familySize = len(oldPeaks)
            totalPeaks += familySize
            for rank, peak in enumerate(oldPeaks):
                if ((rank < cullConfig.rankSufficient)
                        or (sum(peak.get(k) for k in keys) >= cullConfig.nBandsSufficient)
                        or (rank < cullConfig.rankConsidered
                            and rank < cullConfig.rankNormalizedConsidered * familySize)):
                    keptPeaks.append(peak)
                else:
                    culledPeaks += 1
        self.log.info("Culled %d of %d peaks", culledPeaks, totalPeaks)

    def getSchemaCatalogs(self):
        """!
        Return a dict of empty catalogs for each catalog dataset produced by this task.

        @return dictionary of empty catalogs, keyed by "<coaddName>Coadd_mergeDet" and
                "<coaddName>Coadd_peak"
        """
        mergeDet = afwTable.SourceCatalog(self.schema)
        peak = afwDetect.PeakCatalog(self.merged.getPeakSchema())
        return {self.config.coaddName + "Coadd_mergeDet": mergeDet,
                self.config.coaddName + "Coadd_peak": peak}

    def getSkySourceFootprints(self, mergedList, skyInfo, seed):
        """!
        @brief Return a list of Footprints of sky objects which don't overlap with anything in mergedList

        @param mergedList  The merged Footprints from all the input bands
        @param skyInfo     A description of the patch
        @param seed        Seed for the random number generator
        @return the footprints rebuilt with the merged peak schema and flagged with
                "merge_peak_<skyFilterName>"; if the skyObjects subtask produced nothing, its
                (empty) result is returned unchanged
        """
        # Build a mask of the patch in which every merged footprint is marked DETECTED, so the
        # sky-object subtask can avoid those pixels.
        mask = afwImage.Mask(skyInfo.patchInfo.getOuterBBox())
        detected = mask.getPlaneBitMask("DETECTED")
        for s in mergedList:
            s.getFootprint().spans.setMask(mask, detected)

        footprints = self.skyObjects.run(mask, seed)
        if not footprints:
            return footprints

        # Need to convert the peak catalog's schema so we can set the "merge_peak_<skyFilterName>" flags
        schema = self.merged.getPeakSchema()
        mergeKey = schema.find("merge_peak_%s" % self.config.skyFilterName).key
        converted = []
        for oldFoot in footprints:
            assert len(oldFoot.getPeaks()) == 1, "Should be a single peak only"
            peak = oldFoot.getPeaks()[0]
            newFoot = afwDetect.Footprint(oldFoot.spans, schema)
            newFoot.addPeak(peak.getFx(), peak.getFy(), peak.getPeakValue())
            newFoot.getPeaks()[0].set(mergeKey, True)
            converted.append(newFoot)

        return converted

    def write(self, patchRef, catalog):
        """!
        @brief Write the output.

        @param[in]  patchRef   data reference for patch
        @param[in]  catalog    catalog

        We write as the dataset provided by the 'outputDataset'
        class variable.
        """
        patchRef.put(catalog, self.config.coaddName + "Coadd_" + self.outputDataset)
        # since the filter isn't actually part of the data ID for the dataset we're saving,
        # it's confusing to see it in the log message, even if the butler simply ignores it.
        mergeDataId = patchRef.dataId.copy()
        # The catalog is already written at this point; a data ID without a "filter" key must not
        # turn a purely cosmetic log clean-up into a failure.
        mergeDataId.pop("filter", None)
        self.log.info("Wrote merged catalog: %s", mergeDataId)

    def writeMetadata(self, dataRefList):
        """!
        @brief No metadata to write, and not sure how to write it for a list of dataRefs.
        """
        pass
lsst::afw::image
lsst.pipe.tasks.coaddBase.getSkyInfo
def getSkyInfo(coaddName, patchRef)
Return the SkyMap, tract and patch information, wcs, and outer bbox of the patch to be coadded.
Definition: coaddBase.py:261
lsst.pipe.tasks.multiBandUtils.makeMergeArgumentParser
def makeMergeArgumentParser(name, dataset)
Create a suitable ArgumentParser.
Definition: multiBandUtils.py:112
lsst.pipe.tasks.mergeDetections.write
def write(self, patchRef, catalog)
Write the output.
Definition: mergeDetections.py:389
lsst.pipe.tasks.multiBandUtils.getInputSchema
def getInputSchema(task, butler=None, schema=None)
Obtain the input schema either directly or from a butler reference.
Definition: multiBandUtils.py:127
lsst.pipe.tasks.assembleCoadd.run
def run(self, skyInfo, tempExpRefList, imageScalerList, weightList, altMaskList=None, mask=None, supplementaryData=None)
Definition: assembleCoadd.py:721
lsst.pipe.tasks.multiBandUtils.getShortFilterName
def getShortFilterName(name)
Definition: multiBandUtils.py:142
lsst::afw::table
lsst::pex::config
lsst.pipe.tasks.multiBandUtils.readCatalog
def readCatalog(task, patchRef)
Read input catalog.
Definition: multiBandUtils.py:152
lsst::afw::detection
lsst.pipe.tasks.coaddBase
Definition: coaddBase.py:1
lsst::skymap
lsst.pipe.tasks.mergeDetections.MergeDetectionsConnections
Definition: mergeDetections.py:43
lsst.pipe::base
lsst::meas::algorithms
lsst.pipe::base::connectionTypes
lsst.pipe.tasks.mergeDetections.writeMetadata
def writeMetadata(self, dataRefList)
No metadata to write, and not sure how to write it for a list of dataRefs.
Definition: mergeDetections.py:406