lsst.pipe.tasks  21.0.0-46-g880d6fab+d14224996b
mergeDetections.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 #
3 # LSST Data Management System
4 # Copyright 2008-2015 AURA/LSST.
5 #
6 # This product includes software developed by the
7 # LSST Project (http://www.lsst.org/).
8 #
9 # This program is free software: you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation, either version 3 of the License, or
12 # (at your option) any later version.
13 #
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
18 #
19 # You should have received a copy of the LSST License Statement and
20 # the GNU General Public License along with this program. If not,
21 # see <https://www.lsstcorp.org/LegalNotices/>.
22 #
23 
from .multiBandUtils import (CullPeaksConfig, MergeSourcesRunner, _makeMakeIdFactory, makeMergeArgumentParser,
                             getInputSchema, readCatalog)


import lsst.afw.detection as afwDetect
import lsst.afw.image as afwImage
import lsst.afw.table as afwTable

from lsst.meas.algorithms import SkyObjectsTask
from lsst.skymap import BaseSkyMap
from lsst.pex.config import Config, Field, ListField, ConfigurableField, ConfigField
from lsst.pipe.base import (CmdLineTask, PipelineTask, PipelineTaskConfig, Struct,
                            PipelineTaskConnections)
import lsst.pipe.base.connectionTypes as cT
from lsst.pipe.tasks.coaddBase import getSkyInfo
39 
40 
class MergeDetectionsConnections(PipelineTaskConnections,
                                 dimensions=("tract", "patch", "skymap"),
                                 defaultTemplates={"inputCoaddName": 'deep', "outputCoaddName": "deep"}):
    """Butler connections for MergeDetectionsTask.

    Declares the datasets this task consumes and produces, keyed by
    (tract, patch, skymap): per-band detection catalogs come in, one
    merged detection catalog (plus its schemas) goes out.
    """

    # Init-input: schema of the per-band input detection catalogs.
    schema = cT.InitInput(
        doc="Schema of the input detection catalog",
        name="{inputCoaddName}Coadd_det_schema",
        storageClass="SourceCatalog"
    )

    # Init-output: schema of the merged source catalog this task writes.
    outputSchema = cT.InitOutput(
        doc="Schema of the merged detection catalog",
        name="{outputCoaddName}Coadd_mergeDet_schema",
        storageClass="SourceCatalog"
    )

    # Init-output: schema of the peak records attached to merged Footprints.
    outputPeakSchema = cT.InitOutput(
        doc="Output schema of the Footprint peak catalog",
        name="{outputCoaddName}Coadd_peak_schema",
        storageClass="PeakCatalog"
    )

    # One detection catalog per band (note the extra "band" dimension and
    # multiple=True): these are the catalogs to be merged.
    catalogs = cT.Input(
        doc="Detection Catalogs to be merged",
        name="{inputCoaddName}Coadd_det",
        storageClass="SourceCatalog",
        dimensions=("tract", "patch", "skymap", "band"),
        multiple=True
    )

    # SkyMap provides the tract/patch geometry (wcs, bounding boxes) needed
    # to convert the config's arcsecond match radii into pixels.
    skyMap = cT.Input(
        doc="SkyMap to be used in merging",
        name=BaseSkyMap.SKYMAP_DATASET_TYPE_NAME,
        storageClass="SkyMap",
        dimensions=("skymap",),
    )

    # The merged per-patch catalog (band dimension dropped after merging).
    outputCatalog = cT.Output(
        doc="Merged Detection catalog",
        name="{outputCoaddName}Coadd_mergeDet",
        storageClass="SourceCatalog",
        dimensions=("tract", "patch", "skymap"),
    )
83 
84 
class MergeDetectionsConfig(PipelineTaskConfig, pipelineConnections=MergeDetectionsConnections):
    """!
    @anchor MergeDetectionsConfig_

    @brief Configuration parameters for the MergeDetectionsTask.
    """
    # Match radii are in arcseconds; run() converts them to pixels with the
    # tract WCS pixel scale before calling FootprintMergeList.
    minNewPeak = Field(dtype=float, default=1,
                       doc="Minimum distance from closest peak to create a new one (in arcsec).")

    maxSamePeak = Field(dtype=float, default=0.3,
                        doc="When adding new catalogs to the merge, all peaks less than this distance "
                        " (in arcsec) to an existing peak will be flagged as detected in that catalog.")
    cullPeaks = ConfigField(dtype=CullPeaksConfig, doc="Configuration for how to cull peaks.")

    # Pseudo-filter name under which synthetic sky objects are recorded in
    # the merged catalog (sets merge_footprint_<skyFilterName> etc.).
    skyFilterName = Field(dtype=str, default="sky",
                          doc="Name of `filter' used to label sky objects (e.g. flag merge_peak_sky is set)\n"
                          "(N.b. should be in MergeMeasurementsConfig.pseudoFilterList)")
    skyObjects = ConfigurableField(target=SkyObjectsTask, doc="Generate sky objects")
    priorityList = ListField(dtype=str, default=[],
                             doc="Priority-ordered list of filter bands for the merge.")
    coaddName = Field(dtype=str, default="deep", doc="Name of coadd")

    def setDefaults(self):
        # NOTE: calls Config.setDefaults explicitly rather than super() —
        # preserved as-is since changing the MRO entry point could alter
        # which defaults get applied.
        Config.setDefaults(self)
        self.skyObjects.avoidMask = ["DETECTED"]  # Nothing else is available in our custom mask

    def validate(self):
        # A merge with no band priority order is meaningless; fail fast at
        # config-validation time rather than deep inside run().
        super().validate()
        if len(self.priorityList) == 0:
            raise RuntimeError("No priority list provided")
115 
116 
class MergeDetectionsTask(PipelineTask, CmdLineTask):
    r"""!
    @anchor MergeDetectionsTask_

    @brief Merge coadd detections from multiple bands.

    @section pipe_tasks_multiBand_Contents Contents

      - @ref pipe_tasks_multiBand_MergeDetectionsTask_Purpose
      - @ref pipe_tasks_multiBand_MergeDetectionsTask_Init
      - @ref pipe_tasks_multiBand_MergeDetectionsTask_Run
      - @ref pipe_tasks_multiBand_MergeDetectionsTask_Config
      - @ref pipe_tasks_multiBand_MergeDetectionsTask_Debug
      - @ref pipe_tasks_multiband_MergeDetectionsTask_Example

    @section pipe_tasks_multiBand_MergeDetectionsTask_Purpose Description

    Command-line task that merges sources detected in coadds of exposures obtained with different filters.

    To perform photometry consistently across coadds in multiple filter bands, we create a master catalog of
    sources from all bands by merging the sources (peaks & footprints) detected in each coadd, while keeping
    track of which band each source originates in.

    The catalog merge is performed by @ref getMergedSourceCatalog. Spurious peaks detected around bright
    objects are culled as described in @ref CullPeaksConfig_.

      @par Inputs:
        deepCoadd_det{tract,patch,filter}: SourceCatalog (only parent Footprints)
      @par Outputs:
        deepCoadd_mergeDet{tract,patch}: SourceCatalog (only parent Footprints)
      @par Data Unit:
        tract, patch

    @section pipe_tasks_multiBand_MergeDetectionsTask_Init Task initialisation

    @copydoc \_\_init\_\_

    @section pipe_tasks_multiBand_MergeDetectionsTask_Run Invoking the Task

    @copydoc run

    @section pipe_tasks_multiBand_MergeDetectionsTask_Config Configuration parameters

    See @ref MergeDetectionsConfig_

    @section pipe_tasks_multiBand_MergeDetectionsTask_Debug Debug variables

    The @link lsst.pipe.base.cmdLineTask.CmdLineTask command line task@endlink interface supports a flag @c -d
    to import @b debug.py from your @c PYTHONPATH; see @ref baseDebug for more about @b debug.py files.

    MergeDetectionsTask has no debug variables.

    @section pipe_tasks_multiband_MergeDetectionsTask_Example A complete example of using MergeDetectionsTask

    MergeDetectionsTask is meant to be run after detecting sources in coadds generated for the chosen subset
    of the available bands.
    The purpose of the task is to merge sources (peaks & footprints) detected in the coadds generated from the
    chosen subset of filters.
    Subsequent tasks in the multi-band processing procedure will deblend the generated master list of sources
    and, eventually, perform forced photometry.
    Command-line usage of MergeDetectionsTask expects data references for all the coadds to be processed.
    A list of the available optional arguments can be obtained by calling mergeCoaddDetections.py with the
    `--help` command line argument:
    @code
    mergeCoaddDetections.py --help
    @endcode

    To demonstrate usage of the DetectCoaddSourcesTask in the larger context of multi-band processing, we
    will process HSC data in the [ci_hsc](https://github.com/lsst/ci_hsc) package. Assuming one has finished
    step 5 at @ref pipeTasks_multiBand, one may merge the catalogs of sources from each coadd as follows:
    @code
    mergeCoaddDetections.py $CI_HSC_DIR/DATA --id patch=5,4 tract=0 filter=HSC-I^HSC-R
    @endcode
    This will merge the HSC-I & -R band parent source catalogs and write the results to
    `$CI_HSC_DIR/DATA/deepCoadd-results/merged/0/5,4/mergeDet-0-5,4.fits`.

    The next step in the multi-band processing procedure is
    @ref MeasureMergedCoaddSourcesTask_ "MeasureMergedCoaddSourcesTask"
    """
    ConfigClass = MergeDetectionsConfig
    RunnerClass = MergeSourcesRunner
    _DefaultName = "mergeCoaddDetections"
    # Gen2 (CmdLineTask) dataset suffixes: reads <coaddName>Coadd_det,
    # writes <coaddName>Coadd_mergeDet.
    inputDataset = "det"
    outputDataset = "mergeDet"
    makeIdFactory = _makeMakeIdFactory("MergedCoaddId")

    @classmethod
    def _makeArgumentParser(cls):
        # Gen2 entry point: build the shared merge-style ArgumentParser.
        return makeMergeArgumentParser(cls._DefaultName, cls.inputDataset)

    def getInputSchema(self, butler=None, schema=None):
        # Delegate to the shared helper: returns `schema` if given, else
        # reads the <coaddName>Coadd_det_schema from the butler.
        return getInputSchema(self, butler, schema)

    def __init__(self, butler=None, schema=None, initInputs=None, **kwargs):
        # Make PipelineTask-only wording less transitional after cmdlineTask is removed
        """!
        @brief Initialize the merge detections task.

        A @ref FootprintMergeList_ "FootprintMergeList" will be used to
        merge the source catalogs.

        @param[in] schema     the schema of the detection catalogs used as input to this one
        @param[in] butler     a butler used to read the input schema from disk, if schema is None
        @param[in] initInputs This a PipelineTask-only argument that holds all inputs passed in
                              through the PipelineTask middleware; if present it overrides `schema`
        @param[in] **kwargs   keyword arguments to be passed to CmdLineTask.__init__

        The task will set its own self.schema attribute to the schema of the output merged catalog.
        """
        super().__init__(**kwargs)
        # Gen3 path: the input schema arrives via initInputs rather than a
        # butler/schema argument.
        if initInputs is not None:
            schema = initInputs['schema'].schema

        self.makeSubtask("skyObjects")
        self.schema = self.getInputSchema(butler=butler, schema=schema)

        # The merge is band-ordered by priorityList, with the sky pseudo-band
        # appended last so sky objects never outrank real detections.
        filterNames = list(self.config.priorityList)
        filterNames.append(self.config.skyFilterName)
        self.merged = afwDetect.FootprintMergeList(self.schema, filterNames)
        # Empty catalogs published as init-outputs so downstream tasks can
        # introspect the output schemas before any data exists.
        self.outputSchema = afwTable.SourceCatalog(self.schema)
        self.outputPeakSchema = afwDetect.PeakCatalog(self.merged.getPeakSchema())

    def runDataRef(self, patchRefList):
        """!
        @brief Gen2 entry point: merge the catalogs referenced by patchRefList.

        @param[in] patchRefList list of data references, one per input band,
                                all for the same tract/patch
        """
        # Map band -> catalog; readCatalog yields (filterName, catalog) pairs.
        catalogs = dict(readCatalog(self, patchRef) for patchRef in patchRefList)
        # All refs share a patch, so the first one suffices for geometry/IDs.
        skyInfo = getSkyInfo(coaddName=self.config.coaddName, patchRef=patchRefList[0])
        idFactory = self.makeIdFactory(patchRefList[0])
        skySeed = patchRefList[0].get(self.config.coaddName + "MergedCoaddId")
        mergeCatalogStruct = self.run(catalogs, skyInfo, idFactory, skySeed)
        self.write(patchRefList[0], mergeCatalogStruct.outputCatalog)

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        """!
        @brief Gen3 entry point: fetch inputs, adapt them to run()'s
        signature, and put the outputs.
        """
        inputs = butlerQC.get(inputRefs)
        # Pack (tract, patch) into an integer; the packed value seeds both
        # the sky-object RNG and the source ID factory (upper bits hold the
        # packed ID, lower 64-maxBits bits hold the per-source counter).
        packedId, maxBits = butlerQC.quantum.dataId.pack("tract_patch", returnMaxBits=True)
        inputs["skySeed"] = packedId
        inputs["idFactory"] = afwTable.IdFactory.makeSource(packedId, 64 - maxBits)
        # run() wants a band -> catalog dict, not a parallel list.
        catalogDict = {ref.dataId['band']: cat for ref, cat in zip(inputRefs.catalogs,
                                                                   inputs['catalogs'])}
        inputs['catalogs'] = catalogDict
        skyMap = inputs.pop('skyMap')
        # Can use the first dataId to find the tract and patch being worked on
        tractNumber = inputRefs.catalogs[0].dataId['tract']
        tractInfo = skyMap[tractNumber]
        patchInfo = tractInfo.getPatchInfo(inputRefs.catalogs[0].dataId['patch'])
        # Mirror the Struct shape produced by getSkyInfo() in the Gen2 path.
        skyInfo = Struct(
            skyMap=skyMap,
            tractInfo=tractInfo,
            patchInfo=patchInfo,
            wcs=tractInfo.getWcs(),
            bbox=patchInfo.getOuterBBox()
        )
        inputs['skyInfo'] = skyInfo

        outputs = self.run(**inputs)
        butlerQC.put(outputs, outputRefs)

    def run(self, catalogs, skyInfo, idFactory, skySeed):
        r"""!
        @brief Merge multiple catalogs.

        After ordering the catalogs and filters in priority order,
        @ref getMergedSourceCatalog of the @ref FootprintMergeList_ "FootprintMergeList" created by
        @ref \_\_init\_\_ is used to perform the actual merging. Finally, @ref cullPeaks is used to remove
        garbage peaks detected around bright objects.

        @param[in] catalogs  dict mapping band name to input detection SourceCatalog
        @param[in] skyInfo   patch geometry Struct (wcs, patchInfo, bbox, ...)
        @param[in] idFactory afw IdFactory used to assign IDs to merged sources
        @param[in] skySeed   seed for the sky-object generator
        @return    Struct with `outputCatalog`: the merged SourceCatalog
        """

        # Convert distance to tract coordinate
        tractWcs = skyInfo.wcs
        peakDistance = self.config.minNewPeak / tractWcs.getPixelScale().asArcseconds()
        samePeakDistance = self.config.maxSamePeak / tractWcs.getPixelScale().asArcseconds()

        # Put catalogs, filters in priority order
        orderedCatalogs = [catalogs[band] for band in self.config.priorityList if band in catalogs.keys()]
        orderedBands = [band for band in self.config.priorityList if band in catalogs.keys()]

        mergedList = self.merged.getMergedSourceCatalog(orderedCatalogs, orderedBands, peakDistance,
                                                        self.schema, idFactory,
                                                        samePeakDistance)

        #
        # Add extra sources that correspond to blank sky
        #
        skySourceFootprints = self.getSkySourceFootprints(mergedList, skyInfo, skySeed)
        if skySourceFootprints:
            # Flag each appended record as a sky-band detection.
            key = mergedList.schema.find("merge_footprint_%s" % self.config.skyFilterName).key
            for foot in skySourceFootprints:
                s = mergedList.addNew()
                s.setFootprint(foot)
                s.set(key, True)

        # Sort Peaks from brightest to faintest
        for record in mergedList:
            record.getFootprint().sortPeaks()
        self.log.info("Merged to %d sources" % len(mergedList))
        # Attempt to remove garbage peaks
        self.cullPeaks(mergedList)
        return Struct(outputCatalog=mergedList)

    def cullPeaks(self, catalog):
        """!
        @brief Attempt to remove garbage peaks (mostly on the outskirts of large blends).

        Peaks are kept if they are bright enough in rank, detected in enough
        bands, or within the configured rank limits; see CullPeaksConfig.
        The catalog is modified in place.

        @param[in] catalog Source catalog
        """
        # One flag key per band; a peak's band count is the sum of its flags.
        keys = [item.key for item in self.merged.getPeakSchema().extract("merge_peak_*").values()]
        assert len(keys) > 0, "Error finding flags that associate peaks with their detection bands."
        totalPeaks = 0
        culledPeaks = 0
        for parentSource in catalog:
            # Make a list copy so we can clear the attached PeakCatalog and append the ones we're keeping
            # to it (which is easier than deleting as we iterate).
            keptPeaks = parentSource.getFootprint().getPeaks()
            oldPeaks = list(keptPeaks)
            keptPeaks.clear()
            familySize = len(oldPeaks)
            totalPeaks += familySize
            # Peaks are already sorted brightest-first (run() calls
            # sortPeaks), so `rank` is the brightness rank within the family.
            for rank, peak in enumerate(oldPeaks):
                if ((rank < self.config.cullPeaks.rankSufficient)
                    or (sum([peak.get(k) for k in keys]) >= self.config.cullPeaks.nBandsSufficient)
                    or (rank < self.config.cullPeaks.rankConsidered
                        and rank < self.config.cullPeaks.rankNormalizedConsidered * familySize)):
                    keptPeaks.append(peak)
                else:
                    culledPeaks += 1
        self.log.info("Culled %d of %d peaks" % (culledPeaks, totalPeaks))

    def getSchemaCatalogs(self):
        """!
        Return a dict of empty catalogs for each catalog dataset produced by this task.

        @param[out] dictionary of empty catalogs
        """
        mergeDet = afwTable.SourceCatalog(self.schema)
        peak = afwDetect.PeakCatalog(self.merged.getPeakSchema())
        return {self.config.coaddName + "Coadd_mergeDet": mergeDet,
                self.config.coaddName + "Coadd_peak": peak}

    def getSkySourceFootprints(self, mergedList, skyInfo, seed):
        """!
        @brief Return a list of Footprints of sky objects which don't overlap with anything in mergedList

        @param mergedList  The merged Footprints from all the input bands
        @param skyInfo     A description of the patch
        @param seed        Seed for the random number generator
        """
        # Paint every merged footprint into a DETECTED mask plane so the
        # sky-object generator can avoid them (config sets avoidMask to
        # ["DETECTED"]).
        mask = afwImage.Mask(skyInfo.patchInfo.getOuterBBox())
        detected = mask.getPlaneBitMask("DETECTED")
        for s in mergedList:
            s.getFootprint().spans.setMask(mask, detected)

        footprints = self.skyObjects.run(mask, seed)
        if not footprints:
            return footprints

        # Need to convert the peak catalog's schema so we can set the "merge_peak_<skyFilterName>" flags
        schema = self.merged.getPeakSchema()
        mergeKey = schema.find("merge_peak_%s" % self.config.skyFilterName).key
        converted = []
        for oldFoot in footprints:
            assert len(oldFoot.getPeaks()) == 1, "Should be a single peak only"
            peak = oldFoot.getPeaks()[0]
            # Rebuild the footprint on the merged peak schema, carrying over
            # the single peak and marking it as a sky-band peak.
            newFoot = afwDetect.Footprint(oldFoot.spans, schema)
            newFoot.addPeak(peak.getFx(), peak.getFy(), peak.getPeakValue())
            newFoot.getPeaks()[0].set(mergeKey, True)
            converted.append(newFoot)

        return converted

    def write(self, patchRef, catalog):
        """!
        @brief Write the output.

        @param[in] patchRef  data reference for patch
        @param[in] catalog   catalog

        We write as the dataset provided by the 'outputDataset'
        class variable.
        """
        patchRef.put(catalog, self.config.coaddName + "Coadd_" + self.outputDataset)
        # since the filter isn't actually part of the data ID for the dataset we're saving,
        # it's confusing to see it in the log message, even if the butler simply ignores it.
        mergeDataId = patchRef.dataId.copy()
        del mergeDataId["filter"]
        self.log.info("Wrote merged catalog: %s" % (mergeDataId,))

    def writeMetadata(self, dataRefList):
        """!
        @brief No metadata to write, and not sure how to write it for a list of dataRefs.
        """
        pass
def run(self, skyInfo, tempExpRefList, imageScalerList, weightList, altMaskList=None, mask=None, supplementaryData=None)
def getSkyInfo(coaddName, patchRef)
Return the SkyMap, tract and patch information, wcs, and outer bbox of the patch to be coadded.
Definition: coaddBase.py:271
def writeMetadata(self, dataRefList)
No metadata to write, and not sure how to write it for a list of dataRefs.
def write(self, patchRef, catalog)
Write the output.
def makeMergeArgumentParser(name, dataset)
Create a suitable ArgumentParser.
def getInputSchema(task, butler=None, schema=None)
Obtain the input schema either directly or from a butler reference.
def readCatalog(task, patchRef)
Read input catalog.