lsst.pipe.tasks  16.0-65-g12857137+1
mergeDetections.py
1 #!/usr/bin/env python
2 #
3 # LSST Data Management System
4 # Copyright 2008-2015 AURA/LSST.
5 #
6 # This product includes software developed by the
7 # LSST Project (http://www.lsst.org/).
8 #
9 # This program is free software: you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation, either version 3 of the License, or
12 # (at your option) any later version.
13 #
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
18 #
19 # You should have received a copy of the LSST License Statement and
20 # the GNU General Public License along with this program. If not,
21 # see <https://www.lsstcorp.org/LegalNotices/>.
22 #
23 
24 from .multiBandUtils import (CullPeaksConfig, MergeSourcesRunner, _makeMakeIdFactory, makeMergeArgumentParser,
25  getInputSchema, getShortFilterName, readCatalog)
26 
27 
28 import lsst.afw.detection as afwDetect
29 import lsst.afw.image as afwImage
30 import lsst.afw.table as afwTable
31 
32 from lsst.meas.algorithms import SkyObjectsTask
33 from lsst.pex.config import Config, Field, ListField, ConfigurableField, ConfigField
34 from lsst.pipe.base import (CmdLineTask, PipelineTask, PipelineTaskConfig, InitOutputDatasetField,
35  InputDatasetField, InitInputDatasetField, OutputDatasetField, Struct)
36 from lsst.pipe.tasks.coaddBase import getSkyInfo
37 
38 
39 class MergeDetectionsConfig(PipelineTaskConfig):
40  """!
41  @anchor MergeDetectionsConfig_
42 
43  @brief Configuration parameters for the MergeDetectionsTask.
44  """
45  minNewPeak = Field(dtype=float, default=1,
46  doc="Minimum distance from closest peak to create a new one (in arcsec).")
47 
48  maxSamePeak = Field(dtype=float, default=0.3,
49  doc="When adding new catalogs to the merge, all peaks less than this distance "
50  " (in arcsec) to an existing peak will be flagged as detected in that catalog.")
51  cullPeaks = ConfigField(dtype=CullPeaksConfig, doc="Configuration for how to cull peaks.")
52 
53  skyFilterName = Field(dtype=str, default="sky",
54  doc="Name of `filter' used to label sky objects (e.g. flag merge_peak_sky is set)\n"
55  "(N.b. should be in MergeMeasurementsConfig.pseudoFilterList)")
56  skyObjects = ConfigurableField(target=SkyObjectsTask, doc="Generate sky objects")
57  priorityList = ListField(dtype=str, default=[],
58  doc="Priority-ordered list of bands for the merge.")
59  coaddName = Field(dtype=str, default="deep", doc="Name of coadd")
60 
61  schema = InitInputDatasetField(
62  doc="Schema of the input detection catalog",
63  name="",
64  nameTemplate="{inputCoaddName}Coadd_det_schema",
65  storageClass="SourceCatalog"
66  )
67 
68  outputSchema = InitOutputDatasetField(
69  doc="Schema of the merged detection catalog",
70  nameTemplate="{outputCoaddName}Coadd_mergeDet_schema",
71  storageClass="SourceCatalog"
72  )
73 
74  outputPeakSchema = InitOutputDatasetField(
75  doc="Output schema of the Footprint peak catalog",
76  nameTemplate="{outputCoaddName}Coadd_peak_schema",
77  storageClass="PeakCatalog"
78  )
79 
80  catalogs = InputDatasetField(
81  doc="Detection Catalogs to be merged",
82  nameTemplate="{inputCoaddName}Coadd_det",
83  storageClass="SourceCatalog",
84  dimensions=("Tract", "Patch", "SkyMap", "AbstractFilter")
85  )
86 
87  skyMap = InputDatasetField(
88  doc="SkyMap to be used in merging",
89  nameTemplate="{inputCoaddName}Coadd_skyMap",
90  storageClass="SkyMap",
91  dimensions=("SkyMap",),
92  scalar=True
93  )
94 
95  outputCatalog = OutputDatasetField(
96  doc="Merged Detection catalog",
97  nameTemplate="{outputCoaddName}Coadd_mergeDet",
98  storageClass="SourceCatalog",
99  dimensions=("Tract", "Patch", "SkyMap"),
100  scalar=True
101  )
102 
103  def setDefaults(self):
104  Config.setDefaults(self)
105  self.formatTemplateNames({"inputCoaddName": 'deep', "outputCoaddName": "deep"})
106  self.skyObjects.avoidMask = ["DETECTED"] # Nothing else is available in our custom mask
107  self.quantum.dimensions = ("Tract", "Patch", "SkyMap")
108 
109  def validate(self):
110  super().validate()
111  if len(self.priorityList) == 0:
112  raise RuntimeError("No priority list provided")
113 
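# An illustrative configuration override for MergeDetectionsConfig (a sketch only; the
# filter names are hypothetical HSC-style examples).  Overrides like these are typically
# placed in a config file passed to the command-line task, e.g. via --configfile:
#
#     config.priorityList = ["HSC-I", "HSC-R", "HSC-G"]  # bands, highest priority first
#     config.minNewPeak = 1.0      # arcsec; peaks farther than this from existing peaks become new peaks
#     config.maxSamePeak = 0.3     # arcsec; peaks closer than this are flagged as detected in that band
#     config.skyFilterName = "sky"
#     config.coaddName = "deep"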
114 
115 class MergeDetectionsTask(PipelineTask, CmdLineTask):
116  r"""!
117  @anchor MergeDetectionsTask_
118 
119  @brief Merge coadd detections from multiple bands.
120 
121  @section pipe_tasks_multiBand_Contents Contents
122 
123  - @ref pipe_tasks_multiBand_MergeDetectionsTask_Purpose
124  - @ref pipe_tasks_multiBand_MergeDetectionsTask_Init
125  - @ref pipe_tasks_multiBand_MergeDetectionsTask_Run
126  - @ref pipe_tasks_multiBand_MergeDetectionsTask_Config
127  - @ref pipe_tasks_multiBand_MergeDetectionsTask_Debug
128  - @ref pipe_tasks_multiband_MergeDetectionsTask_Example
129 
130  @section pipe_tasks_multiBand_MergeDetectionsTask_Purpose Description
131 
132  Command-line task that merges sources detected in coadds of exposures obtained with different filters.
133 
134  To perform photometry consistently across coadds in multiple filter bands, we create a master catalog of
135  sources from all bands by merging the sources (peaks & footprints) detected in each coadd, while keeping
136  track of which band each source originates in.
137 
138  The catalog merge is performed by @ref getMergedSourceCatalog. Spurious peaks detected around bright
139  objects are culled as described in @ref CullPeaksConfig_.
140 
141  @par Inputs:
142  deepCoadd_det{tract,patch,filter}: SourceCatalog (only parent Footprints)
143  @par Outputs:
144  deepCoadd_mergeDet{tract,patch}: SourceCatalog (only parent Footprints)
145  @par Data Unit:
146  tract, patch
147 
148  @section pipe_tasks_multiBand_MergeDetectionsTask_Init Task initialisation
149 
150  @copydoc \_\_init\_\_
151 
152  @section pipe_tasks_multiBand_MergeDetectionsTask_Run Invoking the Task
153 
154  @copydoc run
155 
156  @section pipe_tasks_multiBand_MergeDetectionsTask_Config Configuration parameters
157 
158  See @ref MergeDetectionsConfig_
159 
160  @section pipe_tasks_multiBand_MergeDetectionsTask_Debug Debug variables
161 
162  The @link lsst.pipe.base.cmdLineTask.CmdLineTask command line task@endlink interface supports a flag @c -d
163  to import @b debug.py from your @c PYTHONPATH; see @ref baseDebug for more about @b debug.py files.
164 
165  MergeDetectionsTask has no debug variables.
166 
167  @section pipe_tasks_multiband_MergeDetectionsTask_Example A complete example of using MergeDetectionsTask
168 
169  MergeDetectionsTask is meant to be run after detecting sources in coadds generated for the chosen subset
170  of the available bands.
171  The purpose of the task is to merge sources (peaks & footprints) detected in the coadds generated from the
172  chosen subset of filters.
173  Subsequent tasks in the multi-band processing procedure will deblend the generated master list of sources
174  and, eventually, perform forced photometry.
175  Command-line usage of MergeDetectionsTask expects data references for all the coadds to be processed.
176  A list of the available optional arguments can be obtained by calling mergeCoaddDetections.py with the
177  `--help` command line argument:
178  @code
179  mergeCoaddDetections.py --help
180  @endcode
181 
 182  To demonstrate usage of MergeDetectionsTask in the larger context of multi-band processing, we
183  will process HSC data in the [ci_hsc](https://github.com/lsst/ci_hsc) package. Assuming one has finished
184  step 5 at @ref pipeTasks_multiBand, one may merge the catalogs of sources from each coadd as follows:
185  @code
186  mergeCoaddDetections.py $CI_HSC_DIR/DATA --id patch=5,4 tract=0 filter=HSC-I^HSC-R
187  @endcode
188  This will merge the HSC-I & -R band parent source catalogs and write the results to
189  `$CI_HSC_DIR/DATA/deepCoadd-results/merged/0/5,4/mergeDet-0-5,4.fits`.
190 
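 As a quick check, the merged catalog written above can be read back with the (Gen2) butler;
 the following is a minimal sketch assuming the ci_hsc repository layout from the example:
 @code
 import os
 from lsst.daf.persistence import Butler

 butler = Butler(os.path.join(os.environ["CI_HSC_DIR"], "DATA"))
 mergeDet = butler.get("deepCoadd_mergeDet", tract=0, patch="5,4")
 print("Merged parent sources:", len(mergeDet))
 @endcode
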
191  The next step in the multi-band processing procedure is
192  @ref MeasureMergedCoaddSourcesTask_ "MeasureMergedCoaddSourcesTask"
193  """
194  ConfigClass = MergeDetectionsConfig
195  RunnerClass = MergeSourcesRunner
196  _DefaultName = "mergeCoaddDetections"
197  inputDataset = "det"
198  outputDataset = "mergeDet"
199  makeIdFactory = _makeMakeIdFactory("MergedCoaddId")
200 
201  @classmethod
 202  def _makeArgumentParser(cls):
 203  return makeMergeArgumentParser(cls._DefaultName, cls.inputDataset)
204 
205  def getInputSchema(self, butler=None, schema=None):
206  return getInputSchema(self, butler, schema)
207 
208  def __init__(self, butler=None, schema=None, initInputs=None, **kwargs):
 209  # TODO: make the PipelineTask-only wording below less transitional once CmdLineTask support is removed
210  """!
211  @brief Initialize the merge detections task.
212 
213  A @ref FootprintMergeList_ "FootprintMergeList" will be used to
214  merge the source catalogs.
215 
216  @param[in] schema the schema of the detection catalogs used as input to this one
217  @param[in] butler a butler used to read the input schema from disk, if schema is None
 218  @param[in] initInputs This is a PipelineTask-only argument that holds all inputs passed in
219  through the PipelineTask middleware
220  @param[in] **kwargs keyword arguments to be passed to CmdLineTask.__init__
221 
222  The task will set its own self.schema attribute to the schema of the output merged catalog.
223  """
224  super().__init__(**kwargs)
225  if initInputs is not None:
226  schema = initInputs['schema'].schema
227 
228  self.makeSubtask("skyObjects")
229  self.schema = self.getInputSchema(butler=butler, schema=schema)
230 
231  filterNames = [getShortFilterName(name) for name in self.config.priorityList]
232  filterNames += [self.config.skyFilterName]
233  self.merged = afwDetect.FootprintMergeList(self.schema, filterNames)
234 
236  return {"outputSchema": afwTable.SourceCatalog(self.schema),
237  "outputPeakSchema": afwDetect.PeakCatalog(self.merged.getPeakSchema())}
238 
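    # A standalone-construction sketch (the names below are illustrative, not part of the
    # pipeline): given the schema of the per-band detection catalogs, the task can be built
    # directly without a butler, e.g.
    #
    #     import lsst.afw.table as afwTable
    #     detSchema = afwTable.SourceTable.makeMinimalSchema()
    #     config = MergeDetectionsConfig()
    #     config.priorityList = ["HSC-I", "HSC-R"]
    #     task = MergeDetectionsTask(schema=detSchema, config=config)
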
239  def runDataRef(self, patchRefList):
240  catalogs = dict(readCatalog(self, patchRef) for patchRef in patchRefList)
241  skyInfo = getSkyInfo(coaddName=self.config.coaddName, patchRef=patchRefList[0])
242  idFactory = self.makeIdFactory(patchRefList[0])
243  skySeed = patchRefList[0].get(self.config.coaddName + "MergedCoaddId")
244  mergeCatalogStruct = self.run(catalogs, skyInfo, idFactory, skySeed)
245  self.write(patchRefList[0], mergeCatalogStruct.outputCatalog)
246 
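    # A Gen2 invocation sketch (repository path and data IDs are hypothetical): gather a
    # dataRef to each band's deepCoadd_det catalog and pass the list to runDataRef, which
    # reads the catalogs, builds the patch skyInfo and ID factory, merges, and writes:
    #
    #     from lsst.daf.persistence import Butler
    #     butler = Butler("DATA")
    #     patchRefs = [butler.dataRef("deepCoadd_det", tract=0, patch="5,4", filter=band)
    #                  for band in ("HSC-I", "HSC-R")]
    #     task.runDataRef(patchRefs)  # `task` constructed as in the sketch above
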
247  def adaptArgsAndRun(self, inputData, inputDataIds, outputDataIds, butler):
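    """!
    @brief Adapt inputs supplied by the PipelineTask middleware to the arguments expected by run.

    Catalogs are re-keyed by abstract_filter, the patch geometry is looked up in the skyMap,
    and (pending DM-15843) a simple IdFactory and a fixed sky seed are substituted.
    """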
 248  # TODO (DM-15843): revisit this method and replace these placeholder values with the final solution
249  inputData["skySeed"] = 0
250  inputData["idFactory"] = afwTable.IdFactory.makeSimple()
251  catalogDict = {dataId['abstract_filter']: cat for dataId, cat in zip(inputDataIds['catalogs'],
252  inputData['catalogs'])}
253  inputData['catalogs'] = catalogDict
254  skyMap = inputData.pop('skyMap')
255  # Can use the first dataId to find the tract and patch being worked on
256  tractNumber = inputDataIds['catalogs'][0]['tract']
257  tractInfo = skyMap[tractNumber]
258  patchInfo = tractInfo.getPatchInfo(inputDataIds['catalogs'][0]['patch'])
259  skyInfo = Struct(
260  skyMap=skyMap,
261  tractInfo=tractInfo,
262  patchInfo=patchInfo,
263  wcs=tractInfo.getWcs(),
264  bbox=patchInfo.getOuterBBox()
265  )
266  inputData['skyInfo'] = skyInfo
267  return self.run(**inputData)
268 
269  def run(self, catalogs, skyInfo, idFactory, skySeed):
270  r"""!
271  @brief Merge multiple catalogs.
272 
273  After ordering the catalogs and filters in priority order,
274  @ref getMergedSourceCatalog of the @ref FootprintMergeList_ "FootprintMergeList" created by
275  @ref \_\_init\_\_ is used to perform the actual merging. Finally, @ref cullPeaks is used to remove
276  garbage peaks detected around bright objects.
277 
 278  @param[in] catalogs dict mapping filter name to the catalog of sources detected in that band
 279  @param[in] skyInfo description of the patch region (tract and patch info, wcs, outer bbox)
 280  @param[in] idFactory lsst.afw.table.IdFactory used to assign IDs to the merged sources
 @param[in] skySeed seed for the random number generator used to place sky objects
 @return lsst.pipe.base.Struct with the merged catalog in its outputCatalog attribute
281  """
282 
 283  # Convert the peak-matching distances from arcseconds to tract pixel units
284  tractWcs = skyInfo.wcs
285  peakDistance = self.config.minNewPeak / tractWcs.getPixelScale().asArcseconds()
286  samePeakDistance = self.config.maxSamePeak / tractWcs.getPixelScale().asArcseconds()
287 
288  # Put catalogs, filters in priority order
289  orderedCatalogs = [catalogs[band] for band in self.config.priorityList if band in catalogs.keys()]
290  orderedBands = [getShortFilterName(band) for band in self.config.priorityList
291  if band in catalogs.keys()]
292 
293  mergedList = self.merged.getMergedSourceCatalog(orderedCatalogs, orderedBands, peakDistance,
294  self.schema, idFactory,
295  samePeakDistance)
296 
297  #
298  # Add extra sources that correspond to blank sky
299  #
300  skySourceFootprints = self.getSkySourceFootprints(mergedList, skyInfo, skySeed)
301  if skySourceFootprints:
302  key = mergedList.schema.find("merge_footprint_%s" % self.config.skyFilterName).key
303  for foot in skySourceFootprints:
304  s = mergedList.addNew()
305  s.setFootprint(foot)
306  s.set(key, True)
307 
308  # Sort Peaks from brightest to faintest
309  for record in mergedList:
310  record.getFootprint().sortPeaks()
311  self.log.info("Merged to %d sources" % len(mergedList))
312  # Attempt to remove garbage peaks
313  self.cullPeaks(mergedList)
314  return Struct(outputCatalog=mergedList)
315 
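    # A minimal invocation sketch (variable names are illustrative): the keys of `catalogs`
    # must match entries in config.priorityList, and skyInfo is the Struct returned by
    # getSkyInfo (or assembled as in adaptArgsAndRun above):
    #
    #     result = task.run(catalogs={"HSC-I": catI, "HSC-R": catR},
    #                       skyInfo=skyInfo,
    #                       idFactory=afwTable.IdFactory.makeSimple(),
    #                       skySeed=0)
    #     mergedCat = result.outputCatalog
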
316  def cullPeaks(self, catalog):
317  """!
318  @brief Attempt to remove garbage peaks (mostly on the outskirts of large blends).
319 
320  @param[in] catalog Source catalog
321  """
322  keys = [item.key for item in self.merged.getPeakSchema().extract("merge_peak_*").values()]
323  assert len(keys) > 0, "Error finding flags that associate peaks with their detection bands."
324  totalPeaks = 0
325  culledPeaks = 0
326  for parentSource in catalog:
327  # Make a list copy so we can clear the attached PeakCatalog and append the ones we're keeping
328  # to it (which is easier than deleting as we iterate).
329  keptPeaks = parentSource.getFootprint().getPeaks()
330  oldPeaks = list(keptPeaks)
331  keptPeaks.clear()
332  familySize = len(oldPeaks)
333  totalPeaks += familySize
334  for rank, peak in enumerate(oldPeaks):
335  if ((rank < self.config.cullPeaks.rankSufficient) or
336  (sum([peak.get(k) for k in keys]) >= self.config.cullPeaks.nBandsSufficient) or
337  (rank < self.config.cullPeaks.rankConsidered and
338  rank < self.config.cullPeaks.rankNormalizedConsidered * familySize)):
339  keptPeaks.append(peak)
340  else:
341  culledPeaks += 1
342  self.log.info("Culled %d of %d peaks" % (culledPeaks, totalPeaks))
343 
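    # The culling thresholds come from CullPeaksConfig (see cullPeaks above); an override
    # sketch with illustrative values (not necessarily the package defaults):
    #
    #     config.cullPeaks.rankSufficient = 20             # always keep the 20 brightest peaks per family
    #     config.cullPeaks.nBandsSufficient = 2            # always keep peaks detected in at least 2 bands
    #     config.cullPeaks.rankConsidered = 30             # peaks at or beyond this rank are culled ...
    #     config.cullPeaks.rankNormalizedConsidered = 0.7  # ... as are peaks beyond this fraction of the family size
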
344  def getSchemaCatalogs(self):
345  """!
346  Return a dict of empty catalogs for each catalog dataset produced by this task.
347 
 348  @return dict of empty catalogs, keyed by dataset name
349  """
350  mergeDet = afwTable.SourceCatalog(self.schema)
351  peak = afwDetect.PeakCatalog(self.merged.getPeakSchema())
352  return {self.config.coaddName + "Coadd_mergeDet": mergeDet,
353  self.config.coaddName + "Coadd_peak": peak}
354 
355  def getSkySourceFootprints(self, mergedList, skyInfo, seed):
356  """!
357  @brief Return a list of Footprints of sky objects which don't overlap with anything in mergedList
358 
359  @param mergedList The merged Footprints from all the input bands
360  @param skyInfo A description of the patch
361  @param seed Seed for the random number generator
362  """
363  mask = afwImage.Mask(skyInfo.patchInfo.getOuterBBox())
364  detected = mask.getPlaneBitMask("DETECTED")
365  for s in mergedList:
366  s.getFootprint().spans.setMask(mask, detected)
367 
368  footprints = self.skyObjects.run(mask, seed)
369  if not footprints:
370  return footprints
371 
372  # Need to convert the peak catalog's schema so we can set the "merge_peak_<skyFilterName>" flags
373  schema = self.merged.getPeakSchema()
374  mergeKey = schema.find("merge_peak_%s" % self.config.skyFilterName).key
375  converted = []
376  for oldFoot in footprints:
377  assert len(oldFoot.getPeaks()) == 1, "Should be a single peak only"
378  peak = oldFoot.getPeaks()[0]
379  newFoot = afwDetect.Footprint(oldFoot.spans, schema)
380  newFoot.addPeak(peak.getFx(), peak.getFy(), peak.getPeakValue())
381  newFoot.getPeaks()[0].set(mergeKey, True)
382  converted.append(newFoot)
383 
384  return converted
385 
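    # Downstream, sky objects can be picked out of the merged catalog through the flag set
    # in run() (a sketch; the column name follows config.skyFilterName, "sky" by default):
    #
    #     skySources = [src for src in mergedCat if src["merge_footprint_sky"]]
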
386  def write(self, patchRef, catalog):
387  """!
388  @brief Write the output.
389 
390  @param[in] patchRef data reference for patch
391  @param[in] catalog catalog
392 
393  We write as the dataset provided by the 'outputDataset'
394  class variable.
395  """
396  patchRef.put(catalog, self.config.coaddName + "Coadd_" + self.outputDataset)
397  # since the filter isn't actually part of the data ID for the dataset we're saving,
398  # it's confusing to see it in the log message, even if the butler simply ignores it.
399  mergeDataId = patchRef.dataId.copy()
400  del mergeDataId["filter"]
401  self.log.info("Wrote merged catalog: %s" % (mergeDataId,))
402 
403  def writeMetadata(self, dataRefList):
404  """!
 405  @brief No metadata to write; this task operates on a list of dataRefs, so per-dataRef metadata is not written.
406  """
407  pass