Coverage for python/lsst/pipe/tasks/mergeDetections.py: 26%

172 statements  

#!/usr/bin/env python
#
# LSST Data Management System
# Copyright 2008-2015 AURA/LSST.
#
# This product includes software developed by the
# LSST Project (http://www.lsst.org/).
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the LSST License Statement and
# the GNU General Public License along with this program. If not,
# see <https://www.lsstcorp.org/LegalNotices/>.
#

import numpy as np
from numpy.lib.recfunctions import rec_join

from .multiBandUtils import (CullPeaksConfig, MergeSourcesRunner, _makeMakeIdFactory, makeMergeArgumentParser,
                             getInputSchema, readCatalog)


import lsst.afw.detection as afwDetect
import lsst.afw.image as afwImage
import lsst.afw.table as afwTable

from lsst.coadd.utils.getGen3CoaddExposureId import getGen3CoaddExposureId
from lsst.meas.algorithms import SkyObjectsTask
from lsst.skymap import BaseSkyMap
from lsst.pex.config import Config, Field, ListField, ConfigurableField, ConfigField
from lsst.pipe.base import (CmdLineTask, PipelineTask, PipelineTaskConfig, Struct,
                            PipelineTaskConnections)
import lsst.pipe.base.connectionTypes as cT
from lsst.pipe.tasks.coaddBase import getSkyInfo
from lsst.obs.base import ExposureIdInfo


def matchCatalogsExact(catalog1, catalog2, patch1=None, patch2=None):
    """Match two catalogs derived from the same mergeDet catalog.

    When testing downstream features, like deblending methods/parameters
    and measurement algorithms/parameters, it is useful to compare
    the same sources in two catalogs. In most cases this must be done
    by matching on either RA/Dec or XY positions, which occasionally
    mismatches one source with another.

    For a more robust solution, as long as the downstream catalog is
    derived from the same mergeDet catalog, exact source matching
    can be done via the unique ``(parent, deblend_peakId)``
    combination. So this function performs this exact matching for
    all sources in both catalogs.

    Parameters
    ----------
    catalog1, catalog2 : `lsst.afw.table.SourceCatalog`
        The two catalogs to match.
    patch1, patch2 : array of int
        Patch for each row, converted into an integer.
        In the Gen3 butler this is done already; in Gen2
        it is recommended to use `patch2Int`, assuming that
        the patches have the same structure as HSC, ranging
        from '0,0' to '9,9'.

    Returns
    -------
    result : list of `lsst.afw.table.SourceMatch`
        List of matches for each source (using an inner join).
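
    Examples
    --------
    The matching is an inner join performed by
    `numpy.lib.recfunctions.rec_join` on the ``(parent, peakId)`` keys;
    a minimal sketch of the same mechanism with plain numpy arrays
    (the key values below are illustrative, not real ids):

    >>> import numpy as np
    >>> from numpy.lib.recfunctions import rec_join
    >>> dtype = [("parent", np.int64), ("peakId", np.int32), ("index", np.int32)]
    >>> key1 = np.rec.array([(1, 1, 0), (1, 2, 1), (2, 1, 2)], dtype=dtype)
    >>> key2 = np.rec.array([(1, 2, 0), (2, 1, 1)], dtype=dtype)
    >>> matched = rec_join(("parent", "peakId"), key1, key2, jointype="inner")
    >>> [(int(i1), int(i2)) for i1, i2 in zip(matched["index1"], matched["index2"])]
    [(1, 0), (2, 1)]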

77 """ 

78 # Only match the individual sources, the parents will 

79 # already be matched by the mergeDet catalog 

80 sidx1 = catalog1["parent"] != 0 

81 sidx2 = catalog2["parent"] != 0 

82 

83 # Create the keys used to merge the catalogs 

84 parents1 = np.array(catalog1["parent"][sidx1]) 

85 peaks1 = np.array(catalog1["deblend_peakId"][sidx1]) 

86 index1 = np.arange(len(catalog1))[sidx1] 

87 parents2 = np.array(catalog2["parent"][sidx2]) 

88 peaks2 = np.array(catalog2["deblend_peakId"][sidx2]) 

89 index2 = np.arange(len(catalog2))[sidx2] 

90 

91 if patch1 is not None: 

92 if patch2 is None: 

93 msg = ("If the catalogs are from different patches then patch1 and patch2 must be specified" 

94 ", got {} and {}").format(patch1, patch2) 

95 raise ValueError(msg) 

96 patch1 = patch1[sidx1] 

97 patch2 = patch2[sidx2] 

98 

99 key1 = np.rec.array((parents1, peaks1, patch1, index1), 

100 dtype=[('parent', np.int64), ('peakId', np.int32), 

101 ("patch", patch1.dtype), ("index", np.int32)]) 

102 key2 = np.rec.array((parents2, peaks2, patch2, index2), 

103 dtype=[('parent', np.int64), ('peakId', np.int32), 

104 ("patch", patch2.dtype), ("index", np.int32)]) 

105 matchColumns = ("parent", "peakId", "patch") 

106 else: 

107 key1 = np.rec.array((parents1, peaks1, index1), 

108 dtype=[('parent', np.int64), ('peakId', np.int32), ("index", np.int32)]) 

109 key2 = np.rec.array((parents2, peaks2, index2), 

110 dtype=[('parent', np.int64), ('peakId', np.int32), ("index", np.int32)]) 

111 matchColumns = ("parent", "peakId") 

112 # Match the two keys. 

113 # This line performs an inner join on the structured 

114 # arrays `key1` and `key2`, which stores their indices 

115 # as columns in a structured array. 

116 matched = rec_join(matchColumns, key1, key2, jointype="inner") 

117 

118 # Create the full index for both catalogs 

119 indices1 = matched["index1"] 

120 indices2 = matched["index2"] 

121 

122 # Re-index the resulting catalogs 

123 matches = [ 

124 afwTable.SourceMatch(catalog1[int(i1)], catalog2[int(i2)], 0.0) 

125 for i1, i2 in zip(indices1, indices2) 

126 ] 

127 

128 return matches 



class MergeDetectionsConnections(PipelineTaskConnections,
                                 dimensions=("tract", "patch", "skymap"),
                                 defaultTemplates={"inputCoaddName": 'deep', "outputCoaddName": "deep"}):
    schema = cT.InitInput(
        doc="Schema of the input detection catalog",
        name="{inputCoaddName}Coadd_det_schema",
        storageClass="SourceCatalog"
    )

    outputSchema = cT.InitOutput(
        doc="Schema of the merged detection catalog",
        name="{outputCoaddName}Coadd_mergeDet_schema",
        storageClass="SourceCatalog"
    )

    outputPeakSchema = cT.InitOutput(
        doc="Output schema of the Footprint peak catalog",
        name="{outputCoaddName}Coadd_peak_schema",
        storageClass="PeakCatalog"
    )

    catalogs = cT.Input(
        doc="Detection Catalogs to be merged",
        name="{inputCoaddName}Coadd_det",
        storageClass="SourceCatalog",
        dimensions=("tract", "patch", "skymap", "band"),
        multiple=True
    )

    skyMap = cT.Input(
        doc="SkyMap to be used in merging",
        name=BaseSkyMap.SKYMAP_DATASET_TYPE_NAME,
        storageClass="SkyMap",
        dimensions=("skymap",),
    )

    outputCatalog = cT.Output(
        doc="Merged Detection catalog",
        name="{outputCoaddName}Coadd_mergeDet",
        storageClass="SourceCatalog",
        dimensions=("tract", "patch", "skymap"),
    )
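
# The "{inputCoaddName}"/"{outputCoaddName}" placeholders in the connection
# names above are dataset-name templates. A minimal retargeting sketch,
# assuming the standard Gen3 middleware behaviour of exposing templates as
# fields on ``config.connections`` (the coadd name below is illustrative):
#
#     config = MergeDetectionsConfig()
#     config.connections.inputCoaddName = "goodSeeing"
#     # the "catalogs" input connection now resolves to "goodSeeingCoadd_det"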



class MergeDetectionsConfig(PipelineTaskConfig, pipelineConnections=MergeDetectionsConnections):
    """!
    @anchor MergeDetectionsConfig_

    @brief Configuration parameters for the MergeDetectionsTask.
    """
    minNewPeak = Field(dtype=float, default=1,
                       doc="Minimum distance from closest peak to create a new one (in arcsec).")

    maxSamePeak = Field(dtype=float, default=0.3,
                        doc="When adding new catalogs to the merge, all peaks less than this distance "
                            "(in arcsec) to an existing peak will be flagged as detected in that catalog.")
    cullPeaks = ConfigField(dtype=CullPeaksConfig, doc="Configuration for how to cull peaks.")

    skyFilterName = Field(dtype=str, default="sky",
                          doc="Name of the 'filter' used to label sky objects (e.g. flag merge_peak_sky is set)\n"
                              "(N.b. should be in MergeMeasurementsConfig.pseudoFilterList)")
    skyObjects = ConfigurableField(target=SkyObjectsTask, doc="Generate sky objects")
    priorityList = ListField(dtype=str, default=[],
                             doc="Priority-ordered list of filter bands for the merge.")
    coaddName = Field(dtype=str, default="deep", doc="Name of coadd")

    def setDefaults(self):
        Config.setDefaults(self)
        self.skyObjects.avoidMask = ["DETECTED"]  # Nothing else is available in our custom mask

    def validate(self):
        super().validate()
        if len(self.priorityList) == 0:
            raise RuntimeError("No priority list provided")
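
# A minimal configuration sketch (the band names are illustrative only;
# validate() above requires a non-empty priorityList):
#
#     config = MergeDetectionsConfig()
#     config.priorityList = ["i", "r", "z", "y", "g", "u"]
#     config.minNewPeak = 1.0   # arcsec
#     config.validate()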



class MergeDetectionsTask(PipelineTask, CmdLineTask):
    r"""!
    @anchor MergeDetectionsTask_

    @brief Merge coadd detections from multiple bands.

    @section pipe_tasks_multiBand_Contents Contents

      - @ref pipe_tasks_multiBand_MergeDetectionsTask_Purpose
      - @ref pipe_tasks_multiBand_MergeDetectionsTask_Init
      - @ref pipe_tasks_multiBand_MergeDetectionsTask_Run
      - @ref pipe_tasks_multiBand_MergeDetectionsTask_Config
      - @ref pipe_tasks_multiBand_MergeDetectionsTask_Debug
      - @ref pipe_tasks_multiband_MergeDetectionsTask_Example

    @section pipe_tasks_multiBand_MergeDetectionsTask_Purpose Description

    Command-line task that merges sources detected in coadds of exposures obtained with different filters.

    To perform photometry consistently across coadds in multiple filter bands, we create a master catalog of
    sources from all bands by merging the sources (peaks & footprints) detected in each coadd, while keeping
    track of which band each source originates in.

    The catalog merge is performed by @ref getMergedSourceCatalog. Spurious peaks detected around bright
    objects are culled as described in @ref CullPeaksConfig_.

    @par Inputs:
        deepCoadd_det{tract,patch,filter}: SourceCatalog (only parent Footprints)
    @par Outputs:
        deepCoadd_mergeDet{tract,patch}: SourceCatalog (only parent Footprints)
    @par Data Unit:
        tract, patch

    @section pipe_tasks_multiBand_MergeDetectionsTask_Init Task initialisation

    @copydoc \_\_init\_\_

    @section pipe_tasks_multiBand_MergeDetectionsTask_Run Invoking the Task

    @copydoc run

    @section pipe_tasks_multiBand_MergeDetectionsTask_Config Configuration parameters

    See @ref MergeDetectionsConfig_

    @section pipe_tasks_multiBand_MergeDetectionsTask_Debug Debug variables

    The @link lsst.pipe.base.cmdLineTask.CmdLineTask command line task@endlink interface supports a flag
    @c -d to import @b debug.py from your @c PYTHONPATH; see @ref baseDebug for more about @b debug.py files.

    MergeDetectionsTask has no debug variables.

    @section pipe_tasks_multiband_MergeDetectionsTask_Example A complete example of using MergeDetectionsTask

    MergeDetectionsTask is meant to be run after detecting sources in coadds generated for the chosen subset
    of the available bands.
    The purpose of the task is to merge sources (peaks & footprints) detected in the coadds generated from
    the chosen subset of filters.
    Subsequent tasks in the multi-band processing procedure will deblend the generated master list of
    sources and, eventually, perform forced photometry.
    Command-line usage of MergeDetectionsTask expects data references for all the coadds to be processed.
    A list of the available optional arguments can be obtained by calling mergeCoaddDetections.py with the
    `--help` command line argument:
    @code
    mergeCoaddDetections.py --help
    @endcode

    To demonstrate usage of the MergeDetectionsTask in the larger context of multi-band processing, we
    will process HSC data in the [ci_hsc](https://github.com/lsst/ci_hsc) package. Assuming one has
    finished step 5 at @ref pipeTasks_multiBand, one may merge the catalogs of sources from each coadd
    as follows:
    @code
    mergeCoaddDetections.py $CI_HSC_DIR/DATA --id patch=5,4 tract=0 filter=HSC-I^HSC-R
    @endcode
    This will merge the HSC-I & HSC-R band parent source catalogs and write the results to
    `$CI_HSC_DIR/DATA/deepCoadd-results/merged/0/5,4/mergeDet-0-5,4.fits`.

    The next step in the multi-band processing procedure is
    @ref MeasureMergedCoaddSourcesTask_ "MeasureMergedCoaddSourcesTask".
    """

    ConfigClass = MergeDetectionsConfig
    RunnerClass = MergeSourcesRunner
    _DefaultName = "mergeCoaddDetections"
    inputDataset = "det"
    outputDataset = "mergeDet"
    makeIdFactory = _makeMakeIdFactory("MergedCoaddId", includeBand=False)

    @classmethod
    def _makeArgumentParser(cls):
        return makeMergeArgumentParser(cls._DefaultName, cls.inputDataset)

    def getInputSchema(self, butler=None, schema=None):
        return getInputSchema(self, butler, schema)

    def __init__(self, butler=None, schema=None, initInputs=None, **kwargs):
        # Make PipelineTask-only wording less transitional after CmdLineTask is removed
        """!
        @brief Initialize the merge detections task.

        A @ref FootprintMergeList_ "FootprintMergeList" will be used to
        merge the source catalogs.

        @param[in] schema the schema of the detection catalogs used as input to this one
        @param[in] butler a butler used to read the input schema from disk, if schema is None
        @param[in] initInputs This is a PipelineTask-only argument that holds all inputs passed in
            through the PipelineTask middleware
        @param[in] **kwargs keyword arguments to be passed to CmdLineTask.__init__

        The task will set its own self.schema attribute to the schema of the output merged catalog.
        """
        super().__init__(**kwargs)
        if initInputs is not None:
            schema = initInputs['schema'].schema

        self.makeSubtask("skyObjects")
        self.schema = self.getInputSchema(butler=butler, schema=schema)

        filterNames = list(self.config.priorityList)
        filterNames.append(self.config.skyFilterName)
        self.merged = afwDetect.FootprintMergeList(self.schema, filterNames)
        self.outputSchema = afwTable.SourceCatalog(self.schema)
        self.outputPeakSchema = afwDetect.PeakCatalog(self.merged.getPeakSchema())
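
    # A minimal construction sketch (assumes a detection-catalog schema
    # ``detSchema`` is already in hand; the names below are illustrative,
    # not taken from this module):
    #
    #     config = MergeDetectionsConfig()
    #     config.priorityList = ["i", "r"]
    #     task = MergeDetectionsTask(config=config, schema=detSchema)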


    def runDataRef(self, patchRefList):
        catalogs = dict(readCatalog(self, patchRef) for patchRef in patchRefList)
        skyInfo = getSkyInfo(coaddName=self.config.coaddName, patchRef=patchRefList[0])
        idFactory = self.makeIdFactory(patchRefList[0])
        skySeed = getGen3CoaddExposureId(patchRefList[0], coaddName=self.config.coaddName,
                                         includeBand=False, log=self.log)
        mergeCatalogStruct = self.run(catalogs, skyInfo, idFactory, skySeed)
        self.write(patchRefList[0], mergeCatalogStruct.outputCatalog)

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        exposureIdInfo = ExposureIdInfo.fromDataId(butlerQC.quantum.dataId, "tract_patch")
        inputs["skySeed"] = exposureIdInfo.expId
        inputs["idFactory"] = exposureIdInfo.makeSourceIdFactory()
        catalogDict = {ref.dataId['band']: cat for ref, cat in zip(inputRefs.catalogs,
                                                                   inputs['catalogs'])}
        inputs['catalogs'] = catalogDict
        skyMap = inputs.pop('skyMap')
        # Can use the first dataId to find the tract and patch being worked on
        tractNumber = inputRefs.catalogs[0].dataId['tract']
        tractInfo = skyMap[tractNumber]
        patchInfo = tractInfo.getPatchInfo(inputRefs.catalogs[0].dataId['patch'])
        skyInfo = Struct(
            skyMap=skyMap,
            tractInfo=tractInfo,
            patchInfo=patchInfo,
            wcs=tractInfo.getWcs(),
            bbox=patchInfo.getOuterBBox()
        )
        inputs['skyInfo'] = skyInfo

        outputs = self.run(**inputs)
        butlerQC.put(outputs, outputRefs)
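
    # In Gen3, this task normally runs through the middleware rather than via
    # a direct call; a sketch of such an invocation (the repository path,
    # collection names, and data query below are placeholders):
    #
    #     pipetask run -b /path/to/repo -i input/collection -o output/collection \
    #         -t lsst.pipe.tasks.mergeDetections.MergeDetectionsTask \
    #         -d "tract = 0 AND patch = 42"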


    def run(self, catalogs, skyInfo, idFactory, skySeed):
        r"""!
        @brief Merge multiple catalogs.

        After ordering the catalogs and filters in priority order,
        @ref getMergedSourceCatalog of the @ref FootprintMergeList_ "FootprintMergeList" created by
        @ref \_\_init\_\_ is used to perform the actual merging. Finally, @ref cullPeaks is used to remove
        garbage peaks detected around bright objects.

        @param[in] catalogs dict mapping band name to the detection catalog for that band
        @param[in] skyInfo description of the patch geometry (including the tract WCS and outer bbox)
        @param[in] idFactory factory used to generate ids for the merged sources
        @param[in] skySeed seed for the sky-object random number generator
        @return a Struct whose `outputCatalog` member holds the merged source catalog
        """
        # Convert distance to tract coordinate
        tractWcs = skyInfo.wcs
        peakDistance = self.config.minNewPeak / tractWcs.getPixelScale().asArcseconds()
        samePeakDistance = self.config.maxSamePeak / tractWcs.getPixelScale().asArcseconds()
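        # For example, at an (illustrative) pixel scale of 0.2 arcsec/pixel,
        # the default minNewPeak of 1 arcsec becomes a peakDistance of 5 pixels.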


        # Put catalogs, filters in priority order
        orderedCatalogs = [catalogs[band] for band in self.config.priorityList if band in catalogs.keys()]
        orderedBands = [band for band in self.config.priorityList if band in catalogs.keys()]

        mergedList = self.merged.getMergedSourceCatalog(orderedCatalogs, orderedBands, peakDistance,
                                                        self.schema, idFactory,
                                                        samePeakDistance)

        #
        # Add extra sources that correspond to blank sky
        #
        skySourceFootprints = self.getSkySourceFootprints(mergedList, skyInfo, skySeed)
        if skySourceFootprints:
            key = mergedList.schema.find("merge_footprint_%s" % self.config.skyFilterName).key
            for foot in skySourceFootprints:
                s = mergedList.addNew()
                s.setFootprint(foot)
                s.set(key, True)

        # Sort peaks from brightest to faintest
        for record in mergedList:
            record.getFootprint().sortPeaks()
        self.log.info("Merged to %d sources", len(mergedList))
        # Attempt to remove garbage peaks
        self.cullPeaks(mergedList)
        return Struct(outputCatalog=mergedList)

    def cullPeaks(self, catalog):
        """!
        @brief Attempt to remove garbage peaks (mostly on the outskirts of large blends).

        @param[in] catalog Source catalog
        """
        keys = [item.key for item in self.merged.getPeakSchema().extract("merge_peak_*").values()]
        assert len(keys) > 0, "Error finding flags that associate peaks with their detection bands."
        totalPeaks = 0
        culledPeaks = 0
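        # A peak is kept (see the condition below) if its rank in the family is
        # below rankSufficient, if it was detected in at least nBandsSufficient
        # bands, or if its rank is below both rankConsidered and
        # rankNormalizedConsidered * familySize.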

        for parentSource in catalog:
            # Make a list copy so we can clear the attached PeakCatalog and append the ones we're keeping
            # to it (which is easier than deleting as we iterate).
            keptPeaks = parentSource.getFootprint().getPeaks()
            oldPeaks = list(keptPeaks)
            keptPeaks.clear()
            familySize = len(oldPeaks)
            totalPeaks += familySize
            for rank, peak in enumerate(oldPeaks):
                if ((rank < self.config.cullPeaks.rankSufficient)
                        or (sum([peak.get(k) for k in keys]) >= self.config.cullPeaks.nBandsSufficient)
                        or (rank < self.config.cullPeaks.rankConsidered
                            and rank < self.config.cullPeaks.rankNormalizedConsidered * familySize)):
                    keptPeaks.append(peak)
                else:
                    culledPeaks += 1
        self.log.info("Culled %d of %d peaks", culledPeaks, totalPeaks)

    def getSchemaCatalogs(self):
        """!
        Return a dict of empty catalogs for each catalog dataset produced by this task.

        @return dictionary of empty catalogs
        """
        mergeDet = afwTable.SourceCatalog(self.schema)
        peak = afwDetect.PeakCatalog(self.merged.getPeakSchema())
        return {self.config.coaddName + "Coadd_mergeDet": mergeDet,
                self.config.coaddName + "Coadd_peak": peak}

    def getSkySourceFootprints(self, mergedList, skyInfo, seed):
        """!
        @brief Return a list of Footprints of sky objects which don't overlap with anything in mergedList

        @param mergedList The merged Footprints from all the input bands
        @param skyInfo A description of the patch
        @param seed Seed for the random number generator
        """
        mask = afwImage.Mask(skyInfo.patchInfo.getOuterBBox())
        detected = mask.getPlaneBitMask("DETECTED")
        for s in mergedList:
            s.getFootprint().spans.setMask(mask, detected)

        footprints = self.skyObjects.run(mask, seed)
        if not footprints:
            return footprints

        # Need to convert the peak catalog's schema so we can set the "merge_peak_<skyFilterName>" flags
        schema = self.merged.getPeakSchema()
        mergeKey = schema.find("merge_peak_%s" % self.config.skyFilterName).key
        converted = []
        for oldFoot in footprints:
            assert len(oldFoot.getPeaks()) == 1, "Should be a single peak only"
            peak = oldFoot.getPeaks()[0]
            newFoot = afwDetect.Footprint(oldFoot.spans, schema)
            newFoot.addPeak(peak.getFx(), peak.getFy(), peak.getPeakValue())
            newFoot.getPeaks()[0].set(mergeKey, True)
            converted.append(newFoot)

        return converted

    def write(self, patchRef, catalog):
        """!
        @brief Write the output.

        @param[in] patchRef data reference for patch
        @param[in] catalog catalog

        We write as the dataset provided by the 'outputDataset'
        class variable.
        """
        patchRef.put(catalog, self.config.coaddName + "Coadd_" + self.outputDataset)
        # Since the filter isn't actually part of the data ID for the dataset we're saving,
        # it's confusing to see it in the log message, even if the butler simply ignores it.
        mergeDataId = patchRef.dataId.copy()
        del mergeDataId["filter"]
        self.log.info("Wrote merged catalog: %s", mergeDataId)

    def writeMetadata(self, dataRefList):
        """!
        @brief No metadata to write, and not sure how to write it for a list of dataRefs.
        """
        pass