#
# LSST Data Management System
# Copyright 2008-2016 AURA/LSST.
#
# This product includes software developed by the
# LSST Project (http://www.lsst.org/).
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the LSST License Statement and
# the GNU General Public License along with this program. If not,
# see <https://www.lsstcorp.org/LegalNotices/>.
#

"""PipelineTask for associating DiaSources with previous DiaObjects.

Additionally performs forced photometry on the calibrated and difference
images at the updated locations of DiaObjects.

Currently loads directly from the Apdb rather than pre-loading.
"""

import os

import lsst.dax.apdb as daxApdb
from lsst.meas.base import DiaObjectCalculationTask
import lsst.pex.config as pexConfig
import lsst.pipe.base as pipeBase
import lsst.pipe.base.connectionTypes as connTypes

from lsst.ap.association import (
    AssociationTask,
    DiaForcedSourceTask,
    LoadDiaCatalogsTask,
    make_dia_object_schema,
    make_dia_source_schema,
    PackageAlertsTask)

__all__ = ("DiaPipelineConfig",
           "DiaPipelineTask",
           "DiaPipelineConnections")


class DiaPipelineConnections(
        pipeBase.PipelineTaskConnections,
        dimensions=("instrument", "visit", "detector"),
        defaultTemplates={"coaddName": "deep", "fakesType": ""}):
    """Butler connections for DiaPipelineTask.
    """
    diaSourceTable = connTypes.Input(
        doc="Catalog of calibrated DiaSources.",
        name="{fakesType}{coaddName}Diff_diaSrcTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )
    diffIm = connTypes.Input(
        doc="Difference image on which the DiaSources were detected.",
        name="{fakesType}{coaddName}Diff_differenceExp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
    )
    exposure = connTypes.Input(
        doc="Calibrated exposure differenced with a template image during "
            "image differencing.",
        name="calexp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
    )
    warpedExposure = connTypes.Input(
        doc="Warped template used to create `subtractedExposure`. Not PSF "
            "matched.",
        dimensions=("instrument", "visit", "detector"),
        storageClass="ExposureF",
        name="{fakesType}{coaddName}Diff_warpedExp",
    )
    apdbMarker = connTypes.Output(
        doc="Marker dataset storing the configuration of the Apdb for each "
            "visit/detector. Used to signal the completion of the pipeline.",
        name="apdb_marker",
        storageClass="Config",
        dimensions=("instrument", "visit", "detector"),
    )
    associatedDiaSources = connTypes.Output(
        doc="Optional output storing the DiaSource catalog after matching, "
            "calibration, and standardization for insertion into the Apdb.",
        name="{fakesType}{coaddName}Diff_assocDiaSrc",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )

    def __init__(self, *, config=None):
        super().__init__(config=config)

        if not config.doWriteAssociatedSources:
            self.outputs.remove("associatedDiaSources")

    def adjustQuantum(self, inputs, outputs, label, dataId):
        """Override to make adjustments to `lsst.daf.butler.DatasetRef` objects
        in the `lsst.daf.butler.core.Quantum` during the graph generation stage
        of the activator.

        This implementation checks that the bands in the dataset are
        compatible with AP processing as set by the Apdb/DPDD schema.

        Parameters
        ----------
        inputs : `dict`
            Dictionary whose keys are an input (regular or prerequisite)
            connection name and whose values are a tuple of the connection
            instance and a collection of associated `DatasetRef` objects.
            The exact type of the nested collections is unspecified; it can be
            assumed to be multi-pass iterable and support `len` and ``in``, but
            it should not be mutated in place. In contrast, the outer
            dictionaries are guaranteed to be temporary copies that are true
            `dict` instances, and hence may be modified and even returned; this
            is especially useful for delegating to `super` (see notes below).
        outputs : `dict`
            Dict of output datasets, with the same structure as ``inputs``.
        label : `str`
            Label for this task in the pipeline (should be used in all
            diagnostic messages).
        dataId : `lsst.daf.butler.DataCoordinate`
            Data ID for this quantum in the pipeline (should be used in all
            diagnostic messages).

        Returns
        -------
        adjusted_inputs : `dict`
            Dict of the same form as ``inputs`` with updated containers of
            input `DatasetRef` objects. Connections that are not changed
            should not be returned at all. Datasets may only be removed, not
            added. Nested collections may be of any multi-pass iterable type,
            and the order of iteration will set the order of iteration within
            `PipelineTask.runQuantum`.
        adjusted_outputs : `dict`
            Dict of updated output datasets, with the same structure and
            interpretation as ``adjusted_inputs``.

        Raises
        ------
        ScalarError
            Raised if any `Input` or `PrerequisiteInput` connection has
            ``multiple`` set to `False`, but multiple datasets.
        NoWorkFound
            Raised to indicate that this quantum should not be run; not enough
            datasets were found for a regular `Input` connection, and the
            quantum should be pruned or skipped.
        FileNotFoundError
            Raised to cause QuantumGraph generation to fail (with the message
            included in this exception); not enough datasets were found for a
            `PrerequisiteInput` connection.
        """

        _, refs = inputs["diffIm"]
        for ref in refs:
            if ref.dataId["band"] not in self.config.validBands:
                raise ValueError(
                    f"Requested '{ref.dataId['band']}' not in "
                    "DiaPipelineConfig.validBands. To process bands not in "
                    "the standard Rubin set (ugrizy) you must add the band to "
                    "the validBands list in DiaPipelineConfig and add the "
                    "appropriate columns to the Apdb schema.")
        return super().adjustQuantum(inputs, outputs, label, dataId)


class DiaPipelineConfig(pipeBase.PipelineTaskConfig,
                        pipelineConnections=DiaPipelineConnections):
    """Config for DiaPipelineTask.
    """
    coaddName = pexConfig.Field(
        doc="coadd name: typically one of deep, goodSeeing, or dcr",
        dtype=str,
        default="deep",
    )
    apdb = pexConfig.ConfigurableField(
        target=daxApdb.Apdb,
        ConfigClass=daxApdb.ApdbConfig,
        doc="Database connection for storing associated DiaSources and "
            "DiaObjects. Must already be initialized.",
    )
    validBands = pexConfig.ListField(
        dtype=str,
        default=["u", "g", "r", "i", "z", "y"],
        doc="List of bands that are valid for AP processing. To process a "
            "band not on this list, the appropriate band-specific columns "
            "must be added to the Apdb schema in dax_apdb.",
    )
    diaCatalogLoader = pexConfig.ConfigurableField(
        target=LoadDiaCatalogsTask,
        doc="Task to load DiaObjects and DiaSources from the Apdb.",
    )
    associator = pexConfig.ConfigurableField(
        target=AssociationTask,
        doc="Task used to associate DiaSources with DiaObjects.",
    )
    diaCalculation = pexConfig.ConfigurableField(
        target=DiaObjectCalculationTask,
        doc="Task to compute summary statistics for DiaObjects.",
    )
    diaForcedSource = pexConfig.ConfigurableField(
        target=DiaForcedSourceTask,
        doc="Task used to perform forced photometry at DiaObject locations "
            "in direct and difference images.",
    )
    alertPackager = pexConfig.ConfigurableField(
        target=PackageAlertsTask,
        doc="Subtask for packaging AP data into alerts.",
    )
    doPackageAlerts = pexConfig.Field(
        dtype=bool,
        default=False,
        doc="Package DIA data into serialized alerts for distribution and "
            "write them to disk.",
    )
    doWriteAssociatedSources = pexConfig.Field(
        dtype=bool,
        default=False,
        doc="Write out the associated, SDM-standardized DiaSources.",
    )

    def setDefaults(self):
        self.apdb.dia_object_index = "baseline"
        self.apdb.dia_object_columns = []
        self.apdb.extra_schema_file = os.path.join(
            "${AP_ASSOCIATION_DIR}",
            "data",
            "apdb-ap-pipe-schema-extra.yaml")

        self.diaCalculation.plugins = ["ap_meanPosition",
                                       "ap_HTMIndex",
                                       "ap_nDiaSources",
                                       "ap_diaObjectFlag",
                                       "ap_meanFlux",
                                       "ap_percentileFlux",
                                       "ap_sigmaFlux",
                                       "ap_chi2Flux",
                                       "ap_madFlux",
                                       "ap_skewFlux",
                                       "ap_minMaxFlux",
                                       "ap_maxSlopeFlux",
                                       "ap_meanErrFlux",
                                       "ap_linearFit",
                                       "ap_stetsonJ",
                                       "ap_meanTotFlux",
                                       "ap_sigmaTotFlux"]

    def validate(self):
        pexConfig.Config.validate(self)
        if "ap_HTMIndex" not in self.diaCalculation.plugins:
            raise ValueError("DiaPipe requires the ap_HTMIndex plugin "
                             "be enabled for proper insertion into the Apdb.")
        if self.diaCatalogLoader.htmLevel != \
                self.diaCalculation.plugins["ap_HTMIndex"].htmLevel:
            raise ValueError("HTM index level in LoadDiaCatalogsTask must be "
                             "equal to HTMIndexDiaCalculationPlugin index "
                             "level.")



class DiaPipelineTask(pipeBase.PipelineTask):
    """Task for loading, associating and storing Difference Image Analysis
    (DIA) Objects and Sources.
    """
    ConfigClass = DiaPipelineConfig
    _DefaultName = "diaPipe"
    RunnerClass = pipeBase.ButlerInitializedTaskRunner

    def __init__(self, initInputs=None, **kwargs):
        super().__init__(**kwargs)
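        # The Apdb interface built below is handed the afw schemas for
        # DiaObjects and DiaSources so the database layer knows the catalog
        # layouts it will be asked to store.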

        self.apdb = self.config.apdb.apply(
            afw_schemas=dict(DiaObject=make_dia_object_schema(),
                             DiaSource=make_dia_source_schema()))
        self.makeSubtask("diaCatalogLoader")
        self.makeSubtask("associator")
        self.makeSubtask("diaCalculation")
        self.makeSubtask("diaForcedSource")
        if self.config.doPackageAlerts:
            self.makeSubtask("alertPackager")

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
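        # The call below packs the visit+detector data ID into a single
        # exposure ID and reports how many bits it needs; forced photometry
        # and alert packaging use this to build unique source IDs.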

        expId, expBits = butlerQC.quantum.dataId.pack("visit_detector",
                                                      returnMaxBits=True)
        inputs["ccdExposureIdBits"] = expBits
        inputs["band"] = butlerQC.quantum.dataId["band"]

        outputs = self.run(**inputs)

        butlerQC.put(outputs, outputRefs)

    @pipeBase.timeMethod
    def run(self,
            diaSourceTable,
            diffIm,
            exposure,
            warpedExposure,
            ccdExposureIdBits,
            band):
        """Process DiaSources and DiaObjects.

        Load previous DiaObjects and their DiaSource history. Calibrate the
        values in ``diaSourceTable``. Associate new DiaSources with previous
        DiaObjects. Run forced photometry at the updated DiaObject locations.
        Store the results in the Alert Production Database (Apdb).

        Parameters
        ----------
        diaSourceTable : `pandas.DataFrame`
            Newly detected DiaSources.
        diffIm : `lsst.afw.image.ExposureF`
            Difference image exposure in which the sources in
            ``diaSourceTable`` were detected.
        exposure : `lsst.afw.image.ExposureF`
            Calibrated exposure differenced with a template to create
            ``diffIm``.
        warpedExposure : `lsst.afw.image.ExposureF`
            Template exposure used to create ``diffIm``.
        ccdExposureIdBits : `int`
            Number of bits used for a unique ``ccdVisitId``.
        band : `str`
            Band in which the new DiaSources were detected.

        Returns
        -------
        results : `lsst.pipe.base.Struct`
            Results struct with components.

            - ``apdbMarker`` : Marker dataset to store in the Butler
              indicating that this ccdVisit has completed successfully.
              (`lsst.dax.apdb.ApdbConfig`)
            - ``associatedDiaSources`` : Catalog of newly associated
              DiaSources. (`pandas.DataFrame`)
        """
        self.log.info("Running DiaPipeline...")
        # Put the Science Pipelines outputs through an SDMification step and
        # return calibrated columns with the expected output database names.

        # Load the DiaObjects and DiaSource history.
        loaderResult = self.diaCatalogLoader.run(diffIm, self.apdb)

        # Associate new DiaSources with existing DiaObjects and update
        # DiaObject summary statistics using the full DiaSource history.
        assocResults = self.associator.run(diaSourceTable,
                                           loaderResult.diaObjects,
                                           loaderResult.diaSources)

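        # The newly associated DiaSources are merged with the loaded history
        # below so that DiaObject summary statistics are computed over the
        # full DiaSource record.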

        mergedDiaSourceHistory = loaderResult.diaSources.append(
            assocResults.diaSources,
            sort=True)
        # Test for DiaSource duplication first. If duplicates are found,
        # this likely means this is duplicate data being processed and sent
        # to the Apdb.
        if self.testDataFrameIndex(mergedDiaSourceHistory):
            raise RuntimeError(
                "Duplicate DiaSources found after association and merging "
                "with history. This is likely due to re-running data with an "
                "already populated Apdb. If this was not the case then there "
                "was an unexpected failure in Association while matching "
                "sources to objects, and should be reported. Exiting.")

        diaCalResult = self.diaCalculation.run(
            assocResults.diaObjects,
            mergedDiaSourceHistory,
            assocResults.matchedDiaObjectIds,
            [band])
        if self.testDataFrameIndex(diaCalResult.diaObjectCat):
            raise RuntimeError(
                "Duplicate DiaObjects (loaded + updated) created after "
                "DiaCalculation. This is unexpected behavior and should be "
                "reported. Exiting.")
        if self.testDataFrameIndex(diaCalResult.updatedDiaObjects):
            raise RuntimeError(
                "Duplicate DiaObjects (updated) created after "
                "DiaCalculation. This is unexpected behavior and should be "
                "reported. Exiting.")

        # Force photometer on the difference and calibrated exposures using
        # the new and updated DiaObject locations.
        diaForcedSources = self.diaForcedSource.run(
            diaCalResult.diaObjectCat,
            diaCalResult.updatedDiaObjects.loc[:, "diaObjectId"].to_numpy(),
            ccdExposureIdBits,
            exposure,
            diffIm)

        # Store DiaSources and updated DiaObjects in the Apdb.
        self.apdb.storeDiaSources(assocResults.diaSources)
        self.apdb.storeDiaObjects(
            diaCalResult.updatedDiaObjects,
            exposure.getInfo().getVisitInfo().getDate().toPython())
        self.apdb.storeDiaForcedSources(diaForcedSources)

        if self.config.doPackageAlerts:
            if len(loaderResult.diaForcedSources) > 1:
                diaForcedSources = diaForcedSources.append(
                    loaderResult.diaForcedSources,
                    sort=True)
                if self.testDataFrameIndex(diaForcedSources):
                    self.log.warn(
                        "Duplicate DiaForcedSources created after merge with "
                        "history and new sources. This may cause downstream "
                        "problems. Dropping duplicates.")
                    # Drop duplicates via index and keep the first appearance.
                    # Reset because the index shape is slightly different
                    # from expected.
                    diaForcedSources = diaForcedSources.groupby(
                        diaForcedSources.index).first()
                    diaForcedSources.reset_index(drop=True, inplace=True)
                    diaForcedSources.set_index(
                        ["diaObjectId", "diaForcedSourceId"],
                        drop=False,
                        inplace=True)
            self.alertPackager.run(assocResults.diaSources,
                                   diaCalResult.diaObjectCat,
                                   loaderResult.diaSources,
                                   diaForcedSources,
                                   diffIm,
                                   warpedExposure,
                                   ccdExposureIdBits)

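        # Returning the Apdb config as ``apdbMarker`` provides the marker
        # dataset that signals completion of this visit/detector (see the
        # connection definition above).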

        return pipeBase.Struct(apdbMarker=self.config.apdb.value,
                               associatedDiaSources=assocResults.diaSources)

    def testDataFrameIndex(self, df):
        """Test the sorted DataFrame index for duplicates.

        Wrapped as a separate function to allow for mocking of this task in
        unit testing. The default mock return value for this test is `True`.

        Parameters
        ----------
        df : `pandas.DataFrame`
            DataFrame to test.

        Returns
        -------
        `bool`
            `True` if the DataFrame contains duplicate rows.
        """
        return df.index.has_duplicates