Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# 

2# LSST Data Management System 

3# Copyright 2008-2016 AURA/LSST. 

4# 

5# This product includes software developed by the 

6# LSST Project (http://www.lsst.org/). 

7# 

8# This program is free software: you can redistribute it and/or modify 

9# it under the terms of the GNU General Public License as published by 

10# the Free Software Foundation, either version 3 of the License, or 

11# (at your option) any later version. 

12# 

13# This program is distributed in the hope that it will be useful, 

14# but WITHOUT ANY WARRANTY; without even the implied warranty of 

15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

16# GNU General Public License for more details. 

17# 

18# You should have received a copy of the LSST License Statement and 

19# the GNU General Public License along with this program. If not, 

20# see <https://www.lsstcorp.org/LegalNotices/>. 

21# 

22 

23"""PipelineTask for associating DiaSources with previous DiaObjects. 

24 

25Additionally performs forced photometry on the calibrated and difference 

26images at the updated locations of DiaObjects. 

27 

28Currently loads directly from the Apdb rather than pre-loading. 

29""" 

30 

31import os 

32import pandas as pd 

33 

34import lsst.dax.apdb as daxApdb 

35from lsst.meas.base import DiaObjectCalculationTask 

36import lsst.pex.config as pexConfig 

37import lsst.pipe.base as pipeBase 

38import lsst.pipe.base.connectionTypes as connTypes 

39 

40from lsst.ap.association import ( 

41 AssociationTask, 

42 DiaForcedSourceTask, 

43 LoadDiaCatalogsTask, 

44 make_dia_object_schema, 

45 make_dia_source_schema, 

46 PackageAlertsTask) 

47 

# Public API of this module.
__all__ = ("DiaPipelineConfig",
           "DiaPipelineTask",
           "DiaPipelineConnections")

51 

52 

class DiaPipelineConnections(
        pipeBase.PipelineTaskConnections,
        dimensions=("instrument", "visit", "detector"),
        defaultTemplates={"coaddName": "deep", "fakesType": ""}):
    """Butler connections for DiaPipelineTask.
    """
    diaSourceTable = connTypes.Input(
        doc="Catalog of calibrated DiaSources.",
        name="{fakesType}{coaddName}Diff_diaSrcTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )
    diffIm = connTypes.Input(
        doc="Difference image on which the DiaSources were detected.",
        name="{fakesType}{coaddName}Diff_differenceExp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
    )
    exposure = connTypes.Input(
        doc="Calibrated exposure differenced with a template image during "
            "image differencing.",
        name="calexp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
    )
    warpedExposure = connTypes.Input(
        doc="Warped template used to create `subtractedExposure`. Not PSF "
            "matched.",
        dimensions=("instrument", "visit", "detector"),
        storageClass="ExposureF",
        name="{fakesType}{coaddName}Diff_warpedExp",
    )
    apdbMarker = connTypes.Output(
        doc="Marker dataset storing the configuration of the Apdb for each "
            "visit/detector. Used to signal the completion of the pipeline.",
        name="apdb_marker",
        storageClass="Config",
        dimensions=("instrument", "visit", "detector"),
    )
    associatedDiaSources = connTypes.Output(
        doc="Optional output storing the DiaSource catalog after matching, "
            "calibration, and standardization for insertion into the Apdb.",
        name="{fakesType}{coaddName}Diff_assocDiaSrc",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )

    def __init__(self, *, config=None):
        super().__init__(config=config)

        # The associated-source catalog is optional output; drop the
        # connection entirely when the config says not to persist it.
        if not config.doWriteAssociatedSources:
            self.outputs.remove("associatedDiaSources")

    def adjustQuantum(self, inputs, outputs, label, dataId):
        """Override to make adjustments to `lsst.daf.butler.DatasetRef`
        objects in the `lsst.daf.butler.core.Quantum` during the graph
        generation stage of the activator.

        This implementation checks to make sure that the filters in the
        dataset are compatible with AP processing as set by the Apdb/DPDD
        schema.

        Parameters
        ----------
        inputs : `dict`
            Dictionary whose keys are an input (regular or prerequisite)
            connection name and whose values are a tuple of the connection
            instance and a collection of associated `DatasetRef` objects.
            The exact type of the nested collections is unspecified; it can
            be assumed to be multi-pass iterable and support `len` and
            ``in``, but it should not be mutated in place. In contrast, the
            outer dictionaries are guaranteed to be temporary copies that
            are true `dict` instances, and hence may be modified and even
            returned; this is especially useful for delegating to `super`
            (see notes below).
        outputs : `dict`
            Dict of output datasets, with the same structure as ``inputs``.
        label : `str`
            Label for this task in the pipeline (should be used in all
            diagnostic messages).
        dataId : `lsst.daf.butler.DataCoordinate`
            Data ID for this quantum in the pipeline (should be used in all
            diagnostic messages).

        Returns
        -------
        adjusted_inputs : `dict`
            Dict of the same form as ``inputs`` with updated containers of
            input `DatasetRef` objects. Connections that are not changed
            should not be returned at all. Datasets may only be removed, not
            added. Nested collections may be of any multi-pass iterable
            type, and the order of iteration will set the order of iteration
            within `PipelineTask.runQuantum`.
        adjusted_outputs : `dict`
            Dict of updated output datasets, with the same structure and
            interpretation as ``adjusted_inputs``.

        Raises
        ------
        ValueError
            Raised if a ``diffIm`` input has a band that is not listed in
            ``DiaPipelineConfig.validBands``.
        ScalarError
            Raised if any `Input` or `PrerequisiteInput` connection has
            ``multiple`` set to `False`, but multiple datasets.
        NoWorkFound
            Raised to indicate that this quantum should not be run; not
            enough datasets were found for a regular `Input` connection, and
            the quantum should be pruned or skipped.
        FileNotFoundError
            Raised to cause QuantumGraph generation to fail (with the
            message included in this exception); not enough datasets were
            found for a `PrerequisiteInput` connection.
        """
        # Reject difference images in bands the Apdb schema has no columns
        # for; continuing would fail much later, deep in the pipeline.
        _, refs = inputs["diffIm"]
        for ref in refs:
            if ref.dataId["band"] not in self.config.validBands:
                raise ValueError(
                    f"Requested '{ref.dataId['band']}' not in "
                    "DiaPipelineConfig.validBands. To process bands not in "
                    "the standard Rubin set (ugrizy) you must add the band to "
                    "the validBands list in DiaPipelineConfig and add the "
                    "appropriate columns to the Apdb schema.")
        return super().adjustQuantum(inputs, outputs, label, dataId)

172 

173 

class DiaPipelineConfig(pipeBase.PipelineTaskConfig,
                        pipelineConnections=DiaPipelineConnections):
    """Config for DiaPipelineTask.
    """
    coaddName = pexConfig.Field(
        doc="coadd name: typically one of deep, goodSeeing, or dcr",
        dtype=str,
        default="deep",
    )
    apdb = pexConfig.ConfigurableField(
        target=daxApdb.Apdb,
        ConfigClass=daxApdb.ApdbConfig,
        doc="Database connection for storing associated DiaSources and "
            "DiaObjects. Must already be initialized.",
    )
    validBands = pexConfig.ListField(
        dtype=str,
        default=["u", "g", "r", "i", "z", "y"],
        doc="List of bands that are valid for AP processing. To process a "
            "band not on this list, the appropriate band specific columns "
            "must be added to the Apdb schema in dax_apdb.",
    )
    diaCatalogLoader = pexConfig.ConfigurableField(
        target=LoadDiaCatalogsTask,
        doc="Task to load DiaObjects and DiaSources from the Apdb.",
    )
    associator = pexConfig.ConfigurableField(
        target=AssociationTask,
        doc="Task used to associate DiaSources with DiaObjects.",
    )
    diaCalculation = pexConfig.ConfigurableField(
        target=DiaObjectCalculationTask,
        doc="Task to compute summary statistics for DiaObjects.",
    )
    diaForcedSource = pexConfig.ConfigurableField(
        target=DiaForcedSourceTask,
        doc="Task used for force photometer DiaObject locations in direct and "
            "difference images.",
    )
    alertPackager = pexConfig.ConfigurableField(
        target=PackageAlertsTask,
        doc="Subtask for packaging Ap data into alerts.",
    )
    doPackageAlerts = pexConfig.Field(
        dtype=bool,
        default=False,
        doc="Package Dia-data into serialized alerts for distribution and "
            "write them to disk.",
    )
    doWriteAssociatedSources = pexConfig.Field(
        dtype=bool,
        default=False,
        doc="Write out associated and SDMed DiaSources.",
    )

    def setDefaults(self):
        self.apdb.dia_object_index = "baseline"
        self.apdb.dia_object_columns = []
        self.apdb.extra_schema_file = os.path.join(
            "${AP_ASSOCIATION_DIR}",
            "data",
            "apdb-ap-pipe-schema-extra.yaml")
        self.diaCalculation.plugins = ["ap_meanPosition",
                                       "ap_HTMIndex",
                                       "ap_nDiaSources",
                                       "ap_diaObjectFlag",
                                       "ap_meanFlux",
                                       "ap_percentileFlux",
                                       "ap_sigmaFlux",
                                       "ap_chi2Flux",
                                       "ap_madFlux",
                                       "ap_skewFlux",
                                       "ap_minMaxFlux",
                                       "ap_maxSlopeFlux",
                                       "ap_meanErrFlux",
                                       "ap_linearFit",
                                       "ap_stetsonJ",
                                       "ap_meanTotFlux",
                                       "ap_sigmaTotFlux"]

    def validate(self):
        """Validate the config, requiring the ap_HTMIndex plugin and
        consistent HTM pixelization levels.

        Raises
        ------
        ValueError
            Raised if the ap_HTMIndex plugin is disabled, or if its HTM
            level disagrees with the one used by ``diaCatalogLoader``.
        """
        pexConfig.Config.validate(self)
        # Check for the plugin's presence *before* indexing into the plugin
        # registry; the previous order raised an opaque KeyError instead of
        # the intended ValueError when the plugin was disabled.
        if "ap_HTMIndex" not in self.diaCalculation.plugins:
            raise ValueError("DiaPipe requires the ap_HTMIndex plugin "
                             "be enabled for proper insertion into the Apdb.")
        if self.diaCatalogLoader.htmLevel != \
                self.diaCalculation.plugins["ap_HTMIndex"].htmLevel:
            raise ValueError("HTM index level in LoadDiaCatalogsTask must be "
                             "equal to HTMIndexDiaCalculationPlugin index "
                             "level.")

264 

265 

class DiaPipelineTask(pipeBase.PipelineTask):
    """Task for loading, associating and storing Difference Image Analysis
    (DIA) Objects and Sources.
    """
    ConfigClass = DiaPipelineConfig
    _DefaultName = "diaPipe"
    RunnerClass = pipeBase.ButlerInitializedTaskRunner

    def __init__(self, initInputs=None, **kwargs):
        super().__init__(**kwargs)
        # The Apdb connection must already be initialized; apply() builds it
        # using the afw schemas for DiaObject and DiaSource.
        self.apdb = self.config.apdb.apply(
            afw_schemas=dict(DiaObject=make_dia_object_schema(),
                             DiaSource=make_dia_source_schema()))
        self.makeSubtask("diaCatalogLoader")
        self.makeSubtask("associator")
        self.makeSubtask("diaCalculation")
        self.makeSubtask("diaForcedSource")
        if self.config.doPackageAlerts:
            self.makeSubtask("alertPackager")

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        # Only the bit count is needed downstream; the packed exposure id
        # itself is unused.
        _, expBits = butlerQC.quantum.dataId.pack("visit_detector",
                                                  returnMaxBits=True)
        inputs["ccdExposureIdBits"] = expBits
        inputs["band"] = butlerQC.quantum.dataId["band"]

        outputs = self.run(**inputs)

        butlerQC.put(outputs, outputRefs)

    @pipeBase.timeMethod
    def run(self,
            diaSourceTable,
            diffIm,
            exposure,
            warpedExposure,
            ccdExposureIdBits,
            band):
        """Process DiaSources and DiaObjects.

        Load previous DiaObjects and their DiaSource history. Calibrate the
        values in the diaSourceCat. Associate new DiaSources with previous
        DiaObjects. Run forced photometry at the updated DiaObject locations.
        Store the results in the Alert Production Database (Apdb).

        Parameters
        ----------
        diaSourceTable : `pandas.DataFrame`
            Newly detected DiaSources.
        diffIm : `lsst.afw.image.ExposureF`
            Difference image exposure in which the sources in ``diaSourceCat``
            were detected.
        exposure : `lsst.afw.image.ExposureF`
            Calibrated exposure differenced with a template to create
            ``diffIm``.
        warpedExposure : `lsst.afw.image.ExposureF`
            Template exposure used to create diffIm.
        ccdExposureIdBits : `int`
            Number of bits used for a unique ``ccdVisitId``.
        band : `str`
            The band in which the new DiaSources were detected.

        Returns
        -------
        results : `lsst.pipe.base.Struct`
            Results struct with components.

            - ``apdbMarker`` : Marker dataset to store in the Butler
              indicating that this ccdVisit has completed successfully.
              (`lsst.dax.apdb.ApdbConfig`)
            - ``associatedDiaSources`` : Catalog of newly associated
              DiaSources. (`pandas.DataFrame`)

        Raises
        ------
        RuntimeError
            Raised if duplicate DiaObjects or DiaSources are found after
            association or DiaCalculation; usually the result of re-running
            data against an already populated Apdb.
        """
        self.log.info("Running DiaPipeline...")
        # Put the SciencePipelines through a SDMification step and return
        # calibrated columns with the expect output database names.

        # Load the DiaObjects and DiaSource history.
        loaderResult = self.diaCatalogLoader.run(diffIm, self.apdb)

        # Associate new DiaSources with existing DiaObjects.
        assocResults = self.associator.run(diaSourceTable,
                                           loaderResult.diaObjects)

        # Create new DiaObjects from unassociated diaSources.
        createResults = self.createNewDiaObjects(assocResults.diaSources)
        self._add_association_meta_data(assocResults.nUpdatedDiaObjects,
                                        assocResults.nUnassociatedDiaObjects,
                                        len(createResults.newDiaObjects))

        # Index the DiaSource catalog for this visit after all associations
        # have been made.
        updatedDiaObjectIds = createResults.diaSources["diaObjectId"][
            createResults.diaSources["diaObjectId"] != 0].to_numpy()
        diaSources = createResults.diaSources.set_index(["diaObjectId",
                                                         "filterName",
                                                         "diaSourceId"],
                                                        drop=False)

        # Append new DiaObjects and DiaSources to their previous history.
        diaObjects = loaderResult.diaObjects.append(
            createResults.newDiaObjects.set_index("diaObjectId", drop=False),
            sort=True)
        if self.testDataFrameIndex(diaObjects):
            raise RuntimeError(
                "Duplicate DiaObjects created after association. This is "
                "likely due to re-running data with an already populated "
                "Apdb. If this was not the case then there was an unexpected "
                "failure in Association while matching and creating new "
                "DiaObjects and should be reported. Exiting.")
        mergedDiaSourceHistory = loaderResult.diaSources.append(
            diaSources,
            sort=True)
        # Test for DiaSource duplication first. If duplicates are found,
        # this likely means this is duplicate data being processed and sent
        # to the Apdb.
        if self.testDataFrameIndex(mergedDiaSourceHistory):
            raise RuntimeError(
                "Duplicate DiaSources found after association and merging "
                "with history. This is likely due to re-running data with an "
                "already populated Apdb. If this was not the case then there "
                "was an unexpected failure in Association while matching "
                "sources to objects, and should be reported. Exiting.")

        # Compute DiaObject Summary statistics from their full DiaSource
        # history.
        diaCalResult = self.diaCalculation.run(
            diaObjects,
            mergedDiaSourceHistory,
            updatedDiaObjectIds,
            [band])
        # Test for duplication in the updated DiaObjects.
        if self.testDataFrameIndex(diaCalResult.diaObjectCat):
            raise RuntimeError(
                "Duplicate DiaObjects (loaded + updated) created after "
                "DiaCalculation. This is unexpected behavior and should be "
                "reported. Exiting.")
        if self.testDataFrameIndex(diaCalResult.updatedDiaObjects):
            raise RuntimeError(
                "Duplicate DiaObjects (updated) created after "
                "DiaCalculation. This is unexpected behavior and should be "
                "reported. Exiting.")

        # Force photometer on the Difference and Calibrated exposures using
        # the new and updated DiaObject locations.
        diaForcedSources = self.diaForcedSource.run(
            diaCalResult.diaObjectCat,
            diaCalResult.updatedDiaObjects.loc[:, "diaObjectId"].to_numpy(),
            ccdExposureIdBits,
            exposure,
            diffIm)

        # Store DiaSources and updated DiaObjects in the Apdb.
        self.apdb.storeDiaSources(diaSources)
        self.apdb.storeDiaObjects(
            diaCalResult.updatedDiaObjects,
            exposure.getInfo().getVisitInfo().getDate().toPython())
        self.apdb.storeDiaForcedSources(diaForcedSources)

        if self.config.doPackageAlerts:
            # NOTE(review): this guard skips the merge when exactly one
            # historical forced source exists; confirm whether ``> 0`` was
            # intended.
            if len(loaderResult.diaForcedSources) > 1:
                diaForcedSources = diaForcedSources.append(
                    loaderResult.diaForcedSources,
                    sort=True)
                if self.testDataFrameIndex(diaForcedSources):
                    self.log.warn(
                        "Duplicate DiaForcedSources created after merge with "
                        "history and new sources. This may cause downstream "
                        "problems. Dropping duplicates.")
                    # Drop duplicates via index and keep the first appearance.
                    # Reset due to the index shape being slight different than
                    # expected.
                    diaForcedSources = diaForcedSources.groupby(
                        diaForcedSources.index).first()
                    diaForcedSources.reset_index(drop=True, inplace=True)
                    diaForcedSources.set_index(
                        ["diaObjectId", "diaForcedSourceId"],
                        drop=False,
                        inplace=True)
            self.alertPackager.run(diaSources,
                                   diaCalResult.diaObjectCat,
                                   loaderResult.diaSources,
                                   diaForcedSources,
                                   diffIm,
                                   warpedExposure,
                                   ccdExposureIdBits)

        return pipeBase.Struct(apdbMarker=self.config.apdb.value,
                               associatedDiaSources=diaSources)

    def createNewDiaObjects(self, diaSources):
        """Loop through the set of DiaSources and create new DiaObjects
        for unassociated DiaSources.

        Parameters
        ----------
        diaSources : `pandas.DataFrame`
            Set of DiaSources to create new DiaObjects from.

        Returns
        -------
        results : `lsst.pipe.base.Struct`
            Results struct containing:

            - ``diaSources`` : DiaSource catalog with updated DiaObject ids.
              (`pandas.DataFrame`)
            - ``newDiaObjects`` : Newly created DiaObjects from the
              unassociated DiaSources. (`pandas.DataFrame`)
        """
        newDiaObjectsList = []
        for idx, diaSource in diaSources.iterrows():
            # A diaObjectId of 0 marks a DiaSource that Association failed
            # to match; seed a new DiaObject from it, re-using diaSourceId
            # as the new object's id.
            if diaSource["diaObjectId"] == 0:
                newDiaObjectsList.append(
                    self._initialize_dia_object(diaSource["diaSourceId"]))
                diaSources.loc[idx, "diaObjectId"] = diaSource["diaSourceId"]
        if len(newDiaObjectsList) > 0:
            newDiaObjects = pd.DataFrame(data=newDiaObjectsList)
        else:
            # Build an empty DataFrame that still carries the full DiaObject
            # column set so that downstream appends and indexing behave.
            tmpObj = self._initialize_dia_object(0)
            newDiaObjects = pd.DataFrame(data=newDiaObjectsList,
                                         columns=tmpObj.keys())
        # Return newDiaObjects directly; the previous extra
        # ``pd.DataFrame(data=newDiaObjects)`` wrap was a redundant copy.
        return pipeBase.Struct(diaSources=diaSources,
                               newDiaObjects=newDiaObjects)

    def _initialize_dia_object(self, objId):
        """Create a new DiaObject with values required to be initialized by
        the Apdb.

        Parameters
        ----------
        objId : `int`
            ``diaObjectId`` value for the new DiaObject.

        Returns
        -------
        diaObject : `dict`
            Newly created DiaObject with keys:

            ``diaObjectId``
                Unique DiaObjectId (`int`).
            ``pmParallaxNdata``
                Number of data points used for parallax calculation (`int`).
            ``nearbyObj1``
                Id of a nearby object in the Object table (`int`).
            ``nearbyObj2``
                Id of a nearby object in the Object table (`int`).
            ``nearbyObj3``
                Id of a nearby object in the Object table (`int`).
            ``flags``
                Flag bitmask, initialized to 0 (`int`).
            ``?PSFluxNdata``
                Number of data points used to calculate point source flux
                summary statistics in each bandpass (`int`).
        """
        new_dia_object = {"diaObjectId": objId,
                          "pmParallaxNdata": 0,
                          "nearbyObj1": 0,
                          "nearbyObj2": 0,
                          "nearbyObj3": 0,
                          "flags": 0}
        for f in ["u", "g", "r", "i", "z", "y"]:
            new_dia_object["%sPSFluxNdata" % f] = 0
        return new_dia_object

    def testDataFrameIndex(self, df):
        """Test the sorted DataFrame index for duplicates.

        Wrapped as a separate function to allow for mocking of this task
        in unittesting. Default of a mock return for this test is True.

        Parameters
        ----------
        df : `pandas.DataFrame`
            DataFrame to test.

        Returns
        -------
        `bool`
            True if DataFrame contains duplicate rows.
        """
        return df.index.has_duplicates

    def _add_association_meta_data(self,
                                   nUpdatedDiaObjects,
                                   nUnassociatedDiaObjects,
                                   nNewDiaObjects):
        """Store summaries of the association step in the task metadata.

        Parameters
        ----------
        nUpdatedDiaObjects : `int`
            Number of previous DiaObjects associated and updated in this
            ccdVisit.
        nUnassociatedDiaObjects : `int`
            Number of previous DiaObjects that were not associated or updated
            in this ccdVisit.
        nNewDiaObjects : `int`
            Number of newly created DiaObjects for this ccdVisit.
        """
        self.metadata.add('numUpdatedDiaObjects', nUpdatedDiaObjects)
        self.metadata.add('numUnassociatedDiaObjects', nUnassociatedDiaObjects)
        self.metadata.add('numNewDiaObjects', nNewDiaObjects)