#
# LSST Data Management System
# Copyright 2008-2016 AURA/LSST.
#
# This product includes software developed by the
# LSST Project (http://www.lsst.org/).
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the LSST License Statement and
# the GNU General Public License along with this program. If not,
# see <https://www.lsstcorp.org/LegalNotices/>.
#

"""PipelineTask for associating DiaSources with previous DiaObjects.

Additionally performs forced photometry on the calibrated and difference
images at the updated locations of DiaObjects.

Currently loads directly from the Apdb rather than pre-loading.
"""

import os
import pandas as pd

import lsst.dax.apdb as daxApdb
from lsst.meas.base import DiaObjectCalculationTask
import lsst.pex.config as pexConfig
import lsst.pipe.base as pipeBase
import lsst.pipe.base.connectionTypes as connTypes

from lsst.ap.association import (
    AssociationTask,
    DiaForcedSourceTask,
    LoadDiaCatalogsTask,
    PackageAlertsTask)
from lsst.ap.association.ssoAssociation import SolarSystemAssociationTask

__all__ = ("DiaPipelineConfig",
           "DiaPipelineTask",
           "DiaPipelineConnections")


class DiaPipelineConnections(
        pipeBase.PipelineTaskConnections,
        dimensions=("instrument", "visit", "detector"),
        defaultTemplates={"coaddName": "deep", "fakesType": ""}):
    """Butler connections for DiaPipelineTask.
    """
    diaSourceTable = connTypes.Input(
        doc="Catalog of calibrated DiaSources.",
        name="{fakesType}{coaddName}Diff_diaSrcTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )
    solarSystemObjectTable = connTypes.Input(
        doc="Catalog of SolarSystem objects expected to be observable in "
            "this detectorVisit.",
        name="visitSsObjects",
        storageClass="DataFrame",
        dimensions=("instrument", "visit"),
    )
    diffIm = connTypes.Input(
        doc="Difference image on which the DiaSources were detected.",
        name="{fakesType}{coaddName}Diff_differenceExp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
    )
    exposure = connTypes.Input(
        doc="Calibrated exposure differenced with a template image during "
            "image differencing.",
        name="calexp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
    )
    warpedExposure = connTypes.Input(
        doc="Warped template used to create `subtractedExposure`. Not PSF "
            "matched.",
        dimensions=("instrument", "visit", "detector"),
        storageClass="ExposureF",
        name="{fakesType}{coaddName}Diff_warpedExp",
    )
    apdbMarker = connTypes.Output(
        doc="Marker dataset storing the configuration of the Apdb for each "
            "visit/detector. Used to signal the completion of the pipeline.",
        name="apdb_marker",
        storageClass="Config",
        dimensions=("instrument", "visit", "detector"),
    )
    associatedDiaSources = connTypes.Output(
        doc="Optional output storing the DiaSource catalog after matching, "
            "calibration, and standardization for insertion into the Apdb.",
        name="{fakesType}{coaddName}Diff_assocDiaSrc",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )

    def __init__(self, *, config=None):
        super().__init__(config=config)
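        # Prune the optional connections that this configuration disables so
        # the graph builder neither requires nor produces them.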

        if not config.doWriteAssociatedSources:
            self.outputs.remove("associatedDiaSources")
        if not config.doSolarSystemAssociation:
            self.inputs.remove("solarSystemObjectTable")

    def adjustQuantum(self, inputs, outputs, label, dataId):
        """Override to make adjustments to `lsst.daf.butler.DatasetRef` objects
        in the `lsst.daf.butler.core.Quantum` during the graph generation stage
        of the activator.

        This implementation checks to make sure that the filters in the dataset
        are compatible with AP processing as set by the Apdb/DPDD schema.

        Parameters
        ----------
        inputs : `dict`
            Dictionary whose keys are an input (regular or prerequisite)
            connection name and whose values are a tuple of the connection
            instance and a collection of associated `DatasetRef` objects.
            The exact type of the nested collections is unspecified; it can be
            assumed to be multi-pass iterable and support `len` and ``in``, but
            it should not be mutated in place. In contrast, the outer
            dictionaries are guaranteed to be temporary copies that are true
            `dict` instances, and hence may be modified and even returned; this
            is especially useful for delegating to `super` (see notes below).
        outputs : `dict`
            Dict of output datasets, with the same structure as ``inputs``.
        label : `str`
            Label for this task in the pipeline (should be used in all
            diagnostic messages).
        dataId : `lsst.daf.butler.DataCoordinate`
            Data ID for this quantum in the pipeline (should be used in all
            diagnostic messages).

        Returns
        -------
        adjusted_inputs : `dict`
            Dict of the same form as ``inputs`` with updated containers of
            input `DatasetRef` objects. Connections that are not changed
            should not be returned at all. Datasets may only be removed, not
            added. Nested collections may be of any multi-pass iterable type,
            and the order of iteration will set the order of iteration within
            `PipelineTask.runQuantum`.
        adjusted_outputs : `dict`
            Dict of updated output datasets, with the same structure and
            interpretation as ``adjusted_inputs``.

        Raises
        ------
        ScalarError
            Raised if any `Input` or `PrerequisiteInput` connection has
            ``multiple`` set to `False`, but multiple datasets are present.
        NoWorkFound
            Raised to indicate that this quantum should not be run; not enough
            datasets were found for a regular `Input` connection, and the
            quantum should be pruned or skipped.
        FileNotFoundError
            Raised to cause QuantumGraph generation to fail (with the message
            included in this exception); not enough datasets were found for a
            `PrerequisiteInput` connection.
        """
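        # Verify that every difference image is in a band supported by the
        # Apdb schema; rejecting it here fails quantum-graph generation
        # instead of failing later at run time.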

        _, refs = inputs["diffIm"]
        for ref in refs:
            if ref.dataId["band"] not in self.config.validBands:
                raise ValueError(
                    f"Requested '{ref.dataId['band']}' not in "
                    "DiaPipelineConfig.validBands. To process bands not in "
                    "the standard Rubin set (ugrizy) you must add the band to "
                    "the validBands list in DiaPipelineConfig and add the "
                    "appropriate columns to the Apdb schema.")
        return super().adjustQuantum(inputs, outputs, label, dataId)


class DiaPipelineConfig(pipeBase.PipelineTaskConfig,
                        pipelineConnections=DiaPipelineConnections):
    """Config for DiaPipelineTask.
    """
    coaddName = pexConfig.Field(
        doc="coadd name: typically one of deep, goodSeeing, or dcr",
        dtype=str,
        default="deep",
    )
    apdb = daxApdb.ApdbSql.makeField(
        doc="Database connection for storing associated DiaSources and "
            "DiaObjects. Must already be initialized.",
    )
    validBands = pexConfig.ListField(
        dtype=str,
        default=["u", "g", "r", "i", "z", "y"],
        doc="List of bands that are valid for AP processing. To process a "
            "band not on this list, the appropriate band specific columns "
            "must be added to the Apdb schema in dax_apdb.",
    )
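    # As an illustrative (hypothetical) override, an extra band could be
    # enabled from a config file with, e.g.:
    #
    #     config.validBands = ["u", "g", "r", "i", "z", "y", "N921"]
    #
    # provided the matching band-specific columns exist in the Apdb schema.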

    diaCatalogLoader = pexConfig.ConfigurableField(
        target=LoadDiaCatalogsTask,
        doc="Task to load DiaObjects and DiaSources from the Apdb.",
    )
    associator = pexConfig.ConfigurableField(
        target=AssociationTask,
        doc="Task used to associate DiaSources with DiaObjects.",
    )
    doSolarSystemAssociation = pexConfig.Field(
        dtype=bool,
        default=False,
        doc="Process SolarSystem objects through the pipeline.",
    )
    solarSystemAssociator = pexConfig.ConfigurableField(
        target=SolarSystemAssociationTask,
        doc="Task used to associate DiaSources with SolarSystemObjects.",
    )
    diaCalculation = pexConfig.ConfigurableField(
        target=DiaObjectCalculationTask,
        doc="Task to compute summary statistics for DiaObjects.",
    )
    diaForcedSource = pexConfig.ConfigurableField(
        target=DiaForcedSourceTask,
        doc="Task used to force photometer DiaObject locations in direct and "
            "difference images.",
    )
    alertPackager = pexConfig.ConfigurableField(
        target=PackageAlertsTask,
        doc="Subtask for packaging AP data into alerts.",
    )
    doPackageAlerts = pexConfig.Field(
        dtype=bool,
        default=False,
        doc="Package DIA data into serialized alerts for distribution and "
            "write them to disk.",
    )
    doWriteAssociatedSources = pexConfig.Field(
        dtype=bool,
        default=False,
        doc="Write out the associated and SDM-standardized DiaSources.",
    )

    def setDefaults(self):
        self.apdb.dia_object_index = "baseline"
        self.apdb.dia_object_columns = []
        self.apdb.extra_schema_file = os.path.join(
            "${AP_ASSOCIATION_DIR}",
            "data",
            "apdb-ap-pipe-schema-extra.yaml")
        self.diaCalculation.plugins = ["ap_meanPosition",
                                       "ap_nDiaSources",
                                       "ap_diaObjectFlag",
                                       "ap_meanFlux",
                                       "ap_percentileFlux",
                                       "ap_sigmaFlux",
                                       "ap_chi2Flux",
                                       "ap_madFlux",
                                       "ap_skewFlux",
                                       "ap_minMaxFlux",
                                       "ap_maxSlopeFlux",
                                       "ap_meanErrFlux",
                                       "ap_linearFit",
                                       "ap_stetsonJ",
                                       "ap_meanTotFlux",
                                       "ap_sigmaTotFlux"]


class DiaPipelineTask(pipeBase.PipelineTask):
    """Task for loading, associating and storing Difference Image Analysis
    (DIA) Objects and Sources.
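
    Notes
    -----
    A minimal sketch of a config override one might supply when running this
    task, assuming an already initialized SQL-backed Apdb (the database URL
    below is illustrative only)::

        # Illustrative connection string; point at an initialized Apdb.
        config.apdb.db_url = "sqlite:////path/to/apdb.db"
        config.doPackageAlerts = False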

    """
    ConfigClass = DiaPipelineConfig
    _DefaultName = "diaPipe"
    RunnerClass = pipeBase.ButlerInitializedTaskRunner

    def __init__(self, initInputs=None, **kwargs):
        super().__init__(**kwargs)
        self.apdb = self.config.apdb.apply()
        self.makeSubtask("diaCatalogLoader")
        self.makeSubtask("associator")
        self.makeSubtask("diaCalculation")
        self.makeSubtask("diaForcedSource")
        if self.config.doPackageAlerts:
            self.makeSubtask("alertPackager")
        if self.config.doSolarSystemAssociation:
            self.makeSubtask("solarSystemAssociator")

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
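        # Pack visit+detector into a unique integer exposure id; the number
        # of bits used is forwarded so downstream id factories (e.g. for
        # DiaForcedSources) can generate non-colliding ids.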

        expId, expBits = butlerQC.quantum.dataId.pack("visit_detector",
                                                      returnMaxBits=True)
        inputs["ccdExposureIdBits"] = expBits
        inputs["band"] = butlerQC.quantum.dataId["band"]
        if not self.config.doSolarSystemAssociation:
            inputs["solarSystemObjectTable"] = None

        outputs = self.run(**inputs)

        butlerQC.put(outputs, outputRefs)

    @pipeBase.timeMethod
    def run(self,
            diaSourceTable,
            solarSystemObjectTable,
            diffIm,
            exposure,
            warpedExposure,
            ccdExposureIdBits,
            band):
        """Process DiaSources and DiaObjects.

        Load previous DiaObjects and their DiaSource history. Calibrate the
        values in the diaSourceTable. Associate new DiaSources with previous
        DiaObjects. Run forced photometry at the updated DiaObject locations.
        Store the results in the Alert Production Database (Apdb).

        Parameters
        ----------
        diaSourceTable : `pandas.DataFrame`
            Newly detected DiaSources.
        solarSystemObjectTable : `pandas.DataFrame` or `None`
            Catalog of SolarSystem objects expected to be observable in this
            detectorVisit; `None` when Solar System association is disabled.
        diffIm : `lsst.afw.image.ExposureF`
            Difference image exposure in which the sources in
            ``diaSourceTable`` were detected.
        exposure : `lsst.afw.image.ExposureF`
            Calibrated exposure differenced with a template to create
            ``diffIm``.
        warpedExposure : `lsst.afw.image.ExposureF`
            Template exposure used to create ``diffIm``.
        ccdExposureIdBits : `int`
            Number of bits used for a unique ``ccdVisitId``.
        band : `str`
            The band in which the new DiaSources were detected.

        Returns
        -------
        results : `lsst.pipe.base.Struct`
            Results struct with components.

            - ``apdbMarker`` : Marker dataset to store in the Butler
              indicating that this ccdVisit has completed successfully.
              (`lsst.dax.apdb.ApdbConfig`)
            - ``associatedDiaSources`` : Catalog of newly associated
              DiaSources. (`pandas.DataFrame`)
        """
        # Load the DiaObjects and DiaSource history.
        loaderResult = self.diaCatalogLoader.run(diffIm, self.apdb)

        # Associate new DiaSources with existing DiaObjects.
        assocResults = self.associator.run(diaSourceTable,
                                           loaderResult.diaObjects)
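        # When enabled, attempt to match the still-unassociated DiaSources to
        # known Solar System objects first; only the remainder seeds brand-new
        # DiaObjects.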

        if self.config.doSolarSystemAssociation:
            ssoAssocResult = self.solarSystemAssociator.run(
                assocResults.unAssocDiaSources, solarSystemObjectTable)
            createResults = self.createNewDiaObjects(
                ssoAssocResult.unAssocDiaSources)
            associatedDiaSources = pd.concat(
                [assocResults.matchedDiaSources,
                 ssoAssocResult.ssoAssocDiaSources,
                 createResults.diaSources])
        else:
            createResults = self.createNewDiaObjects(
                assocResults.unAssocDiaSources)
            associatedDiaSources = pd.concat(
                [assocResults.matchedDiaSources,
                 createResults.diaSources])

        # Record summaries of the association step in the task metadata.
        self._add_association_meta_data(assocResults.nUpdatedDiaObjects,
                                        assocResults.nUnassociatedDiaObjects,
                                        createResults.nNewDiaObjects)
        # Index the DiaSource catalog for this visit after all associations
        # have been made.
        updatedDiaObjectIds = associatedDiaSources["diaObjectId"][
            associatedDiaSources["diaObjectId"] != 0].to_numpy()
        associatedDiaSources.set_index(["diaObjectId",
                                        "filterName",
                                        "diaSourceId"],
                                       drop=False,
                                       inplace=True)

        # Append new DiaObjects and DiaSources to their previous history.
        diaObjects = loaderResult.diaObjects.append(
            createResults.newDiaObjects.set_index("diaObjectId", drop=False),
            sort=True)
        if self.testDataFrameIndex(diaObjects):
            raise RuntimeError(
                "Duplicate DiaObjects created after association. This is "
                "likely due to re-running data with an already populated "
                "Apdb. If this was not the case then there was an unexpected "
                "failure in Association while matching and creating new "
                "DiaObjects and should be reported. Exiting.")
        mergedDiaSourceHistory = loaderResult.diaSources.append(
            associatedDiaSources,
            sort=True)
        # Test for DiaSource duplication first. If duplicates are found,
        # this likely means this is duplicate data being processed and sent
        # to the Apdb.
        if self.testDataFrameIndex(mergedDiaSourceHistory):
            raise RuntimeError(
                "Duplicate DiaSources found after association and merging "
                "with history. This is likely due to re-running data with an "
                "already populated Apdb. If this was not the case then there "
                "was an unexpected failure in Association while matching "
                "sources to objects, and should be reported. Exiting.")

        # Compute DiaObject summary statistics from their full DiaSource
        # history.
        diaCalResult = self.diaCalculation.run(
            diaObjects,
            mergedDiaSourceHistory,
            updatedDiaObjectIds,
            [band])
        # Test for duplication in the updated DiaObjects.
        if self.testDataFrameIndex(diaCalResult.diaObjectCat):
            raise RuntimeError(
                "Duplicate DiaObjects (loaded + updated) created after "
                "DiaCalculation. This is unexpected behavior and should be "
                "reported. Exiting.")
        if self.testDataFrameIndex(diaCalResult.updatedDiaObjects):
            raise RuntimeError(
                "Duplicate DiaObjects (updated) created after "
                "DiaCalculation. This is unexpected behavior and should be "
                "reported. Exiting.")

        # Force photometer on the difference and calibrated exposures using
        # the new and updated DiaObject locations.
        diaForcedSources = self.diaForcedSource.run(
            diaCalResult.diaObjectCat,
            diaCalResult.updatedDiaObjects.loc[:, "diaObjectId"].to_numpy(),
            ccdExposureIdBits,
            exposure,
            diffIm)

        # Store DiaSources, updated DiaObjects, and DiaForcedSources in the
        # Apdb.
        self.apdb.store(
            exposure.getInfo().getVisitInfo().getDate(),
            diaCalResult.updatedDiaObjects,
            associatedDiaSources,
            diaForcedSources)

        if self.config.doPackageAlerts:
            if len(loaderResult.diaForcedSources) > 1:
                diaForcedSources = diaForcedSources.append(
                    loaderResult.diaForcedSources,
                    sort=True)
                if self.testDataFrameIndex(diaForcedSources):
                    self.log.warn(
                        "Duplicate DiaForcedSources created after merge with "
                        "history and new sources. This may cause downstream "
                        "problems. Dropping duplicates.")
                    # Drop duplicates via index and keep the first appearance.
                    # Reset due to the index shape being slightly different
                    # than expected.
                    diaForcedSources = diaForcedSources.groupby(
                        diaForcedSources.index).first()
                    diaForcedSources.reset_index(drop=True, inplace=True)
                    diaForcedSources.set_index(
                        ["diaObjectId", "diaForcedSourceId"],
                        drop=False,
                        inplace=True)
            self.alertPackager.run(associatedDiaSources,
                                   diaCalResult.diaObjectCat,
                                   loaderResult.diaSources,
                                   diaForcedSources,
                                   diffIm,
                                   warpedExposure,
                                   ccdExposureIdBits)

        return pipeBase.Struct(apdbMarker=self.config.apdb.value,
                               associatedDiaSources=associatedDiaSources)

    def createNewDiaObjects(self, unAssocDiaSources):
        """Loop through the set of DiaSources and create new DiaObjects
        for unassociated DiaSources.

        Parameters
        ----------
        unAssocDiaSources : `pandas.DataFrame`
            Set of DiaSources to create new DiaObjects from.

        Returns
        -------
        results : `lsst.pipe.base.Struct`
            Results struct containing:

            - ``diaSources`` : DiaSource catalog with updated DiaObject ids.
              (`pandas.DataFrame`)
            - ``newDiaObjects`` : Newly created DiaObjects from the
              unassociated DiaSources. (`pandas.DataFrame`)
            - ``nNewDiaObjects`` : Number of newly created DiaObjects. (`int`)
        """
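        # Each new DiaObject adopts the diaSourceId of the DiaSource that
        # seeds it as its diaObjectId; an empty input still returns a
        # DataFrame with the expected columns.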

        if len(unAssocDiaSources) == 0:
            tmpObj = self._initialize_dia_object(0)
            newDiaObjects = pd.DataFrame(data=[],
                                         columns=tmpObj.keys())
        else:
            newDiaObjects = unAssocDiaSources["diaSourceId"].apply(
                self._initialize_dia_object)
            unAssocDiaSources["diaObjectId"] = unAssocDiaSources["diaSourceId"]
        return pipeBase.Struct(diaSources=unAssocDiaSources,
                               newDiaObjects=newDiaObjects,
                               nNewDiaObjects=len(newDiaObjects))

    def _initialize_dia_object(self, objId):
        """Create a new DiaObject with the values required to be initialized
        by the Apdb.

        Parameters
        ----------
        objId : `int`
            ``diaObjectId`` value for the new DiaObject.

        Returns
        -------
        diaObject : `pandas.Series`
            Newly created DiaObject with keys:

            ``diaObjectId``
                Unique DiaObjectId (`int`).
            ``pmParallaxNdata``
                Number of data points used for parallax calculation (`int`).
            ``nearbyObj1``
                Id of a nearby object in the Object table (`int`).
            ``nearbyObj2``
                Id of a nearby object in the Object table (`int`).
            ``nearbyObj3``
                Id of a nearby object in the Object table (`int`).
            ``?PSFluxNdata``
                Number of data points used to calculate point source flux
                summary statistics in each bandpass (`int`).
        """
        new_dia_object = {"diaObjectId": objId,
                          "pmParallaxNdata": 0,
                          "nearbyObj1": 0,
                          "nearbyObj2": 0,
                          "nearbyObj3": 0,
                          "flags": 0}
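        # Zero out the per-band point-source flux counters expected by the
        # Apdb schema.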

        for f in ["u", "g", "r", "i", "z", "y"]:
            new_dia_object["%sPSFluxNdata" % f] = 0
        return pd.Series(data=new_dia_object)

    def testDataFrameIndex(self, df):
        """Test the sorted DataFrame index for duplicates.

        Wrapped as a separate function to allow for mocking of this test in
        unit testing. The default mock return value for this test is `True`.

        Parameters
        ----------
        df : `pandas.DataFrame`
            DataFrame to test.

        Returns
        -------
        `bool`
            `True` if the DataFrame index contains duplicates.
        """
        return df.index.has_duplicates

    def _add_association_meta_data(self,
                                   nUpdatedDiaObjects,
                                   nUnassociatedDiaObjects,
                                   nNewDiaObjects):
        """Store summaries of the association step in the task metadata.

        Parameters
        ----------
        nUpdatedDiaObjects : `int`
            Number of previous DiaObjects associated and updated in this
            ccdVisit.
        nUnassociatedDiaObjects : `int`
            Number of previous DiaObjects that were not associated or updated
            in this ccdVisit.
        nNewDiaObjects : `int`
            Number of newly created DiaObjects for this ccdVisit.
        """
        self.metadata.add('numUpdatedDiaObjects', nUpdatedDiaObjects)
        self.metadata.add('numUnassociatedDiaObjects', nUnassociatedDiaObjects)
        self.metadata.add('numNewDiaObjects', nNewDiaObjects)