Coverage for python/lsst/ap/association/diaPipe.py: 23% of 185 statements (coverage.py v7.5.0, created at 2024-04-25 11:25 -0700).

#
# LSST Data Management System
# Copyright 2008-2016 AURA/LSST.
#
# This product includes software developed by the
# LSST Project (http://www.lsst.org/).
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the LSST License Statement and
# the GNU General Public License along with this program. If not,
# see <https://www.lsstcorp.org/LegalNotices/>.
#

"""PipelineTask for associating DiaSources with previous DiaObjects.

Additionally performs forced photometry on the calibrated and difference
images at the updated locations of DiaObjects.

Currently loads directly from the Apdb rather than pre-loading.
"""

30 

# Public API of this module.
__all__ = ("DiaPipelineConfig",
           "DiaPipelineTask",
           "DiaPipelineConnections")

import numpy as np
import pandas as pd

from lsst.daf.base import DateTime
import lsst.dax.apdb as daxApdb
from lsst.meas.base import DetectorVisitIdGeneratorConfig, DiaObjectCalculationTask
import lsst.pex.config as pexConfig
import lsst.pipe.base as pipeBase
import lsst.pipe.base.connectionTypes as connTypes
from lsst.utils.timer import timeMethod

# Subtasks delegated to by DiaPipelineTask.
from lsst.ap.association import (
    AssociationTask,
    DiaForcedSourceTask,
    LoadDiaCatalogsTask,
    PackageAlertsTask)
from lsst.ap.association.ssoAssociation import SolarSystemAssociationTask

52 

53 

class DiaPipelineConnections(
        pipeBase.PipelineTaskConnections,
        dimensions=("instrument", "visit", "detector"),
        defaultTemplates={"coaddName": "deep", "fakesType": ""}):
    """Butler connections for DiaPipelineTask.

    Optional inputs and outputs are pruned in ``__init__`` according to the
    ``doWriteAssociatedSources``, ``doRunForcedMeasurement``,
    ``doSolarSystemAssociation``, and ``associator.doTrailedSourceFilter``
    configuration flags.
    """
    diaSourceTable = connTypes.Input(
        doc="Catalog of calibrated DiaSources.",
        name="{fakesType}{coaddName}Diff_diaSrcTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )
    solarSystemObjectTable = connTypes.Input(
        # Typo fixed: was "SolarSolarSystem".
        doc="Catalog of SolarSystem objects expected to be observable in "
            "this detectorVisit.",
        name="visitSsObjects",
        storageClass="DataFrame",
        dimensions=("instrument", "visit"),
    )
    diffIm = connTypes.Input(
        doc="Difference image on which the DiaSources were detected.",
        name="{fakesType}{coaddName}Diff_differenceExp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
    )
    exposure = connTypes.Input(
        doc="Calibrated exposure differenced with a template image during "
            "image differencing.",
        name="{fakesType}calexp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
    )
    template = connTypes.Input(
        doc="Warped template used to create `subtractedExposure`. Not PSF "
            "matched.",
        name="{fakesType}{coaddName}Diff_templateExp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
    )
    apdbMarker = connTypes.Output(
        doc="Marker dataset storing the configuration of the Apdb for each "
            "visit/detector. Used to signal the completion of the pipeline.",
        name="apdb_marker",
        storageClass="Config",
        dimensions=("instrument", "visit", "detector"),
    )
    associatedDiaSources = connTypes.Output(
        doc="Optional output storing the DiaSource catalog after matching, "
            "calibration, and standardization for insertion into the Apdb.",
        name="{fakesType}{coaddName}Diff_assocDiaSrc",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )
    diaForcedSources = connTypes.Output(
        doc="Optional output storing the forced sources computed at the diaObject positions.",
        name="{fakesType}{coaddName}Diff_diaForcedSrc",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )
    diaObjects = connTypes.Output(
        doc="Optional output storing the updated diaObjects associated to these sources.",
        name="{fakesType}{coaddName}Diff_diaObject",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )
    # Use the module-level ``connTypes`` alias and the same keyword ordering
    # as the sibling connections for consistency.
    longTrailedSources = connTypes.Output(
        doc="Optional output temporarily storing long trailed diaSources.",
        name="{fakesType}{coaddName}Diff_longTrailedSrc",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )

    def __init__(self, *, config=None):
        super().__init__(config=config)

        # Prune the optional connections that the configuration disables so
        # the butler does not expect datasets that will never be produced.
        if not config.doWriteAssociatedSources:
            self.outputs.remove("associatedDiaSources")
            self.outputs.remove("diaForcedSources")
            self.outputs.remove("diaObjects")
        elif not config.doRunForcedMeasurement:
            self.outputs.remove("diaForcedSources")
        if not config.doSolarSystemAssociation:
            self.inputs.remove("solarSystemObjectTable")
        if not config.associator.doTrailedSourceFilter:
            self.outputs.remove("longTrailedSources")

    def adjustQuantum(self, inputs, outputs, label, dataId):
        """Override to make adjustments to `lsst.daf.butler.DatasetRef` objects
        in the `lsst.daf.butler.core.Quantum` during the graph generation stage
        of the activator.

        This implementation checks to make sure that the filters in the dataset
        are compatible with AP processing as set by the Apdb/DPDD schema.

        Parameters
        ----------
        inputs : `dict`
            Dictionary whose keys are an input (regular or prerequisite)
            connection name and whose values are a tuple of the connection
            instance and a collection of associated `DatasetRef` objects.
            The exact type of the nested collections is unspecified; it can be
            assumed to be multi-pass iterable and support `len` and ``in``, but
            it should not be mutated in place. In contrast, the outer
            dictionaries are guaranteed to be temporary copies that are true
            `dict` instances, and hence may be modified and even returned; this
            is especially useful for delegating to `super` (see notes below).
        outputs : `dict`
            Dict of output datasets, with the same structure as ``inputs``.
        label : `str`
            Label for this task in the pipeline (should be used in all
            diagnostic messages).
        dataId : `lsst.daf.butler.DataCoordinate`
            Data ID for this quantum in the pipeline (should be used in all
            diagnostic messages).

        Returns
        -------
        adjusted_inputs : `dict`
            Dict of the same form as ``inputs`` with updated containers of
            input `DatasetRef` objects. Connections that are not changed
            should not be returned at all. Datasets may only be removed, not
            added. Nested collections may be of any multi-pass iterable type,
            and the order of iteration will set the order of iteration within
            `PipelineTask.runQuantum`.
        adjusted_outputs : `dict`
            Dict of updated output datasets, with the same structure and
            interpretation as ``adjusted_inputs``.

        Raises
        ------
        ScalarError
            Raised if any `Input` or `PrerequisiteInput` connection has
            ``multiple`` set to `False`, but multiple datasets.
        NoWorkFound
            Raised to indicate that this quantum should not be run; not enough
            datasets were found for a regular `Input` connection, and the
            quantum should be pruned or skipped.
        FileNotFoundError
            Raised to cause QuantumGraph generation to fail (with the message
            included in this exception); not enough datasets were found for a
            `PrerequisiteInput` connection.
        """
        # Reject bands the Apdb schema has no columns for; this fails graph
        # generation early instead of failing at run time.
        _, refs = inputs["diffIm"]
        for ref in refs:
            if ref.dataId["band"] not in self.config.validBands:
                raise ValueError(
                    f"Requested '{ref.dataId['band']}' not in "
                    "DiaPipelineConfig.validBands. To process bands not in "
                    "the standard Rubin set (ugrizy) you must add the band to "
                    "the validBands list in DiaPipelineConfig and add the "
                    "appropriate columns to the Apdb schema.")
        return super().adjustQuantum(inputs, outputs, label, dataId)

206 

207 

class DiaPipelineConfig(pipeBase.PipelineTaskConfig,
                        pipelineConnections=DiaPipelineConnections):
    """Config for DiaPipelineTask.
    """
    coaddName = pexConfig.Field(
        doc="coadd name: typically one of deep, goodSeeing, or dcr",
        dtype=str,
        default="deep",
    )
    # Connection settings for the Alert Production Database; the database
    # itself must exist before the task runs.
    apdb = daxApdb.ApdbSql.makeField(
        doc="Database connection for storing associated DiaSources and "
            "DiaObjects. Must already be initialized.",
    )
    # Checked by DiaPipelineConnections.adjustQuantum at graph-generation time.
    validBands = pexConfig.ListField(
        dtype=str,
        default=["u", "g", "r", "i", "z", "y"],
        doc="List of bands that are valid for AP processing. To process a "
            "band not on this list, the appropriate band specific columns "
            "must be added to the Apdb schema in dax_apdb.",
    )
    diaCatalogLoader = pexConfig.ConfigurableField(
        target=LoadDiaCatalogsTask,
        doc="Task to load DiaObjects and DiaSources from the Apdb.",
    )
    associator = pexConfig.ConfigurableField(
        target=AssociationTask,
        doc="Task used to associate DiaSources with DiaObjects.",
    )
    doSolarSystemAssociation = pexConfig.Field(
        dtype=bool,
        default=False,
        doc="Process SolarSystem objects through the pipeline.",
    )
    solarSystemAssociator = pexConfig.ConfigurableField(
        target=SolarSystemAssociationTask,
        doc="Task used to associate DiaSources with SolarSystemObjects.",
    )
    diaCalculation = pexConfig.ConfigurableField(
        target=DiaObjectCalculationTask,
        doc="Task to compute summary statistics for DiaObjects.",
    )
    # Deprecated debugging switch; see the deprecation notice below.
    doLoadForcedSources = pexConfig.Field(
        dtype=bool,
        default=True,
        deprecated="Added to allow disabling forced sources for performance "
                   "reasons during the ops rehearsal. "
                   "It is expected to be removed.",
        doc="Load forced DiaSource history from the APDB? "
            "This should only be turned off for debugging purposes.",
    )
    # Deprecated debugging switch; also controls whether the diaForcedSource
    # subtask and its output connection exist.
    doRunForcedMeasurement = pexConfig.Field(
        dtype=bool,
        default=True,
        deprecated="Added to allow disabling forced sources for performance "
                   "reasons during the ops rehearsal. "
                   "It is expected to be removed.",
        doc="Run forced measurement on all of the diaObjects? "
            "This should only be turned off for debugging purposes.",
    )
    diaForcedSource = pexConfig.ConfigurableField(
        target=DiaForcedSourceTask,
        doc="Task used for force photometer DiaObject locations in direct and "
            "difference images.",
    )
    alertPackager = pexConfig.ConfigurableField(
        target=PackageAlertsTask,
        doc="Subtask for packaging Ap data into alerts.",
    )
    doPackageAlerts = pexConfig.Field(
        dtype=bool,
        default=False,
        doc="Package Dia-data into serialized alerts for distribution and "
            "write them to disk.",
    )
    doWriteAssociatedSources = pexConfig.Field(
        dtype=bool,
        default=True,
        doc="Write out associated DiaSources, DiaForcedSources, and DiaObjects, "
            "formatted following the Science Data Model.",
    )
    # Used by DiaPipelineTask.purgeDiaObjects as the bbox padding.
    imagePixelMargin = pexConfig.RangeField(
        dtype=int,
        default=10,
        min=0,
        doc="Pad the image by this many pixels before removing off-image "
            "diaObjects for association.",
    )
    idGenerator = DetectorVisitIdGeneratorConfig.make_field()

    def setDefaults(self):
        # Apdb defaults tuned for this pipeline: use the baseline DiaObject
        # index and load all DiaObject columns (empty list means "all").
        self.apdb.dia_object_index = "baseline"
        self.apdb.dia_object_columns = []
        # Default set of summary-statistic plugins run by diaCalculation.
        self.diaCalculation.plugins = ["ap_meanPosition",
                                       "ap_nDiaSources",
                                       "ap_diaObjectFlag",
                                       "ap_meanFlux",
                                       "ap_percentileFlux",
                                       "ap_sigmaFlux",
                                       "ap_chi2Flux",
                                       "ap_madFlux",
                                       "ap_skewFlux",
                                       "ap_minMaxFlux",
                                       "ap_maxSlopeFlux",
                                       "ap_meanErrFlux",
                                       "ap_linearFit",
                                       "ap_stetsonJ",
                                       "ap_meanTotFlux",
                                       "ap_sigmaTotFlux"]

316 

317 

class DiaPipelineTask(pipeBase.PipelineTask):
    """Task for loading, associating and storing Difference Image Analysis
    (DIA) Objects and Sources.
    """
    ConfigClass = DiaPipelineConfig
    _DefaultName = "diaPipe"

    def __init__(self, initInputs=None, **kwargs):
        super().__init__(**kwargs)
        # Live connection to the Alert Production Database (Apdb).
        self.apdb = self.config.apdb.apply()
        self.makeSubtask("diaCatalogLoader")
        self.makeSubtask("associator")
        self.makeSubtask("diaCalculation")
        # Optional subtasks are only constructed when enabled in config.
        if self.config.doRunForcedMeasurement:
            self.makeSubtask("diaForcedSource")
        if self.config.doPackageAlerts:
            self.makeSubtask("alertPackager")
        if self.config.doSolarSystemAssociation:
            self.makeSubtask("solarSystemAssociator")

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        # Gather butler inputs and augment them with quantum-derived values
        # before delegating to ``run``.
        inputs = butlerQC.get(inputRefs)
        inputs["idGenerator"] = self.config.idGenerator.apply(butlerQC.quantum.dataId)
        inputs["band"] = butlerQC.quantum.dataId["band"]
        # The connection is removed when SSO association is disabled, so an
        # explicit placeholder is needed to satisfy the ``run`` signature.
        if not self.config.doSolarSystemAssociation:
            inputs["solarSystemObjectTable"] = None

        outputs = self.run(**inputs)

        butlerQC.put(outputs, outputRefs)

    @timeMethod
    def run(self,
            diaSourceTable,
            solarSystemObjectTable,
            diffIm,
            exposure,
            template,
            band,
            idGenerator):
        """Process DiaSources and DiaObjects.

        Load previous DiaObjects and their DiaSource history. Calibrate the
        values in the diaSourceCat. Associate new DiaSources with previous
        DiaObjects. Run forced photometry at the updated DiaObject locations.
        Store the results in the Alert Production Database (Apdb).

        Parameters
        ----------
        diaSourceTable : `pandas.DataFrame`
            Newly detected DiaSources.
        solarSystemObjectTable : `pandas.DataFrame` or `None`
            Expected solar system objects observable in this detectorVisit;
            `None` when ``doSolarSystemAssociation`` is disabled (see
            ``runQuantum``).
        diffIm : `lsst.afw.image.ExposureF`
            Difference image exposure in which the sources in ``diaSourceCat``
            were detected.
        exposure : `lsst.afw.image.ExposureF`
            Calibrated exposure differenced with a template to create
            ``diffIm``.
        template : `lsst.afw.image.ExposureF`
            Template exposure used to create diffIm.
        band : `str`
            The band in which the new DiaSources were detected.
        idGenerator : `lsst.meas.base.IdGenerator`
            Object that generates source IDs and random number generator seeds.

        Returns
        -------
        results : `lsst.pipe.base.Struct`
            Results struct with components.

            - ``apdbMarker`` : Marker dataset to store in the Butler indicating
              that this ccdVisit has completed successfully.
              (`lsst.dax.apdb.ApdbConfig`)
            - ``associatedDiaSources`` : Catalog of newly associated
              DiaSources. (`pandas.DataFrame`)
            - ``diaForcedSources`` : Forced sources measured (or an empty
              placeholder when forced measurement is disabled).
              (`pandas.DataFrame`)
            - ``diaObjects`` : Catalog of previous plus newly created
              DiaObjects. (`pandas.DataFrame`)
            - ``longTrailedSources`` : Long trailed sources filtered out
              during association. (`pandas.DataFrame`)

        Raises
        ------
        RuntimeError
            Raised if duplicate DiaObjects or DiaSources are found after
            association or summary-statistic calculation, which typically
            indicates data being re-run against an already populated Apdb.
        """
        # Load the DiaObjects and DiaSource history.
        loaderResult = self.diaCatalogLoader.run(diffIm, self.apdb,
                                                 doLoadForcedSources=self.config.doLoadForcedSources)
        # Restrict loaded DiaObjects to the (padded) exposure bounding box;
        # skip the WCS work entirely when nothing was loaded.
        if len(loaderResult.diaObjects) > 0:
            diaObjects = self.purgeDiaObjects(diffIm.getBBox(), diffIm.getWcs(), loaderResult.diaObjects,
                                              buffer=self.config.imagePixelMargin)
        else:
            diaObjects = loaderResult.diaObjects

        # Associate new DiaSources with existing DiaObjects.
        assocResults = self.associator.run(diaSourceTable, diaObjects,
                                           exposure_time=diffIm.visitInfo.exposureTime)

        # Optionally give solar system objects a chance to claim sources the
        # DiaObject association left unmatched; anything still unmatched
        # seeds a brand-new DiaObject.
        if self.config.doSolarSystemAssociation:
            ssoAssocResult = self.solarSystemAssociator.run(
                assocResults.unAssocDiaSources,
                solarSystemObjectTable,
                diffIm)
            createResults = self.createNewDiaObjects(
                ssoAssocResult.unAssocDiaSources)
            # Only concatenate the non-empty pieces to avoid pandas dtype
            # surprises from empty frames.
            toAssociate = []
            if len(assocResults.matchedDiaSources) > 0:
                toAssociate.append(assocResults.matchedDiaSources)
            if len(ssoAssocResult.ssoAssocDiaSources) > 0:
                toAssociate.append(ssoAssocResult.ssoAssocDiaSources)
            toAssociate.append(createResults.diaSources)
            associatedDiaSources = pd.concat(toAssociate)
            nTotalSsObjects = ssoAssocResult.nTotalSsObjects
            nAssociatedSsObjects = ssoAssocResult.nAssociatedSsObjects
        else:
            createResults = self.createNewDiaObjects(
                assocResults.unAssocDiaSources)
            toAssociate = []
            if len(assocResults.matchedDiaSources) > 0:
                toAssociate.append(assocResults.matchedDiaSources)
            toAssociate.append(createResults.diaSources)
            associatedDiaSources = pd.concat(toAssociate)
            nTotalSsObjects = 0
            nAssociatedSsObjects = 0

        # Record association counts in the task metadata.
        self._add_association_meta_data(assocResults.nUpdatedDiaObjects,
                                        assocResults.nUnassociatedDiaObjects,
                                        createResults.nNewDiaObjects,
                                        nTotalSsObjects,
                                        nAssociatedSsObjects)
        # Index the DiaSource catalog for this visit after all associations
        # have been made.  Ids of 0 mark sources without a real DiaObject
        # match and are excluded from the "updated" set.
        updatedDiaObjectIds = associatedDiaSources["diaObjectId"][
            associatedDiaSources["diaObjectId"] != 0].to_numpy()
        associatedDiaSources.set_index(["diaObjectId",
                                        "band",
                                        "diaSourceId"],
                                       drop=False,
                                       inplace=True)

        # Append new DiaObjects and DiaSources to their previous history.
        diaObjects = pd.concat(
            [diaObjects,
             createResults.newDiaObjects.set_index("diaObjectId", drop=False)],
            sort=True)
        # Duplicate index entries here mean an Id collision between loaded
        # and newly created DiaObjects.
        if self.testDataFrameIndex(diaObjects):
            raise RuntimeError(
                "Duplicate DiaObjects created after association. This is "
                "likely due to re-running data with an already populated "
                "Apdb. If this was not the case then there was an unexpected "
                "failure in Association while matching and creating new "
                "DiaObjects and should be reported. Exiting.")

        if len(loaderResult.diaSources) > 0:
            # We need to coerce the types of loaderResult.diaSources
            # to be the same as associatedDiaSources, thanks to pandas
            # datetime issues (DM-41100). And we may as well coerce
            # all the columns to ensure consistency for future compatibility.
            for name, dtype in associatedDiaSources.dtypes.items():
                if name in loaderResult.diaSources.columns and loaderResult.diaSources[name].dtype != dtype:
                    self.log.debug(
                        "Coercing loaderResult.diaSources column %s from %s to %s",
                        name,
                        str(loaderResult.diaSources[name].dtype),
                        str(dtype),
                    )
                    loaderResult.diaSources[name] = loaderResult.diaSources[name].astype(dtype)

            mergedDiaSourceHistory = pd.concat(
                [loaderResult.diaSources, associatedDiaSources],
                sort=True)
        else:
            # Single-frame concat still normalizes column ordering (sort=True)
            # to match the merged branch above.
            mergedDiaSourceHistory = pd.concat([associatedDiaSources], sort=True)

        # Test for DiaSource duplication first. If duplicates are found,
        # this likely means this is duplicate data being processed and sent
        # to the Apdb.
        if self.testDataFrameIndex(mergedDiaSourceHistory):
            raise RuntimeError(
                "Duplicate DiaSources found after association and merging "
                "with history. This is likely due to re-running data with an "
                "already populated Apdb. If this was not the case then there "
                "was an unexpected failure in Association while matching "
                "sources to objects, and should be reported. Exiting.")

        # Compute DiaObject Summary statistics from their full DiaSource
        # history.
        diaCalResult = self.diaCalculation.run(
            diaObjects,
            mergedDiaSourceHistory,
            updatedDiaObjectIds,
            [band])
        # Test for duplication in the updated DiaObjects.
        if self.testDataFrameIndex(diaCalResult.diaObjectCat):
            raise RuntimeError(
                "Duplicate DiaObjects (loaded + updated) created after "
                "DiaCalculation. This is unexpected behavior and should be "
                "reported. Exiting.")
        if self.testDataFrameIndex(diaCalResult.updatedDiaObjects):
            raise RuntimeError(
                "Duplicate DiaObjects (updated) created after "
                "DiaCalculation. This is unexpected behavior and should be "
                "reported. Exiting.")

        if self.config.doRunForcedMeasurement:
            # Force photometer on the Difference and Calibrated exposures using
            # the new and updated DiaObject locations.
            diaForcedSources = self.diaForcedSource.run(
                diaCalResult.diaObjectCat,
                diaCalResult.updatedDiaObjects.loc[:, "diaObjectId"].to_numpy(),
                exposure,
                diffIm,
                idGenerator=idGenerator)
        else:
            # alertPackager needs correct columns
            # NOTE(review): "diaObjectID" and "ccdVisitID" capitalization
            # differs from the "diaObjectId" column name used elsewhere in
            # this method (e.g. the set_index below) — verify against the
            # alert packager's expected schema.
            diaForcedSources = pd.DataFrame(columns=[
                "diaForcedSourceId", "diaObjectID", "ccdVisitID", "psfFlux", "psfFluxErr",
                "x", "y", "flags", "midpointMjdTai", "band",
            ])

        # Store DiaSources, updated DiaObjects, and DiaForcedSources in the
        # Apdb.
        self.apdb.store(
            DateTime.now().toAstropy(),
            diaCalResult.updatedDiaObjects,
            associatedDiaSources,
            diaForcedSources)

        if self.config.doPackageAlerts:
            # NOTE(review): "> 1" skips the merge when exactly one historical
            # forced source was loaded; "> 0" looks intended — confirm.
            if len(loaderResult.diaForcedSources) > 1:
                # We need to coerce the types of loaderResult.diaForcedSources
                # to be the same as associatedDiaSources, thanks to pandas
                # datetime issues (DM-41100). And we may as well coerce
                # all the columns to ensure consistency for future compatibility.
                for name, dtype in diaForcedSources.dtypes.items():
                    if name in loaderResult.diaForcedSources.columns and \
                            loaderResult.diaForcedSources[name].dtype != dtype:
                        self.log.debug(
                            "Coercing loaderResult.diaForcedSources column %s from %s to %s",
                            name,
                            str(loaderResult.diaForcedSources[name].dtype),
                            str(dtype),
                        )
                        loaderResult.diaForcedSources[name] = (
                            loaderResult.diaForcedSources[name].astype(dtype)
                        )
                diaForcedSources = pd.concat(
                    [diaForcedSources, loaderResult.diaForcedSources],
                    sort=True)
                if self.testDataFrameIndex(diaForcedSources):
                    self.log.warning(
                        "Duplicate DiaForcedSources created after merge with "
                        "history and new sources. This may cause downstream "
                        "problems. Dropping duplicates.")
                    # Drop duplicates via index and keep the first appearance.
                    # Reset due to the index shape being slight different than
                    # expected.
                    diaForcedSources = diaForcedSources.groupby(
                        diaForcedSources.index).first()
                    diaForcedSources.reset_index(drop=True, inplace=True)
                    diaForcedSources.set_index(
                        ["diaObjectId", "diaForcedSourceId"],
                        drop=False,
                        inplace=True)
            self.alertPackager.run(associatedDiaSources,
                                   diaCalResult.diaObjectCat,
                                   loaderResult.diaSources,
                                   diaForcedSources,
                                   diffIm,
                                   exposure,
                                   template,
                                   doRunForcedMeasurement=self.config.doRunForcedMeasurement,
                                   )

        return pipeBase.Struct(apdbMarker=self.config.apdb.value,
                               associatedDiaSources=associatedDiaSources,
                               diaForcedSources=diaForcedSources,
                               diaObjects=diaObjects,
                               longTrailedSources=assocResults.longTrailedSources
                               )

    def createNewDiaObjects(self, unAssocDiaSources):
        """Loop through the set of DiaSources and create new DiaObjects
        for unassociated DiaSources.

        Note that ``unAssocDiaSources`` is modified in place: each source's
        ``diaObjectId`` is set to its own ``diaSourceId``.

        Parameters
        ----------
        unAssocDiaSources : `pandas.DataFrame`
            Set of DiaSources to create new DiaObjects from.

        Returns
        -------
        results : `lsst.pipe.base.Struct`
            Results struct containing:

            - ``diaSources`` : DiaSource catalog with updated DiaObject ids.
              (`pandas.DataFrame`)
            - ``newDiaObjects`` : Newly created DiaObjects from the
              unassociated DiaSources. (`pandas.DataFrame`)
            - ``nNewDiaObjects`` : Number of newly created diaObjects.(`int`)
        """
        if len(unAssocDiaSources) == 0:
            # Build a throwaway DiaObject just to harvest the expected column
            # names for an empty result frame.
            tmpObj = self._initialize_dia_object(0)
            newDiaObjects = pd.DataFrame(data=[],
                                         columns=tmpObj.keys())
        else:
            # Each new DiaObject inherits its Id from the seeding DiaSource.
            newDiaObjects = unAssocDiaSources["diaSourceId"].apply(
                self._initialize_dia_object)
            unAssocDiaSources["diaObjectId"] = unAssocDiaSources["diaSourceId"]
        return pipeBase.Struct(diaSources=unAssocDiaSources,
                               newDiaObjects=newDiaObjects,
                               nNewDiaObjects=len(newDiaObjects))

    def _initialize_dia_object(self, objId):
        """Create a new DiaObject with values required to be initialized by the
        Ppdb.

        Parameters
        ----------
        objId : `int`
            ``diaObjectId`` value for the new DiaObject.

        Returns
        -------
        diaObject : `pandas.Series`
            Newly created DiaObject with keys:

            ``diaObjectId``
                Unique DiaObjectId (`int`).
            ``pmParallaxNdata``
                Number of data points used for parallax calculation (`int`).
            ``nearbyObj1``
                Id of a nearby object in the Object table (`int`).
            ``nearbyObj2``
                Id of a nearby object in the Object table (`int`).
            ``nearbyObj3``
                Id of a nearby object in the Object table (`int`).
            ``?_psfFluxNdata``
                Number of data points used to calculate point source flux
                summary statistics in each bandpass (`int`).
        """
        new_dia_object = {"diaObjectId": objId,
                          "pmParallaxNdata": 0,
                          "nearbyObj1": 0,
                          "nearbyObj2": 0,
                          "nearbyObj3": 0,
                          "flags": 0}
        # One Ndata counter per band in the standard Rubin filter set.
        for f in ["u", "g", "r", "i", "z", "y"]:
            new_dia_object["%s_psfFluxNdata" % f] = 0
        return pd.Series(data=new_dia_object)

    def testDataFrameIndex(self, df):
        """Test the sorted DataFrame index for duplicates.

        Wrapped as a separate function to allow for mocking of this task
        in unittesting. Default of a mock return for this test is True.

        Parameters
        ----------
        df : `pandas.DataFrame`
            DataFrame to test.

        Returns
        -------
        `bool`
            True if DataFrame contains duplicate rows.
        """
        return df.index.has_duplicates

    def _add_association_meta_data(self,
                                   nUpdatedDiaObjects,
                                   nUnassociatedDiaObjects,
                                   nNewDiaObjects,
                                   nTotalSsObjects,
                                   nAssociatedSsObjects):
        """Store summaries of the association step in the task metadata.

        Parameters
        ----------
        nUpdatedDiaObjects : `int`
            Number of previous DiaObjects associated and updated in this
            ccdVisit.
        nUnassociatedDiaObjects : `int`
            Number of previous DiaObjects that were not associated or updated
            in this ccdVisit.
        nNewDiaObjects : `int`
            Number of newly created DiaObjects for this ccdVisit.
        nTotalSsObjects : `int`
            Number of SolarSystemObjects within the observable detector
            area.
        nAssociatedSsObjects : `int`
            Number of successfully associated SolarSystemObjects.
        """
        self.metadata.add('numUpdatedDiaObjects', nUpdatedDiaObjects)
        self.metadata.add('numUnassociatedDiaObjects', nUnassociatedDiaObjects)
        self.metadata.add('numNewDiaObjects', nNewDiaObjects)
        self.metadata.add('numTotalSolarSystemObjects', nTotalSsObjects)
        self.metadata.add('numAssociatedSsObjects', nAssociatedSsObjects)

    def purgeDiaObjects(self, bbox, wcs, diaObjCat, buffer=0):
        """Drop diaObjects that are outside the exposure bounding box.

        Parameters
        ----------
        bbox : `lsst.geom.Box2I`
            Bounding box of the exposure.  NOTE: grown in place by ``buffer``;
            callers pass a fresh copy (e.g. from ``Exposure.getBBox()``).
        wcs : `lsst.afw.geom.SkyWcs`
            Coordinate system definition (wcs) for the exposure.
        diaObjCat : `pandas.DataFrame`
            DiaObjects loaded from the Apdb.
        buffer : `int`, optional
            Width, in pixels, to pad the exposure bounding box.

        Returns
        -------
        diaObjCat : `pandas.DataFrame`
            DiaObjects loaded from the Apdb, restricted to the exposure
            bounding box.  Returned unchanged if the purge fails.
        """
        # Deliberately best-effort: any failure (e.g. missing WCS) is logged
        # and the catalog is returned unfiltered rather than aborting the run.
        try:
            bbox.grow(buffer)
            raVals = diaObjCat.ra.to_numpy()
            decVals = diaObjCat.dec.to_numpy()
            xVals, yVals = wcs.skyToPixelArray(raVals, decVals, degrees=True)
            selector = bbox.contains(xVals, yVals)
            nPurged = np.sum(~selector)
            if nPurged > 0:
                # Copy to avoid pandas SettingWithCopy issues downstream.
                diaObjCat = diaObjCat[selector].copy()
                self.log.info(f"Dropped {nPurged} diaObjects that were outside the bbox "
                              f"leaving {len(diaObjCat)} in the catalog")
        except Exception as e:
            self.log.warning("Error attempting to check diaObject history: %s", e)
        return diaObjCat