# python/lsst/ap/association/diaPipe.py

#
# LSST Data Management System
# Copyright 2008-2016 AURA/LSST.
#
# This product includes software developed by the
# LSST Project (http://www.lsst.org/).
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the LSST License Statement and
# the GNU General Public License along with this program. If not,
# see <https://www.lsstcorp.org/LegalNotices/>.
#

"""PipelineTask for associating DiaSources with previous DiaObjects.

Additionally performs forced photometry on the calibrated and difference
images at the updated locations of DiaObjects.

Currently loads directly from the Apdb rather than pre-loading.
"""

__all__ = ("DiaPipelineConfig",
           "DiaPipelineTask",
           "DiaPipelineConnections")


import warnings

import lsst.dax.apdb as daxApdb
import lsst.pex.config as pexConfig
import lsst.pipe.base as pipeBase
import lsst.pipe.base.connectionTypes as connTypes
import numpy as np
import pandas as pd
from lsst.ap.association import (
    AssociationTask,
    DiaForcedSourceTask,
    LoadDiaCatalogsTask,
    PackageAlertsTask)
from lsst.ap.association.ssoAssociation import SolarSystemAssociationTask
from lsst.daf.base import DateTime
from lsst.meas.base import DetectorVisitIdGeneratorConfig, DiaObjectCalculationTask
from lsst.utils.timer import timeMethod

class DiaPipelineConnections(
        pipeBase.PipelineTaskConnections,
        dimensions=("instrument", "visit", "detector"),
        defaultTemplates={"coaddName": "deep", "fakesType": ""}):
    """Butler connections for DiaPipelineTask.
    """
    diaSourceTable = connTypes.Input(
        doc="Catalog of calibrated DiaSources.",
        name="{fakesType}{coaddName}Diff_diaSrcTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )
    solarSystemObjectTable = connTypes.Input(
        doc="Catalog of SolarSystem objects expected to be observable in "
            "this detectorVisit.",
        name="visitSsObjects",
        storageClass="DataFrame",
        dimensions=("instrument", "visit"),
    )
    diffIm = connTypes.Input(
        doc="Difference image on which the DiaSources were detected.",
        name="{fakesType}{coaddName}Diff_differenceExp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
    )
    exposure = connTypes.Input(
        doc="Calibrated exposure differenced with a template image during "
            "image differencing.",
        name="{fakesType}calexp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
    )
    template = connTypes.Input(
        doc="Warped template used to create `subtractedExposure`. Not PSF "
            "matched.",
        dimensions=("instrument", "visit", "detector"),
        storageClass="ExposureF",
        name="{fakesType}{coaddName}Diff_templateExp",
    )
    apdbMarker = connTypes.Output(
        doc="Marker dataset storing the configuration of the Apdb for each "
            "visit/detector. Used to signal the completion of the pipeline.",
        name="apdb_marker",
        storageClass="Config",
        dimensions=("instrument", "visit", "detector"),
    )
    associatedDiaSources = connTypes.Output(
        doc="Optional output storing the DiaSource catalog after matching, "
            "calibration, and standardization for insertion into the Apdb.",
        name="{fakesType}{coaddName}Diff_assocDiaSrc",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )
    diaForcedSources = connTypes.Output(
        doc="Optional output storing the forced sources computed at the diaObject positions.",
        name="{fakesType}{coaddName}Diff_diaForcedSrc",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )
    diaObjects = connTypes.Output(
        doc="Optional output storing the updated diaObjects associated to these sources.",
        name="{fakesType}{coaddName}Diff_diaObject",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )

    def __init__(self, *, config=None):
        super().__init__(config=config)

        if not config.doWriteAssociatedSources:
            self.outputs.remove("associatedDiaSources")
            self.outputs.remove("diaForcedSources")
            self.outputs.remove("diaObjects")
        elif not config.doRunForcedMeasurement:
            self.outputs.remove("diaForcedSources")
        if not config.doSolarSystemAssociation:
            self.inputs.remove("solarSystemObjectTable")
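        # Net effect on the quantum: with doWriteAssociatedSources=False only
        # the apdbMarker output remains, and with doSolarSystemAssociation=False
        # (the default) the visitSsObjects input is never requested from the
        # butler.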


    def adjustQuantum(self, inputs, outputs, label, dataId):
        """Override to make adjustments to `lsst.daf.butler.DatasetRef` objects
        in the `lsst.daf.butler.core.Quantum` during the graph generation stage
        of the activator.

        This implementation checks to make sure that the filters in the dataset
        are compatible with AP processing as set by the Apdb/DPDD schema.

        Parameters
        ----------
        inputs : `dict`
            Dictionary whose keys are an input (regular or prerequisite)
            connection name and whose values are a tuple of the connection
            instance and a collection of associated `DatasetRef` objects.
            The exact type of the nested collections is unspecified; it can be
            assumed to be multi-pass iterable and support `len` and ``in``, but
            it should not be mutated in place. In contrast, the outer
            dictionaries are guaranteed to be temporary copies that are true
            `dict` instances, and hence may be modified and even returned; this
            is especially useful for delegating to `super` (see notes below).
        outputs : `dict`
            Dict of output datasets, with the same structure as ``inputs``.
        label : `str`
            Label for this task in the pipeline (should be used in all
            diagnostic messages).
        dataId : `lsst.daf.butler.DataCoordinate`
            Data ID for this quantum in the pipeline (should be used in all
            diagnostic messages).

        Returns
        -------
        adjusted_inputs : `dict`
            Dict of the same form as ``inputs`` with updated containers of
            input `DatasetRef` objects. Connections that are not changed
            should not be returned at all. Datasets may only be removed, not
            added. Nested collections may be of any multi-pass iterable type,
            and the order of iteration will set the order of iteration within
            `PipelineTask.runQuantum`.
        adjusted_outputs : `dict`
            Dict of updated output datasets, with the same structure and
            interpretation as ``adjusted_inputs``.

        Raises
        ------
        ScalarError
            Raised if any `Input` or `PrerequisiteInput` connection has
            ``multiple`` set to `False`, but multiple datasets are present.
        NoWorkFound
            Raised to indicate that this quantum should not be run; not enough
            datasets were found for a regular `Input` connection, and the
            quantum should be pruned or skipped.
        FileNotFoundError
            Raised to cause QuantumGraph generation to fail (with the message
            included in this exception); not enough datasets were found for a
            `PrerequisiteInput` connection.
        ValueError
            Raised if the band of any ``diffIm`` input is not listed in
            ``DiaPipelineConfig.validBands``.
        """
        _, refs = inputs["diffIm"]
        for ref in refs:
            if ref.dataId["band"] not in self.config.validBands:
                raise ValueError(
                    f"Requested '{ref.dataId['band']}' not in "
                    "DiaPipelineConfig.validBands. To process bands not in "
                    "the standard Rubin set (ugrizy) you must add the band to "
                    "the validBands list in DiaPipelineConfig and add the "
                    "appropriate columns to the Apdb schema.")
        return super().adjustQuantum(inputs, outputs, label, dataId)


class DiaPipelineConfig(pipeBase.PipelineTaskConfig,
                        pipelineConnections=DiaPipelineConnections):
    """Config for DiaPipelineTask.
    """
    coaddName = pexConfig.Field(
        doc="coadd name: typically one of deep, goodSeeing, or dcr",
        dtype=str,
        default="deep",
    )
    apdb = pexConfig.ConfigurableField(  # TODO: remove on DM-43419
        target=daxApdb.ApdbSql,
        doc="Database connection for storing associated DiaSources and "
            "DiaObjects. Must already be initialized.",
        deprecated="This field has been replaced by ``apdb_config_url``; set "
                   "``doConfigureApdb=False`` to use it. Will be removed after v28.",
    )
    apdb_config_url = pexConfig.Field(
        dtype=str,
        default=None,
        optional=False,
        doc="A config file specifying the APDB and its connection parameters, "
            "typically written by the apdb-cli command-line utility. "
            "The database must already be initialized.",
    )
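    # The URL is normally supplied as a pipeline or command-line config
    # override rather than being set in code, e.g. (hypothetical path):
    #     config.apdb_config_url = "/path/to/apdb_config.yaml"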

    validBands = pexConfig.ListField(
        dtype=str,
        default=["u", "g", "r", "i", "z", "y"],
        doc="List of bands that are valid for AP processing. To process a "
            "band not on this list, the appropriate band specific columns "
            "must be added to the Apdb schema in dax_apdb.",
    )
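    # For example, a hypothetical narrow-band filter could be enabled with a
    # config override such as
    #     config.validBands = ["u", "g", "r", "i", "z", "y", "N921"]
    # provided the matching band-specific columns exist in the Apdb schema.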

    diaCatalogLoader = pexConfig.ConfigurableField(
        target=LoadDiaCatalogsTask,
        doc="Task to load DiaObjects and DiaSources from the Apdb.",
    )
    associator = pexConfig.ConfigurableField(
        target=AssociationTask,
        doc="Task used to associate DiaSources with DiaObjects.",
    )
    doSolarSystemAssociation = pexConfig.Field(
        dtype=bool,
        default=False,
        doc="Process SolarSystem objects through the pipeline.",
    )
    solarSystemAssociator = pexConfig.ConfigurableField(
        target=SolarSystemAssociationTask,
        doc="Task used to associate DiaSources with SolarSystemObjects.",
    )
    diaCalculation = pexConfig.ConfigurableField(
        target=DiaObjectCalculationTask,
        doc="Task to compute summary statistics for DiaObjects.",
    )
    doLoadForcedSources = pexConfig.Field(
        dtype=bool,
        default=True,
        deprecated="Added to allow disabling forced sources for performance "
                   "reasons during the ops rehearsal. "
                   "It is expected to be removed.",
        doc="Load forced DiaSource history from the APDB? "
            "This should only be turned off for debugging purposes.",
    )
    doRunForcedMeasurement = pexConfig.Field(
        dtype=bool,
        default=True,
        deprecated="Added to allow disabling forced sources for performance "
                   "reasons during the ops rehearsal. "
                   "It is expected to be removed.",
        doc="Run forced measurement on all of the diaObjects? "
            "This should only be turned off for debugging purposes.",
    )
    diaForcedSource = pexConfig.ConfigurableField(
        target=DiaForcedSourceTask,
        doc="Task used to run forced photometry at DiaObject locations in the "
            "direct and difference images.",
    )
    alertPackager = pexConfig.ConfigurableField(
        target=PackageAlertsTask,
        doc="Subtask for packaging Ap data into alerts.",
    )
    doPackageAlerts = pexConfig.Field(
        dtype=bool,
        default=False,
        doc="Package Dia-data into serialized alerts for distribution and "
            "write them to disk.",
    )
    doWriteAssociatedSources = pexConfig.Field(
        dtype=bool,
        default=True,
        doc="Write out associated DiaSources, DiaForcedSources, and DiaObjects, "
            "formatted following the Science Data Model.",
    )
    imagePixelMargin = pexConfig.RangeField(
        dtype=int,
        default=10,
        min=0,
        doc="Pad the image by this many pixels before removing off-image "
            "diaObjects for association.",
    )
    idGenerator = DetectorVisitIdGeneratorConfig.make_field()
    doConfigureApdb = pexConfig.Field(  # TODO: remove on DM-43419
        dtype=bool,
        default=True,
        doc="Use the deprecated ``apdb`` sub-config to set up the APDB, "
            "instead of the new config (``apdb_config_url``). This field is "
            "provided for backward-compatibility ONLY and will be removed "
            "without notice after v28.",
    )

    def setDefaults(self):
        self.apdb.dia_object_index = "baseline"
        self.apdb.dia_object_columns = []
        self.diaCalculation.plugins = ["ap_meanPosition",
                                       "ap_nDiaSources",
                                       "ap_meanFlux",
                                       "ap_percentileFlux",
                                       "ap_sigmaFlux",
                                       "ap_chi2Flux",
                                       "ap_madFlux",
                                       "ap_skewFlux",
                                       "ap_minMaxFlux",
                                       "ap_maxSlopeFlux",
                                       "ap_meanErrFlux",
                                       "ap_linearFit",
                                       "ap_stetsonJ",
                                       "ap_meanTotFlux",
                                       "ap_sigmaTotFlux"]
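        # Together these plugins select the per-DiaObject summary statistics
        # (mean position, number of sources, flux means and scatter, chi2,
        # Stetson J, etc.) that diaCalculation derives from the full DiaSource
        # history.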

328 

329 # TODO: remove on DM-43419 

330 def validate(self): 

331 # Sidestep Config.validate to avoid validating uninitialized fields we're not using. 

332 skip = {"apdb_config_url"} if self.doConfigureApdb else {"apdb"} 

333 for name, field in self._fields.items(): 

334 if name not in skip: 

335 field.validate(self) 

336 

337 # It's possible to use apdb without setting it, bypassing the deprecation warning. 

338 if self.doConfigureApdb: 

339 warnings.warn("Config field DiaPipelineConfig.apdb is deprecated: " 

340 # Workaround for DM-44051 

341 "This field has been replaced by ``apdb_config_url``; set " 

342 "``doConfigureApdb=False`` to use it. Will be removed after v28.", 

343 FutureWarning) 

344 

345 

346class DiaPipelineTask(pipeBase.PipelineTask): 

347 """Task for loading, associating and storing Difference Image Analysis 

348 (DIA) Objects and Sources. 
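
    Notes
    -----
    The task is normally executed through ``runQuantum`` as part of an AP
    pipeline. A rough standalone sketch, assuming an already-initialized APDB
    and in-memory inputs (every variable name below is a placeholder)::

        config = DiaPipelineConfig()
        config.doConfigureApdb = False
        config.apdb_config_url = "apdb_config.yaml"  # hypothetical path
        task = DiaPipelineTask(config=config)
        results = task.run(diaSourceTable, None, diffIm, exposure,
                           template, band, idGenerator)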

    """
    ConfigClass = DiaPipelineConfig
    _DefaultName = "diaPipe"

    def __init__(self, initInputs=None, **kwargs):
        super().__init__(**kwargs)
        if self.config.doConfigureApdb:
            self.apdb = self.config.apdb.apply()
        else:
            self.apdb = daxApdb.Apdb.from_uri(self.config.apdb_config_url)
        self.makeSubtask("diaCatalogLoader")
        self.makeSubtask("associator")
        self.makeSubtask("diaCalculation")
        if self.config.doRunForcedMeasurement:
            self.makeSubtask("diaForcedSource")
        if self.config.doPackageAlerts:
            self.makeSubtask("alertPackager")
        if self.config.doSolarSystemAssociation:
            self.makeSubtask("solarSystemAssociator")

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        inputs["idGenerator"] = self.config.idGenerator.apply(butlerQC.quantum.dataId)
        inputs["band"] = butlerQC.quantum.dataId["band"]
        if not self.config.doSolarSystemAssociation:
            inputs["solarSystemObjectTable"] = None

        outputs = self.run(**inputs)

        butlerQC.put(outputs, outputRefs)

    @timeMethod
    def run(self,
            diaSourceTable,
            solarSystemObjectTable,
            diffIm,
            exposure,
            template,
            band,
            idGenerator):
        """Process DiaSources and DiaObjects.

        Load previous DiaObjects and their DiaSource history. Calibrate the
        values in ``diaSourceTable``. Associate new DiaSources with previous
        DiaObjects. Run forced photometry at the updated DiaObject locations.
        Store the results in the Alert Production Database (Apdb).

        Parameters
        ----------
        diaSourceTable : `pandas.DataFrame`
            Newly detected DiaSources.
        solarSystemObjectTable : `pandas.DataFrame` or `None`
            Solar system objects expected to be observable in this
            detectorVisit. May be `None` if solar system association is
            disabled.
        diffIm : `lsst.afw.image.ExposureF`
            Difference image exposure in which the sources in
            ``diaSourceTable`` were detected.
        exposure : `lsst.afw.image.ExposureF`
            Calibrated exposure differenced with a template to create
            ``diffIm``.
        template : `lsst.afw.image.ExposureF`
            Template exposure used to create ``diffIm``.
        band : `str`
            The band in which the new DiaSources were detected.
        idGenerator : `lsst.meas.base.IdGenerator`
            Object that generates source IDs and random number generator seeds.

        Returns
        -------
        results : `lsst.pipe.base.Struct`
            Results struct with components.

            - ``apdbMarker`` : Marker dataset to store in the Butler indicating
              that this ccdVisit has completed successfully.
              (`lsst.dax.apdb.ApdbConfig`)
            - ``associatedDiaSources`` : Catalog of newly associated
              DiaSources. (`pandas.DataFrame`)
            - ``diaForcedSources`` : Catalog of forced sources measured at the
              updated DiaObject positions. (`pandas.DataFrame`)
            - ``diaObjects`` : Catalog of DiaObjects after association.
              (`pandas.DataFrame`)
        """
        # Load the DiaObjects and DiaSource history.
        loaderResult = self.diaCatalogLoader.run(diffIm, self.apdb,
                                                 doLoadForcedSources=self.config.doLoadForcedSources)
        if len(loaderResult.diaObjects) > 0:
            diaObjects = self.purgeDiaObjects(diffIm.getBBox(), diffIm.getWcs(), loaderResult.diaObjects,
                                              buffer=self.config.imagePixelMargin)
        else:
            diaObjects = loaderResult.diaObjects
        # Associate new DiaSources with existing DiaObjects.
        assocResults = self.associator.run(diaSourceTable, diaObjects)

        if self.config.doSolarSystemAssociation:
            ssoAssocResult = self.solarSystemAssociator.run(
                assocResults.unAssocDiaSources,
                solarSystemObjectTable,
                diffIm)
            createResults = self.createNewDiaObjects(
                ssoAssocResult.unAssocDiaSources)
            toAssociate = []
            if len(assocResults.matchedDiaSources) > 0:
                toAssociate.append(assocResults.matchedDiaSources)
            if len(ssoAssocResult.ssoAssocDiaSources) > 0:
                toAssociate.append(ssoAssocResult.ssoAssocDiaSources)
            toAssociate.append(createResults.diaSources)
            associatedDiaSources = pd.concat(toAssociate)
            nTotalSsObjects = ssoAssocResult.nTotalSsObjects
            nAssociatedSsObjects = ssoAssocResult.nAssociatedSsObjects
        else:
            createResults = self.createNewDiaObjects(
                assocResults.unAssocDiaSources)
            toAssociate = []
            if len(assocResults.matchedDiaSources) > 0:
                toAssociate.append(assocResults.matchedDiaSources)
            toAssociate.append(createResults.diaSources)
            associatedDiaSources = pd.concat(toAssociate)
            nTotalSsObjects = 0
            nAssociatedSsObjects = 0

        # Record summary statistics of the association step in the task
        # metadata.
        self._add_association_meta_data(assocResults.nUpdatedDiaObjects,
                                        assocResults.nUnassociatedDiaObjects,
                                        createResults.nNewDiaObjects,
                                        nTotalSsObjects,
                                        nAssociatedSsObjects)
        # Index the DiaSource catalog for this visit after all associations
        # have been made.
        updatedDiaObjectIds = associatedDiaSources["diaObjectId"][
            associatedDiaSources["diaObjectId"] != 0].to_numpy()
        associatedDiaSources.set_index(["diaObjectId",
                                        "band",
                                        "diaSourceId"],
                                       drop=False,
                                       inplace=True)
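        # The catalog is now keyed by a (diaObjectId, band, diaSourceId)
        # MultiIndex while keeping those columns in place (drop=False); e.g. a
        # row with diaObjectId=1, band="g", diaSourceId=10 is indexed by
        # (1, "g", 10). The duplicate checks below rely on this index.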

        # Append new DiaObjects and DiaSources to their previous history.
        diaObjects = pd.concat(
            [diaObjects,
             createResults.newDiaObjects.set_index("diaObjectId", drop=False)],
            sort=True)
        if self.testDataFrameIndex(diaObjects):
            raise RuntimeError(
                "Duplicate DiaObjects created after association. This is "
                "likely due to re-running data with an already populated "
                "Apdb. If this was not the case then there was an unexpected "
                "failure in Association while matching and creating new "
                "DiaObjects and should be reported. Exiting.")

        if len(loaderResult.diaSources) > 0:
            # We need to coerce the types of loaderResult.diaSources
            # to be the same as associatedDiaSources, thanks to pandas
            # datetime issues (DM-41100). And we may as well coerce
            # all the columns to ensure consistency for future compatibility.
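            # A typical case is a datetime column whose dtype differs between
            # the loaded history and the newly built associatedDiaSources;
            # astype() below realigns the history column with the new one.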

            for name, dtype in associatedDiaSources.dtypes.items():
                if name in loaderResult.diaSources.columns and loaderResult.diaSources[name].dtype != dtype:
                    self.log.debug(
                        "Coercing loaderResult.diaSources column %s from %s to %s",
                        name,
                        str(loaderResult.diaSources[name].dtype),
                        str(dtype),
                    )
                    loaderResult.diaSources[name] = loaderResult.diaSources[name].astype(dtype)

            mergedDiaSourceHistory = pd.concat(
                [loaderResult.diaSources, associatedDiaSources],
                sort=True)
        else:
            mergedDiaSourceHistory = pd.concat([associatedDiaSources], sort=True)

        # Test for DiaSource duplication first. If duplicates are found,
        # this likely means this is duplicate data being processed and sent
        # to the Apdb.
        if self.testDataFrameIndex(mergedDiaSourceHistory):
            raise RuntimeError(
                "Duplicate DiaSources found after association and merging "
                "with history. This is likely due to re-running data with an "
                "already populated Apdb. If this was not the case then there "
                "was an unexpected failure in Association while matching "
                "sources to objects, and should be reported. Exiting.")

        # Compute DiaObject Summary statistics from their full DiaSource
        # history.
        diaCalResult = self.diaCalculation.run(
            diaObjects,
            mergedDiaSourceHistory,
            updatedDiaObjectIds,
            [band])
        # Test for duplication in the updated DiaObjects.
        if self.testDataFrameIndex(diaCalResult.diaObjectCat):
            raise RuntimeError(
                "Duplicate DiaObjects (loaded + updated) created after "
                "DiaCalculation. This is unexpected behavior and should be "
                "reported. Exiting.")
        if self.testDataFrameIndex(diaCalResult.updatedDiaObjects):
            raise RuntimeError(
                "Duplicate DiaObjects (updated) created after "
                "DiaCalculation. This is unexpected behavior and should be "
                "reported. Exiting.")

        if self.config.doRunForcedMeasurement:
            # Force photometer on the Difference and Calibrated exposures using
            # the new and updated DiaObject locations.
            diaForcedSources = self.diaForcedSource.run(
                diaCalResult.diaObjectCat,
                diaCalResult.updatedDiaObjects.loc[:, "diaObjectId"].to_numpy(),
                exposure,
                diffIm,
                idGenerator=idGenerator)
        else:
            # alertPackager needs a DataFrame with the correct columns even
            # when forced measurement is skipped.
            diaForcedSources = pd.DataFrame(columns=[
                "diaForcedSourceId", "diaObjectId", "ccdVisitId", "psfFlux", "psfFluxErr",
                "x", "y", "midpointMjdTai", "band",
            ])

        # Store DiaSources, updated DiaObjects, and DiaForcedSources in the
        # Apdb.
        self.apdb.store(
            DateTime.now().toAstropy(),
            diaCalResult.updatedDiaObjects,
            associatedDiaSources,
            diaForcedSources)

        if self.config.doPackageAlerts:
            if len(loaderResult.diaForcedSources) > 0:
                # We need to coerce the types of loaderResult.diaForcedSources
                # to be the same as diaForcedSources, thanks to pandas
                # datetime issues (DM-41100). And we may as well coerce
                # all the columns to ensure consistency for future compatibility.
                for name, dtype in diaForcedSources.dtypes.items():
                    if (name in loaderResult.diaForcedSources.columns
                            and loaderResult.diaForcedSources[name].dtype != dtype):
                        self.log.debug(
                            "Coercing loaderResult.diaForcedSources column %s from %s to %s",
                            name,
                            str(loaderResult.diaForcedSources[name].dtype),
                            str(dtype),
                        )
                        loaderResult.diaForcedSources[name] = (
                            loaderResult.diaForcedSources[name].astype(dtype)
                        )
                diaForcedSources = pd.concat(
                    [diaForcedSources, loaderResult.diaForcedSources],
                    sort=True)
            if self.testDataFrameIndex(diaForcedSources):
                self.log.warning(
                    "Duplicate DiaForcedSources created after merge with "
                    "history and new sources. This may cause downstream "
                    "problems. Dropping duplicates.")
                # Drop duplicates via index and keep the first appearance.
                # Reset due to the index shape being slightly different than
                # expected.
                diaForcedSources = diaForcedSources.groupby(
                    diaForcedSources.index).first()
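                # groupby on the index collapses all rows that share an index
                # key to their first occurrence; the reset/set_index below then
                # rebuilds the (diaObjectId, diaForcedSourceId) index before the
                # catalog is handed to the alert packager.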

                diaForcedSources.reset_index(drop=True, inplace=True)
                diaForcedSources.set_index(
                    ["diaObjectId", "diaForcedSourceId"],
                    drop=False,
                    inplace=True)
            self.alertPackager.run(associatedDiaSources,
                                   diaCalResult.diaObjectCat,
                                   loaderResult.diaSources,
                                   diaForcedSources,
                                   diffIm,
                                   exposure,
                                   template,
                                   doRunForcedMeasurement=self.config.doRunForcedMeasurement,
                                   )

        # For historical reasons, apdbMarker is a Config even if it's not meant to be read.
        # A default Config is the cheapest way to satisfy the storage class.
        marker = self.config.apdb.value if self.config.doConfigureApdb else pexConfig.Config()
        return pipeBase.Struct(apdbMarker=marker,
                               associatedDiaSources=associatedDiaSources,
                               diaForcedSources=diaForcedSources,
                               diaObjects=diaObjects,
                               )

    def createNewDiaObjects(self, unAssocDiaSources):
        """Loop through the set of DiaSources and create new DiaObjects
        for unassociated DiaSources.

        Parameters
        ----------
        unAssocDiaSources : `pandas.DataFrame`
            Set of DiaSources to create new DiaObjects from.

        Returns
        -------
        results : `lsst.pipe.base.Struct`
            Results struct containing:

            - ``diaSources`` : DiaSource catalog with updated DiaObject ids.
              (`pandas.DataFrame`)
            - ``newDiaObjects`` : Newly created DiaObjects from the
              unassociated DiaSources. (`pandas.DataFrame`)
            - ``nNewDiaObjects`` : Number of newly created diaObjects. (`int`)
        """
        if len(unAssocDiaSources) == 0:
            tmpObj = self._initialize_dia_object(0)
            newDiaObjects = pd.DataFrame(data=[],
                                         columns=tmpObj.keys())
        else:
            newDiaObjects = unAssocDiaSources["diaSourceId"].apply(
                self._initialize_dia_object)
            unAssocDiaSources["diaObjectId"] = unAssocDiaSources["diaSourceId"]
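            # New DiaObjects adopt the diaSourceId of the source that created
            # them as their diaObjectId, so no separate ID allocation is
            # needed and the two catalogs stay linked.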

        return pipeBase.Struct(diaSources=unAssocDiaSources,
                               newDiaObjects=newDiaObjects,
                               nNewDiaObjects=len(newDiaObjects))

    def _initialize_dia_object(self, objId):
        """Create a new DiaObject with the values required to be initialized
        by the Apdb.

        Parameters
        ----------
        objId : `int`
            ``diaObjectId`` value for the new DiaObject.

        Returns
        -------
        diaObject : `pandas.Series`
            Newly created DiaObject with keys:

            ``diaObjectId``
                Unique DiaObjectId (`int`).
            ``pmParallaxNdata``
                Number of data points used for parallax calculation (`int`).
            ``nearbyObj1``
                Id of a nearby object in the Object table (`int`).
            ``nearbyObj2``
                Id of a nearby object in the Object table (`int`).
            ``nearbyObj3``
                Id of a nearby object in the Object table (`int`).
            ``?_psfFluxNdata``
                Number of data points used to calculate point source flux
                summary statistics in each bandpass (`int`).
        """
        new_dia_object = {"diaObjectId": objId,
                          "pmParallaxNdata": 0,
                          "nearbyObj1": 0,
                          "nearbyObj2": 0,
                          "nearbyObj3": 0}
        for f in ["u", "g", "r", "i", "z", "y"]:
            new_dia_object["%s_psfFluxNdata" % f] = 0
        return pd.Series(data=new_dia_object)

    def testDataFrameIndex(self, df):
        """Test the sorted DataFrame index for duplicates.

        Wrapped as a separate method to allow mocking of this test in unit
        tests; the default mock return value for this test is `True`.

        Parameters
        ----------
        df : `pandas.DataFrame`
            DataFrame to test.

        Returns
        -------
        `bool`
            `True` if the DataFrame contains duplicate rows.
        """
        return df.index.has_duplicates

    def _add_association_meta_data(self,
                                   nUpdatedDiaObjects,
                                   nUnassociatedDiaObjects,
                                   nNewDiaObjects,
                                   nTotalSsObjects,
                                   nAssociatedSsObjects):
        """Store summaries of the association step in the task metadata.

        Parameters
        ----------
        nUpdatedDiaObjects : `int`
            Number of previous DiaObjects associated and updated in this
            ccdVisit.
        nUnassociatedDiaObjects : `int`
            Number of previous DiaObjects that were not associated or updated
            in this ccdVisit.
        nNewDiaObjects : `int`
            Number of newly created DiaObjects for this ccdVisit.
        nTotalSsObjects : `int`
            Number of SolarSystemObjects within the observable detector
            area.
        nAssociatedSsObjects : `int`
            Number of successfully associated SolarSystemObjects.
        """
        self.metadata.add('numUpdatedDiaObjects', nUpdatedDiaObjects)
        self.metadata.add('numUnassociatedDiaObjects', nUnassociatedDiaObjects)
        self.metadata.add('numNewDiaObjects', nNewDiaObjects)
        self.metadata.add('numTotalSolarSystemObjects', nTotalSsObjects)
        self.metadata.add('numAssociatedSsObjects', nAssociatedSsObjects)

    def purgeDiaObjects(self, bbox, wcs, diaObjCat, buffer=0):
        """Drop diaObjects that are outside the exposure bounding box.

        Parameters
        ----------
        bbox : `lsst.geom.Box2I`
            Bounding box of the exposure.
        wcs : `lsst.afw.geom.SkyWcs`
            Coordinate system definition (wcs) for the exposure.
        diaObjCat : `pandas.DataFrame`
            DiaObjects loaded from the Apdb.
        buffer : `int`, optional
            Width, in pixels, to pad the exposure bounding box.

        Returns
        -------
        diaObjCat : `pandas.DataFrame`
            DiaObjects loaded from the Apdb, restricted to the exposure
            bounding box.
        """
        try:
            bbox.grow(buffer)
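            # grow() pads the box in place; callers pass a fresh copy (e.g.
            # diffIm.getBBox() in run()), so the exposure's own bounding box
            # is not modified.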

            raVals = diaObjCat.ra.to_numpy()
            decVals = diaObjCat.dec.to_numpy()
            xVals, yVals = wcs.skyToPixelArray(raVals, decVals, degrees=True)
            selector = bbox.contains(xVals, yVals)
            nPurged = np.sum(~selector)
            if nPurged > 0:
                diaObjCat = diaObjCat[selector].copy()
                self.log.info("Dropped %i diaObjects that were outside the bbox, "
                              "leaving %i in the catalog.",
                              nPurged, len(diaObjCat))
        except Exception as e:
            self.log.warning("Error attempting to check diaObject history: %s", e)
        return diaObjCat