Coverage for python/lsst/ap/association/diaPipe.py : 28%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#
2# LSST Data Management System
3# Copyright 2008-2016 AURA/LSST.
4#
5# This product includes software developed by the
6# LSST Project (http://www.lsst.org/).
7#
8# This program is free software: you can redistribute it and/or modify
9# it under the terms of the GNU General Public License as published by
10# the Free Software Foundation, either version 3 of the License, or
11# (at your option) any later version.
12#
13# This program is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16# GNU General Public License for more details.
17#
18# You should have received a copy of the LSST License Statement and
19# the GNU General Public License along with this program. If not,
20# see <https://www.lsstcorp.org/LegalNotices/>.
21#
23"""PipelineTask for associating DiaSources with previous DiaObjects.
25Additionally performs forced photometry on the calibrated and difference
26images at the updated locations of DiaObjects.
28Currently loads directly from the Apdb rather than pre-loading.
29"""
31import os
32import pandas as pd
34import lsst.dax.apdb as daxApdb
35from lsst.meas.base import DiaObjectCalculationTask
36import lsst.pex.config as pexConfig
37import lsst.pipe.base as pipeBase
38import lsst.pipe.base.connectionTypes as connTypes
40from lsst.ap.association import (
41 AssociationTask,
42 DiaForcedSourceTask,
43 LoadDiaCatalogsTask,
44 PackageAlertsTask)
45from lsst.ap.association.ssoAssociation import SolarSystemAssociationTask
# Public API of this module; only these names are exported by
# ``from lsst.ap.association.diaPipe import *``.
__all__ = ("DiaPipelineConfig",
           "DiaPipelineTask",
           "DiaPipelineConnections")
class DiaPipelineConnections(
        pipeBase.PipelineTaskConnections,
        dimensions=("instrument", "visit", "detector"),
        defaultTemplates={"coaddName": "deep", "fakesType": ""}):
    """Butler connections for DiaPipelineTask.
    """
    diaSourceTable = connTypes.Input(
        doc="Catalog of calibrated DiaSources.",
        name="{fakesType}{coaddName}Diff_diaSrcTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )
    diffIm = connTypes.Input(
        doc="Difference image on which the DiaSources were detected.",
        name="{fakesType}{coaddName}Diff_differenceExp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
    )
    exposure = connTypes.Input(
        doc="Calibrated exposure differenced with a template image during "
            "image differencing.",
        name="calexp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
    )
    warpedExposure = connTypes.Input(
        doc="Warped template used to create `subtractedExposure`. Not PSF "
            "matched.",
        dimensions=("instrument", "visit", "detector"),
        storageClass="ExposureF",
        name="{fakesType}{coaddName}Diff_warpedExp",
    )
    apdbMarker = connTypes.Output(
        doc="Marker dataset storing the configuration of the Apdb for each "
            "visit/detector. Used to signal the completion of the pipeline.",
        name="apdb_marker",
        storageClass="Config",
        dimensions=("instrument", "visit", "detector"),
    )
    associatedDiaSources = connTypes.Output(
        doc="Optional output storing the DiaSource catalog after matching, "
            "calibration, and standardization for insertion into the Apdb.",
        name="{fakesType}{coaddName}Diff_assocDiaSrc",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )

    def __init__(self, *, config=None):
        super().__init__(config=config)

        # The associated-source catalog is an optional output; drop the
        # connection entirely when the config says not to persist it.
        if not config.doWriteAssociatedSources:
            self.outputs.remove("associatedDiaSources")

    def adjustQuantum(self, inputs, outputs, label, dataId):
        """Override to make adjustments to `lsst.daf.butler.DatasetRef` objects
        in the `lsst.daf.butler.core.Quantum` during the graph generation stage
        of the activator.

        This implementation checks to make sure that the filters in the dataset
        are compatible with AP processing as set by the Apdb/DPDD schema.

        Parameters
        ----------
        inputs : `dict`
            Dictionary whose keys are an input (regular or prerequisite)
            connection name and whose values are a tuple of the connection
            instance and a collection of associated `DatasetRef` objects.
            The exact type of the nested collections is unspecified; it can be
            assumed to be multi-pass iterable and support `len` and ``in``, but
            it should not be mutated in place. In contrast, the outer
            dictionaries are guaranteed to be temporary copies that are true
            `dict` instances, and hence may be modified and even returned; this
            is especially useful for delegating to `super` (see notes below).
        outputs : `dict`
            Dict of output datasets, with the same structure as ``inputs``.
        label : `str`
            Label for this task in the pipeline (should be used in all
            diagnostic messages).
        dataId : `lsst.daf.butler.DataCoordinate`
            Data ID for this quantum in the pipeline (should be used in all
            diagnostic messages).

        Returns
        -------
        adjusted_inputs : `dict`
            Dict of the same form as ``inputs`` with updated containers of
            input `DatasetRef` objects. Connections that are not changed
            should not be returned at all. Datasets may only be removed, not
            added. Nested collections may be of any multi-pass iterable type,
            and the order of iteration will set the order of iteration within
            `PipelineTask.runQuantum`.
        adjusted_outputs : `dict`
            Dict of updated output datasets, with the same structure and
            interpretation as ``adjusted_inputs``.

        Raises
        ------
        ValueError
            Raised if any ``diffIm`` input has a band that is not listed in
            ``DiaPipelineConfig.validBands``.
        ScalarError
            Raised if any `Input` or `PrerequisiteInput` connection has
            ``multiple`` set to `False`, but multiple datasets.
        NoWorkFound
            Raised to indicate that this quantum should not be run; not enough
            datasets were found for a regular `Input` connection, and the
            quantum should be pruned or skipped.
        FileNotFoundError
            Raised to cause QuantumGraph generation to fail (with the message
            included in this exception); not enough datasets were found for a
            `PrerequisiteInput` connection.
        """
        # Reject any difference image taken in a band the Apdb schema does
        # not have columns for; processing such data would fail downstream.
        _, refs = inputs["diffIm"]
        for ref in refs:
            if ref.dataId["band"] not in self.config.validBands:
                raise ValueError(
                    f"Requested '{ref.dataId['band']}' not in "
                    "DiaPipelineConfig.validBands. To process bands not in "
                    "the standard Rubin set (ugrizy) you must add the band to "
                    "the validBands list in DiaPipelineConfig and add the "
                    "appropriate columns to the Apdb schema.")
        return super().adjustQuantum(inputs, outputs, label, dataId)
class DiaPipelineConfig(pipeBase.PipelineTaskConfig,
                        pipelineConnections=DiaPipelineConnections):
    """Configuration parameters for DiaPipelineTask.
    """
    coaddName = pexConfig.Field(
        dtype=str,
        default="deep",
        doc="coadd name: typically one of deep, goodSeeing, or dcr",
    )
    apdb = daxApdb.ApdbSql.makeField(
        doc="Database connection for storing associated DiaSources and "
            "DiaObjects. Must already be initialized.",
    )
    validBands = pexConfig.ListField(
        dtype=str,
        default=["u", "g", "r", "i", "z", "y"],
        doc="List of bands that are valid for AP processing. To process a "
            "band not on this list, the appropriate band specific columns "
            "must be added to the Apdb schema in dax_apdb.",
    )
    diaCatalogLoader = pexConfig.ConfigurableField(
        target=LoadDiaCatalogsTask,
        doc="Task to load DiaObjects and DiaSources from the Apdb.",
    )
    associator = pexConfig.ConfigurableField(
        target=AssociationTask,
        doc="Task used to associate DiaSources with DiaObjects.",
    )
    diaCalculation = pexConfig.ConfigurableField(
        target=DiaObjectCalculationTask,
        doc="Task to compute summary statistics for DiaObjects.",
    )
    diaForcedSource = pexConfig.ConfigurableField(
        target=DiaForcedSourceTask,
        doc="Task used for force photometer DiaObject locations in direct and "
            "difference images.",
    )
    alertPackager = pexConfig.ConfigurableField(
        target=PackageAlertsTask,
        doc="Subtask for packaging Ap data into alerts.",
    )
    doPackageAlerts = pexConfig.Field(
        dtype=bool,
        default=False,
        doc="Package Dia-data into serialized alerts for distribution and "
            "write them to disk.",
    )
    doWriteAssociatedSources = pexConfig.Field(
        dtype=bool,
        default=False,
        doc="Write out associated and SDMed DiaSources.",
    )

    def setDefaults(self):
        # Configure the Apdb to use the baseline object index and to load
        # every DiaObject column, with the AP-pipe schema extensions.
        self.apdb.dia_object_index = "baseline"
        self.apdb.dia_object_columns = []
        self.apdb.extra_schema_file = os.path.join(
            "${AP_ASSOCIATION_DIR}", "data", "apdb-ap-pipe-schema-extra.yaml")
        # Full set of summary-statistic plugins run on each DiaObject.
        self.diaCalculation.plugins = [
            "ap_meanPosition",
            "ap_HTMIndex",
            "ap_nDiaSources",
            "ap_diaObjectFlag",
            "ap_meanFlux",
            "ap_percentileFlux",
            "ap_sigmaFlux",
            "ap_chi2Flux",
            "ap_madFlux",
            "ap_skewFlux",
            "ap_minMaxFlux",
            "ap_maxSlopeFlux",
            "ap_meanErrFlux",
            "ap_linearFit",
            "ap_stetsonJ",
            "ap_meanTotFlux",
            "ap_sigmaTotFlux",
        ]

    def validate(self):
        # Deliberately call pexConfig.Config.validate directly rather than
        # going through the full MRO.
        pexConfig.Config.validate(self)
        # TODO: this plugin is not useful, pixelization is handled by Apdb
        enabledPlugins = self.diaCalculation.plugins
        if "ap_HTMIndex" not in enabledPlugins:
            raise ValueError("DiaPipe requires the ap_HTMIndex plugin "
                             "be enabled for proper insertion into the Apdb.")
class DiaPipelineTask(pipeBase.PipelineTask):
    """Task for loading, associating and storing Difference Image Analysis
    (DIA) Objects and Sources.
    """
    ConfigClass = DiaPipelineConfig
    _DefaultName = "diaPipe"
    RunnerClass = pipeBase.ButlerInitializedTaskRunner

    def __init__(self, initInputs=None, **kwargs):
        super().__init__(**kwargs)
        self.apdb = self.config.apdb.apply()
        self.makeSubtask("diaCatalogLoader")
        self.makeSubtask("associator")
        self.makeSubtask("diaCalculation")
        self.makeSubtask("diaForcedSource")
        # The alert packager is only needed when alerts are being written.
        if self.config.doPackageAlerts:
            self.makeSubtask("alertPackager")

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        # Pack the visit/detector data ID into a unique integer and record
        # the number of bits required to represent it.
        expId, expBits = butlerQC.quantum.dataId.pack("visit_detector",
                                                      returnMaxBits=True)
        inputs["ccdExposureIdBits"] = expBits
        inputs["band"] = butlerQC.quantum.dataId["band"]

        outputs = self.run(**inputs)

        butlerQC.put(outputs, outputRefs)

    @pipeBase.timeMethod
    def run(self,
            diaSourceTable,
            diffIm,
            exposure,
            warpedExposure,
            ccdExposureIdBits,
            band):
        """Process DiaSources and DiaObjects.

        Load previous DiaObjects and their DiaSource history. Calibrate the
        values in the diaSourceCat. Associate new DiaSources with previous
        DiaObjects. Run forced photometry at the updated DiaObject locations.
        Store the results in the Alert Production Database (Apdb).

        Parameters
        ----------
        diaSourceTable : `pandas.DataFrame`
            Newly detected DiaSources.
        diffIm : `lsst.afw.image.ExposureF`
            Difference image exposure in which the sources in ``diaSourceCat``
            were detected.
        exposure : `lsst.afw.image.ExposureF`
            Calibrated exposure differenced with a template to create
            ``diffIm``.
        warpedExposure : `lsst.afw.image.ExposureF`
            Template exposure used to create diffIm.
        ccdExposureIdBits : `int`
            Number of bits used for a unique ``ccdVisitId``.
        band : `str`
            The band in which the new DiaSources were detected.

        Returns
        -------
        results : `lsst.pipe.base.Struct`
            Results struct with components.

            - ``apdbMarker`` : Marker dataset to store in the Butler indicating
              that this ccdVisit has completed successfully.
              (`lsst.dax.apdb.ApdbConfig`)
            - ``associatedDiaSources`` : Catalog of newly associated
              DiaSources. (`pandas.DataFrame`)

        Raises
        ------
        RuntimeError
            Raised if duplicate DiaObjects or DiaSources are found after
            association; usually indicates data being re-run against an
            already populated Apdb.
        """
        # Load the DiaObjects and DiaSource history.
        loaderResult = self.diaCatalogLoader.run(diffIm, self.apdb)

        # Associate new DiaSources with existing DiaObjects.
        assocResults = self.associator.run(diaSourceTable,
                                           loaderResult.diaObjects)

        # Create new DiaObjects from unassociated diaSources.
        createResults = self.createNewDiaObjects(assocResults.diaSources)
        self._add_association_meta_data(assocResults.nUpdatedDiaObjects,
                                        assocResults.nUnassociatedDiaObjects,
                                        len(createResults.newDiaObjects))

        # Index the DiaSource catalog for this visit after all associations
        # have been made.
        updatedDiaObjectIds = createResults.diaSources["diaObjectId"][
            createResults.diaSources["diaObjectId"] != 0].to_numpy()
        diaSources = createResults.diaSources.set_index(["diaObjectId",
                                                         "filterName",
                                                         "diaSourceId"],
                                                        drop=False)

        # Append new DiaObjects and DiaSources to their previous history.
        diaObjects = loaderResult.diaObjects.append(
            createResults.newDiaObjects.set_index("diaObjectId", drop=False),
            sort=True)
        if self.testDataFrameIndex(diaObjects):
            raise RuntimeError(
                "Duplicate DiaObjects created after association. This is "
                "likely due to re-running data with an already populated "
                "Apdb. If this was not the case then there was an unexpected "
                "failure in Association while matching and creating new "
                "DiaObjects and should be reported. Exiting.")
        mergedDiaSourceHistory = loaderResult.diaSources.append(
            diaSources,
            sort=True)
        # Test for DiaSource duplication first. If duplicates are found,
        # this likely means this is duplicate data being processed and sent
        # to the Apdb.
        if self.testDataFrameIndex(mergedDiaSourceHistory):
            raise RuntimeError(
                "Duplicate DiaSources found after association and merging "
                "with history. This is likely due to re-running data with an "
                "already populated Apdb. If this was not the case then there "
                "was an unexpected failure in Association while matching "
                "sources to objects, and should be reported. Exiting.")

        # Compute DiaObject Summary statistics from their full DiaSource
        # history.
        diaCalResult = self.diaCalculation.run(
            diaObjects,
            mergedDiaSourceHistory,
            updatedDiaObjectIds,
            [band])
        # Test for duplication in the updated DiaObjects.
        if self.testDataFrameIndex(diaCalResult.diaObjectCat):
            raise RuntimeError(
                "Duplicate DiaObjects (loaded + updated) created after "
                "DiaCalculation. This is unexpected behavior and should be "
                "reported. Exiting.")
        if self.testDataFrameIndex(diaCalResult.updatedDiaObjects):
            raise RuntimeError(
                "Duplicate DiaObjects (updated) created after "
                "DiaCalculation. This is unexpected behavior and should be "
                "reported. Exiting.")

        # Force photometer on the Difference and Calibrated exposures using
        # the new and updated DiaObject locations.
        diaForcedSources = self.diaForcedSource.run(
            diaCalResult.diaObjectCat,
            diaCalResult.updatedDiaObjects.loc[:, "diaObjectId"].to_numpy(),
            ccdExposureIdBits,
            exposure,
            diffIm)

        # Store DiaSources and updated DiaObjects in the Apdb.
        self.apdb.store(
            exposure.getInfo().getVisitInfo().getDate(),
            diaCalResult.updatedDiaObjects,
            diaSources,
            diaForcedSources)

        if self.config.doPackageAlerts:
            # Merge the historical forced sources in so the alert carries the
            # full forced-photometry history.
            if len(loaderResult.diaForcedSources) > 1:
                diaForcedSources = diaForcedSources.append(
                    loaderResult.diaForcedSources,
                    sort=True)
            if self.testDataFrameIndex(diaForcedSources):
                self.log.warn(
                    "Duplicate DiaForcedSources created after merge with "
                    "history and new sources. This may cause downstream "
                    "problems. Dropping duplicates.")
                # Drop duplicates via index and keep the first appearance.
                # Reset due to the index shape being slight different than
                # expected.
                diaForcedSources = diaForcedSources.groupby(
                    diaForcedSources.index).first()
                diaForcedSources.reset_index(drop=True, inplace=True)
                diaForcedSources.set_index(
                    ["diaObjectId", "diaForcedSourceId"],
                    drop=False,
                    inplace=True)
            self.alertPackager.run(diaSources,
                                   diaCalResult.diaObjectCat,
                                   loaderResult.diaSources,
                                   diaForcedSources,
                                   diffIm,
                                   warpedExposure,
                                   ccdExposureIdBits)

        return pipeBase.Struct(apdbMarker=self.config.apdb.value,
                               associatedDiaSources=diaSources)

    def createNewDiaObjects(self, diaSources):
        """Loop through the set of DiaSources and create new DiaObjects
        for unassociated DiaSources.

        Parameters
        ----------
        diaSources : `pandas.DataFrame`
            Set of DiaSources to create new DiaObjects from.

        Returns
        -------
        results : `lsst.pipe.base.Struct`
            Results struct containing:

            - ``diaSources`` : DiaSource catalog with updated DiaObject ids.
              (`pandas.DataFrame`)
            - ``newDiaObjects`` : Newly created DiaObjects from the
              unassociated DiaSources. (`pandas.DataFrame`)
        """
        newDiaObjectsList = []
        for idx, diaSource in diaSources.iterrows():
            # ``diaObjectId == 0`` marks a source association left unmatched;
            # seed a new DiaObject from it, re-using the DiaSource id.
            if diaSource["diaObjectId"] == 0:
                newDiaObjectsList.append(
                    self._initialize_dia_object(diaSource["diaSourceId"]))
                diaSources.loc[idx, "diaObjectId"] = diaSource["diaSourceId"]
        if len(newDiaObjectsList) > 0:
            newDiaObjects = pd.DataFrame(data=newDiaObjectsList)
        else:
            # No new objects: build an empty DataFrame that still carries the
            # full DiaObject column schema.
            tmpObj = self._initialize_dia_object(0)
            newDiaObjects = pd.DataFrame(data=newDiaObjectsList,
                                         columns=tmpObj.keys())
        return pipeBase.Struct(diaSources=diaSources,
                               newDiaObjects=newDiaObjects)

    def _initialize_dia_object(self, objId):
        """Create a new DiaObject with values required to be initialized by
        the Apdb.

        Parameters
        ----------
        objId : `int`
            ``diaObjectId`` value for the of the new DiaObject.

        Returns
        -------
        diaObject : `dict`
            Newly created DiaObject with keys:

            ``diaObjectId``
                Unique DiaObjectId (`int`).
            ``pmParallaxNdata``
                Number of data points used for parallax calculation (`int`).
            ``nearbyObj1``
                Id of the a nearbyObject in the Object table (`int`).
            ``nearbyObj2``
                Id of the a nearbyObject in the Object table (`int`).
            ``nearbyObj3``
                Id of the a nearbyObject in the Object table (`int`).
            ``?PSFluxData``
                Number of data points used to calculate point source flux
                summary statistics in each bandpass (`int`).
        """
        new_dia_object = {"diaObjectId": objId,
                          "pmParallaxNdata": 0,
                          "nearbyObj1": 0,
                          "nearbyObj2": 0,
                          "nearbyObj3": 0,
                          "flags": 0}
        for f in ["u", "g", "r", "i", "z", "y"]:
            new_dia_object["%sPSFluxNdata" % f] = 0
        return new_dia_object

    def testDataFrameIndex(self, df):
        """Test the sorted DataFrame index for duplicates.

        Wrapped as a separate function to allow for mocking of this task
        in unittesting. Default of a mock return for this test is True.

        Parameters
        ----------
        df : `pandas.DataFrame`
            DataFrame to test.

        Returns
        -------
        `bool`
            True if DataFrame contains duplicate rows.
        """
        return df.index.has_duplicates

    def _add_association_meta_data(self,
                                   nUpdatedDiaObjects,
                                   nUnassociatedDiaObjects,
                                   nNewDiaObjects):
        """Store summaries of the association step in the task metadata.

        Parameters
        ----------
        nUpdatedDiaObjects : `int`
            Number of previous DiaObjects associated and updated in this
            ccdVisit.
        nUnassociatedDiaObjects : `int`
            Number of previous DiaObjects that were not associated or updated
            in this ccdVisit.
        nNewDiaObjects : `int`
            Number of newly created DiaObjects for this ccdVisit.
        """
        self.metadata.add('numUpdatedDiaObjects', nUpdatedDiaObjects)
        self.metadata.add('numUnassociatedDiaObjects', nUnassociatedDiaObjects)
        self.metadata.add('numNewDiaObjects', nNewDiaObjects)
class DiaPipelineSolarSystemConnections(DiaPipelineConnections):
    """Butler connections for DiaPipelineSolarSystemTask.

    Adds an input catalog of observable Solar System objects and an output
    of the DiaSources matched to them.
    """
    ssObjects = connTypes.Input(
        doc="Solar System Objects observable in this visit.",
        name="visitSsObjects",
        storageClass="DataFrame",
        dimensions=("instrument", "visit"),
    )
    ssObjectAssocDiaSources = connTypes.Output(
        doc="DiaSources associated with existing Solar System objects.",
        name="{fakesType}{coaddName}Diff_ssObjectAssocDiaSrc",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )
class DiaPipelineSolarySystemConfig(
        DiaPipelineConfig,
        pipelineConnections=DiaPipelineSolarSystemConnections):
    """Config for DiaPipelineSolarSystemTask.
    """
    solarSystemAssociation = pexConfig.ConfigurableField(
        target=SolarSystemAssociationTask,
        doc="Task used to associate DiaSources with Solar System Objects.",
    )


# Backward-compatible, correctly spelled alias: the class name above is
# misspelled ("Solary") but is referenced elsewhere, so it is retained and
# this alias is provided for new code.
DiaPipelineSolarSystemConfig = DiaPipelineSolarySystemConfig
class DiaPipelineSolarSystemTask(DiaPipelineTask):
    """Task for loading and storing Difference Image Analysis
    (DIA) Sources after associating them to previous DiaObjects and
    SSObjects.

    SSO behavior currently necessitates a separate pipelinetask, however, after
    DM-31389 is merged this SSO specific DiaPipe will merge into the default
    class.
    """
    ConfigClass = DiaPipelineSolarySystemConfig
    _DefaultName = "diaPipeSSO"
    RunnerClass = pipeBase.ButlerInitializedTaskRunner

    def __init__(self, initInputs=None, **kwargs):
        super().__init__(**kwargs)
        self.makeSubtask("solarSystemAssociation")

    @pipeBase.timeMethod
    def run(self,
            diaSourceTable,
            ssObjects,
            diffIm,
            exposure,
            warpedExposure,
            ccdExposureIdBits,
            band):
        """Process DiaSources and DiaObjects.

        Load previous DiaObjects and their DiaSource history. Calibrate the
        values in the ``diaSourceTable``. Associate new DiaSources with
        previous DiaObjects and with Solar System objects. Run forced
        photometry at the updated DiaObject locations. Store the results in
        the Alert Production Database (Apdb).

        Parameters
        ----------
        diaSourceTable : `pandas.DataFrame`
            Newly detected DiaSources.
        ssObjects : `pandas.DataFrame`
            Solar System Objects observable in this visit.
        diffIm : `lsst.afw.image.ExposureF`
            Difference image exposure in which the sources in
            ``diaSourceTable`` were detected.
        exposure : `lsst.afw.image.ExposureF`
            Calibrated exposure differenced with a template to create
            ``diffIm``.
        warpedExposure : `lsst.afw.image.ExposureF`
            Template exposure used to create diffIm.
        ccdExposureIdBits : `int`
            Number of bits used for a unique ``ccdVisitId``.
        band : `str`
            The band in which the new DiaSources were detected.

        Returns
        -------
        results : `lsst.pipe.base.Struct`
            Results struct with components.

            - ``apdbMarker`` : Marker dataset to store in the Butler indicating
              that this ccdVisit has completed successfully.
              (`lsst.dax.apdb.ApdbConfig`)
            - ``associatedDiaSources`` : Full set of DiaSources associated
              to current and new DiaObjects. This is an optional Butler output.
              (`pandas.DataFrame`)
            - ``ssObjectAssocDiaSources`` : Set of DiaSources associated with
              solar system objects. (`pandas.DataFrame`)

        Raises
        ------
        RuntimeError
            Raised if duplicate DiaObjects or DiaSources are found after
            association; usually indicates data being re-run against an
            already populated Apdb.
        """
        # Load the DiaObjects and DiaSource history.
        loaderResult = self.diaCatalogLoader.run(diffIm, self.apdb)

        # Associate new DiaSources with existing DiaObjects and update
        # DiaObject summary statistics using the full DiaSource history.
        assocResults = self.associator.run(diaSourceTable,
                                           loaderResult.diaObjects,
                                           loaderResult.diaSources)
        ssObjectAssocResults = self.solarSystemAssociation.run(
            diaSourceTable.reset_index(drop=True),
            ssObjects)

        mergedDiaSourceHistory = loaderResult.diaSources.append(
            assocResults.diaSources,
            sort=True)
        # Test for DiaSource duplication first. If duplicates are found,
        # this likely means this is duplicate data being processed and sent
        # to the Apdb.
        if self.testDataFrameIndex(mergedDiaSourceHistory):
            raise RuntimeError(
                "Duplicate DiaSources found after association and merging "
                "with history. This is likely due to re-running data with an "
                "already populated Apdb. If this was not the case then there "
                "was an unexpected failure in Association while matching "
                "sources to objects, and should be reported. Exiting.")

        diaCalResult = self.diaCalculation.run(
            assocResults.diaObjects,
            mergedDiaSourceHistory,
            assocResults.matchedDiaObjectIds,
            [band])
        if self.testDataFrameIndex(diaCalResult.diaObjectCat):
            raise RuntimeError(
                "Duplicate DiaObjects (loaded + updated) created after "
                "DiaCalculation. This is unexpected behavior and should be "
                "reported. Exiting.")
        if self.testDataFrameIndex(diaCalResult.updatedDiaObjects):
            raise RuntimeError(
                "Duplicate DiaObjects (updated) created after "
                "DiaCalculation. This is unexpected behavior and should be "
                "reported. Exiting.")

        # Force photometer on the Difference and Calibrated exposures using
        # the new and updated DiaObject locations.
        diaForcedSources = self.diaForcedSource.run(
            diaCalResult.diaObjectCat,
            diaCalResult.updatedDiaObjects.loc[:, "diaObjectId"].to_numpy(),
            ccdExposureIdBits,
            exposure,
            diffIm)

        # Store DiaSources and updated DiaObjects in the Apdb.
        self.apdb.storeDiaSources(assocResults.diaSources)
        self.apdb.storeDiaObjects(
            diaCalResult.updatedDiaObjects,
            exposure.visitInfo.date.toPython())
        self.apdb.storeDiaForcedSources(diaForcedSources)

        if self.config.doPackageAlerts:
            # Merge the historical forced sources in so the alert carries the
            # full forced-photometry history.
            if len(loaderResult.diaForcedSources) > 1:
                diaForcedSources = diaForcedSources.append(
                    loaderResult.diaForcedSources,
                    sort=True)
            if self.testDataFrameIndex(diaForcedSources):
                self.log.warn(
                    "Duplicate DiaForcedSources created after merge with "
                    "history and new sources. This may cause downstream "
                    "problems. Dropping duplicates.")
                # Drop duplicates via index and keep the first appearance.
                # Reset due to the index shape being slight different than
                # expected.
                diaForcedSources = diaForcedSources.groupby(
                    diaForcedSources.index).first()
                diaForcedSources.reset_index(drop=True, inplace=True)
                diaForcedSources.set_index(
                    ["diaObjectId", "diaForcedSourceId"],
                    drop=False,
                    inplace=True)
            self.alertPackager.run(assocResults.diaSources,
                                   diaCalResult.diaObjectCat,
                                   loaderResult.diaSources,
                                   diaForcedSources,
                                   diffIm,
                                   warpedExposure,
                                   ccdExposureIdBits)

        return pipeBase.Struct(
            apdbMarker=self.config.apdb.value,
            associatedDiaSources=assocResults.diaSources,
            ssObjectAssocDiaSources=ssObjectAssocResults.ssoAssocDiaSources)