Coverage for python/lsst/ap/association/diaPipe.py: 29%
#
# LSST Data Management System
# Copyright 2008-2016 AURA/LSST.
#
# This product includes software developed by the
# LSST Project (http://www.lsst.org/).
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the LSST License Statement and
# the GNU General Public License along with this program. If not,
# see <https://www.lsstcorp.org/LegalNotices/>.
#

23"""PipelineTask for associating DiaSources with previous DiaObjects.
25Additionally performs forced photometry on the calibrated and difference
26images at the updated locations of DiaObjects.
28Currently loads directly from the Apdb rather than pre-loading.
29"""

import os
import pandas as pd

import lsst.dax.apdb as daxApdb
from lsst.meas.base import DiaObjectCalculationTask
import lsst.pex.config as pexConfig
import lsst.pipe.base as pipeBase
import lsst.pipe.base.connectionTypes as connTypes
from lsst.utils.timer import timeMethod

from lsst.ap.association import (
    AssociationTask,
    DiaForcedSourceTask,
    LoadDiaCatalogsTask,
    PackageAlertsTask)
from lsst.ap.association.ssoAssociation import SolarSystemAssociationTask

__all__ = ("DiaPipelineConfig",
           "DiaPipelineTask",
           "DiaPipelineConnections")
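
# Example (hypothetical sketch, not part of the pipeline code): constructing
# and running the task standalone, outside the usual pipeline activator. The
# Apdb URL and the literal input values below are assumptions for
# illustration only; the Apdb must already exist with a compatible schema.
#
#     config = DiaPipelineConfig()
#     config.apdb.db_url = "sqlite:///apdb.db"
#     config.doSolarSystemAssociation = False
#     task = DiaPipelineTask(config=config)
#     results = task.run(diaSourceTable, None, diffIm, exposure,
#                        warpedExposure, ccdExposureIdBits=32, band="g")
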

class DiaPipelineConnections(
        pipeBase.PipelineTaskConnections,
        dimensions=("instrument", "visit", "detector"),
        defaultTemplates={"coaddName": "deep", "fakesType": ""}):
    """Butler connections for DiaPipelineTask.
    """
    diaSourceTable = connTypes.Input(
        doc="Catalog of calibrated DiaSources.",
        name="{fakesType}{coaddName}Diff_diaSrcTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )
    solarSystemObjectTable = connTypes.Input(
        doc="Catalog of SolarSystem objects expected to be observable in "
            "this detectorVisit.",
        name="visitSsObjects",
        storageClass="DataFrame",
        dimensions=("instrument", "visit"),
    )
    diffIm = connTypes.Input(
        doc="Difference image on which the DiaSources were detected.",
        name="{fakesType}{coaddName}Diff_differenceExp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
    )
    exposure = connTypes.Input(
        doc="Calibrated exposure differenced with a template image during "
            "image differencing.",
        name="calexp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
    )
    warpedExposure = connTypes.Input(
        doc="Warped template used to create `subtractedExposure`. Not PSF "
            "matched.",
        dimensions=("instrument", "visit", "detector"),
        storageClass="ExposureF",
        name="{fakesType}{coaddName}Diff_warpedExp",
    )
    apdbMarker = connTypes.Output(
        doc="Marker dataset storing the configuration of the Apdb for each "
            "visit/detector. Used to signal the completion of the pipeline.",
        name="apdb_marker",
        storageClass="Config",
        dimensions=("instrument", "visit", "detector"),
    )
    associatedDiaSources = connTypes.Output(
        doc="Optional output storing the DiaSource catalog after matching, "
            "calibration, and standardization for insertion into the Apdb.",
        name="{fakesType}{coaddName}Diff_assocDiaSrc",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )

    def __init__(self, *, config=None):
        super().__init__(config=config)

        if not config.doWriteAssociatedSources:
            self.outputs.remove("associatedDiaSources")
        if not config.doSolarSystemAssociation:
            self.inputs.remove("solarSystemObjectTable")

    def adjustQuantum(self, inputs, outputs, label, dataId):
        """Override to make adjustments to `lsst.daf.butler.DatasetRef` objects
        in the `lsst.daf.butler.core.Quantum` during the graph generation stage
        of the activator.

        This implementation checks to make sure that the filters in the dataset
        are compatible with AP processing as set by the Apdb/DPDD schema.

        Parameters
        ----------
        inputs : `dict`
            Dictionary whose keys are an input (regular or prerequisite)
            connection name and whose values are a tuple of the connection
            instance and a collection of associated `DatasetRef` objects.
            The exact type of the nested collections is unspecified; it can be
            assumed to be multi-pass iterable and support `len` and ``in``, but
            it should not be mutated in place. In contrast, the outer
            dictionaries are guaranteed to be temporary copies that are true
            `dict` instances, and hence may be modified and even returned; this
            is especially useful for delegating to `super` (see notes below).
        outputs : `dict`
            Dict of output datasets, with the same structure as ``inputs``.
        label : `str`
            Label for this task in the pipeline (should be used in all
            diagnostic messages).
        dataId : `lsst.daf.butler.DataCoordinate`
            Data ID for this quantum in the pipeline (should be used in all
            diagnostic messages).

        Returns
        -------
        adjusted_inputs : `dict`
            Dict of the same form as ``inputs`` with updated containers of
            input `DatasetRef` objects. Connections that are not changed
            should not be returned at all. Datasets may only be removed, not
            added. Nested collections may be of any multi-pass iterable type,
            and the order of iteration will set the order of iteration within
            `PipelineTask.runQuantum`.
        adjusted_outputs : `dict`
            Dict of updated output datasets, with the same structure and
            interpretation as ``adjusted_inputs``.

        Raises
        ------
        ScalarError
            Raised if any `Input` or `PrerequisiteInput` connection has
            ``multiple`` set to `False`, but multiple datasets are present.
        NoWorkFound
            Raised to indicate that this quantum should not be run; not enough
            datasets were found for a regular `Input` connection, and the
            quantum should be pruned or skipped.
        FileNotFoundError
            Raised to cause QuantumGraph generation to fail (with the message
            included in this exception); not enough datasets were found for a
            `PrerequisiteInput` connection.
        """
        _, refs = inputs["diffIm"]
        for ref in refs:
            if ref.dataId["band"] not in self.config.validBands:
                raise ValueError(
                    f"Requested '{ref.dataId['band']}' not in "
                    "DiaPipelineConfig.validBands. To process bands not in "
                    "the standard Rubin set (ugrizy) you must add the band to "
                    "the validBands list in DiaPipelineConfig and add the "
                    "appropriate columns to the Apdb schema.")
        return super().adjustQuantum(inputs, outputs, label, dataId)
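
    # Example (hypothetical): how the band check above plays out. A quantum
    # whose diffIm carries a band outside the configured list fails graph
    # generation with the ValueError raised above; to process such a band it
    # must first be added to the config, e.g. via an override like the one
    # below (the override mechanism itself is an assumption for illustration):
    #
    #     config.validBands = ["u", "g", "r", "i", "z", "y", "N708"]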


class DiaPipelineConfig(pipeBase.PipelineTaskConfig,
                        pipelineConnections=DiaPipelineConnections):
    """Config for DiaPipelineTask.
    """
    coaddName = pexConfig.Field(
        doc="coadd name: typically one of deep, goodSeeing, or dcr",
        dtype=str,
        default="deep",
    )
    apdb = daxApdb.ApdbSql.makeField(
        doc="Database connection for storing associated DiaSources and "
            "DiaObjects. Must already be initialized.",
    )
    validBands = pexConfig.ListField(
        dtype=str,
        default=["u", "g", "r", "i", "z", "y"],
        doc="List of bands that are valid for AP processing. To process a "
            "band not on this list, the appropriate band-specific columns "
            "must be added to the Apdb schema in dax_apdb.",
    )
    diaCatalogLoader = pexConfig.ConfigurableField(
        target=LoadDiaCatalogsTask,
        doc="Task to load DiaObjects and DiaSources from the Apdb.",
    )
    associator = pexConfig.ConfigurableField(
        target=AssociationTask,
        doc="Task used to associate DiaSources with DiaObjects.",
    )
    doSolarSystemAssociation = pexConfig.Field(
        dtype=bool,
        default=False,
        doc="Process SolarSystem objects through the pipeline.",
    )
    solarSystemAssociator = pexConfig.ConfigurableField(
        target=SolarSystemAssociationTask,
        doc="Task used to associate DiaSources with SolarSystemObjects.",
    )
    diaCalculation = pexConfig.ConfigurableField(
        target=DiaObjectCalculationTask,
        doc="Task to compute summary statistics for DiaObjects.",
    )
    diaForcedSource = pexConfig.ConfigurableField(
        target=DiaForcedSourceTask,
        doc="Task used to force photometer DiaObject locations in direct and "
            "difference images.",
    )
    alertPackager = pexConfig.ConfigurableField(
        target=PackageAlertsTask,
        doc="Subtask for packaging Ap data into alerts.",
    )
    doPackageAlerts = pexConfig.Field(
        dtype=bool,
        default=False,
        doc="Package Dia-data into serialized alerts for distribution and "
            "write them to disk.",
    )
    doWriteAssociatedSources = pexConfig.Field(
        dtype=bool,
        default=False,
        doc="Write out associated and SDM-standardized DiaSources.",
    )

    def setDefaults(self):
        self.apdb.dia_object_index = "baseline"
        self.apdb.dia_object_columns = []
        self.apdb.extra_schema_file = os.path.join(
            "${AP_ASSOCIATION_DIR}",
            "data",
            "apdb-ap-pipe-schema-extra.yaml")
        self.diaCalculation.plugins = ["ap_meanPosition",
                                       "ap_nDiaSources",
                                       "ap_diaObjectFlag",
                                       "ap_meanFlux",
                                       "ap_percentileFlux",
                                       "ap_sigmaFlux",
                                       "ap_chi2Flux",
                                       "ap_madFlux",
                                       "ap_skewFlux",
                                       "ap_minMaxFlux",
                                       "ap_maxSlopeFlux",
                                       "ap_meanErrFlux",
                                       "ap_linearFit",
                                       "ap_stetsonJ",
                                       "ap_meanTotFlux",
                                       "ap_sigmaTotFlux"]
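
# Example (hypothetical config override sketch): values that might be set in
# an ap_pipe-style config override file for this task. The database URL is an
# assumption for illustration; the Apdb must already have been created with a
# matching schema.
#
#     config.doWriteAssociatedSources = True
#     config.doSolarSystemAssociation = True
#     config.apdb.db_url = "postgresql://user@host/apdb"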


class DiaPipelineTask(pipeBase.PipelineTask):
    """Task for loading, associating and storing Difference Image Analysis
    (DIA) Objects and Sources.
    """
    ConfigClass = DiaPipelineConfig
    _DefaultName = "diaPipe"
    RunnerClass = pipeBase.ButlerInitializedTaskRunner

    def __init__(self, initInputs=None, **kwargs):
        super().__init__(**kwargs)
        self.apdb = self.config.apdb.apply()
        self.makeSubtask("diaCatalogLoader")
        self.makeSubtask("associator")
        self.makeSubtask("diaCalculation")
        self.makeSubtask("diaForcedSource")
        if self.config.doPackageAlerts:
            self.makeSubtask("alertPackager")
        if self.config.doSolarSystemAssociation:
            self.makeSubtask("solarSystemAssociator")

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        expId, expBits = butlerQC.quantum.dataId.pack("visit_detector",
                                                      returnMaxBits=True)
        inputs["ccdExposureIdBits"] = expBits
        inputs["band"] = butlerQC.quantum.dataId["band"]
        if not self.config.doSolarSystemAssociation:
            inputs["solarSystemObjectTable"] = None

        outputs = self.run(**inputs)

        butlerQC.put(outputs, outputRefs)

    @timeMethod
    def run(self,
            diaSourceTable,
            solarSystemObjectTable,
            diffIm,
            exposure,
            warpedExposure,
            ccdExposureIdBits,
            band):
        """Process DiaSources and DiaObjects.

        Load previous DiaObjects and their DiaSource history. Calibrate the
        values in ``diaSourceTable``. Associate new DiaSources with previous
        DiaObjects. Run forced photometry at the updated DiaObject locations.
        Store the results in the Alert Production Database (Apdb).

        Parameters
        ----------
        diaSourceTable : `pandas.DataFrame`
            Newly detected DiaSources.
        solarSystemObjectTable : `pandas.DataFrame` or `None`
            Solar system objects expected to be observable in this
            detectorVisit, or `None` if solar system association is disabled.
        diffIm : `lsst.afw.image.ExposureF`
            Difference image exposure in which the sources in
            ``diaSourceTable`` were detected.
        exposure : `lsst.afw.image.ExposureF`
            Calibrated exposure differenced with a template to create
            ``diffIm``.
        warpedExposure : `lsst.afw.image.ExposureF`
            Template exposure used to create ``diffIm``.
        ccdExposureIdBits : `int`
            Number of bits used for a unique ``ccdVisitId``.
        band : `str`
            The band in which the new DiaSources were detected.

        Returns
        -------
        results : `lsst.pipe.base.Struct`
            Results struct with components.

            - ``apdbMarker`` : Marker dataset to store in the Butler
              indicating that this ccdVisit has completed successfully.
              (`lsst.dax.apdb.ApdbConfig`)
            - ``associatedDiaSources`` : Catalog of newly associated
              DiaSources. (`pandas.DataFrame`)
        """
        # Load the DiaObjects and DiaSource history.
        loaderResult = self.diaCatalogLoader.run(diffIm, self.apdb)

        # Associate new DiaSources with existing DiaObjects and create new
        # DiaObjects from any DiaSources left unassociated.
        assocResults = self.associator.run(diaSourceTable,
                                           loaderResult.diaObjects)
        if self.config.doSolarSystemAssociation:
            ssoAssocResult = self.solarSystemAssociator.run(
                assocResults.unAssocDiaSources,
                solarSystemObjectTable,
                diffIm)
            createResults = self.createNewDiaObjects(
                ssoAssocResult.unAssocDiaSources)
            associatedDiaSources = pd.concat(
                [assocResults.matchedDiaSources,
                 ssoAssocResult.ssoAssocDiaSources,
                 createResults.diaSources])
            nTotalSsObjects = ssoAssocResult.nTotalSsObjects
            nAssociatedSsObjects = ssoAssocResult.nAssociatedSsObjects
        else:
            createResults = self.createNewDiaObjects(
                assocResults.unAssocDiaSources)
            associatedDiaSources = pd.concat(
                [assocResults.matchedDiaSources,
                 createResults.diaSources])
            nTotalSsObjects = 0
            nAssociatedSsObjects = 0

        # Record association counts in the task metadata.
        self._add_association_meta_data(assocResults.nUpdatedDiaObjects,
                                        assocResults.nUnassociatedDiaObjects,
                                        createResults.nNewDiaObjects,
                                        nTotalSsObjects,
                                        nAssociatedSsObjects)
        # Index the DiaSource catalog for this visit after all associations
        # have been made.
        updatedDiaObjectIds = associatedDiaSources["diaObjectId"][
            associatedDiaSources["diaObjectId"] != 0].to_numpy()
        associatedDiaSources.set_index(["diaObjectId",
                                        "filterName",
                                        "diaSourceId"],
                                       drop=False,
                                       inplace=True)

        # Append new DiaObjects and DiaSources to their previous history.
        diaObjects = loaderResult.diaObjects.append(
            createResults.newDiaObjects.set_index("diaObjectId", drop=False),
            sort=True)
        if self.testDataFrameIndex(diaObjects):
            raise RuntimeError(
                "Duplicate DiaObjects created after association. This is "
                "likely due to re-running data with an already populated "
                "Apdb. If this was not the case then there was an unexpected "
                "failure in Association while matching and creating new "
                "DiaObjects and should be reported. Exiting.")
        mergedDiaSourceHistory = loaderResult.diaSources.append(
            associatedDiaSources,
            sort=True)
        # Test for DiaSource duplication first. If duplicates are found,
        # this likely means this is duplicate data being processed and sent
        # to the Apdb.
        if self.testDataFrameIndex(mergedDiaSourceHistory):
            raise RuntimeError(
                "Duplicate DiaSources found after association and merging "
                "with history. This is likely due to re-running data with an "
                "already populated Apdb. If this was not the case then there "
                "was an unexpected failure in Association while matching "
                "sources to objects, and should be reported. Exiting.")

        # Compute DiaObject summary statistics from their full DiaSource
        # history.
        diaCalResult = self.diaCalculation.run(
            diaObjects,
            mergedDiaSourceHistory,
            updatedDiaObjectIds,
            [band])
        # Test for duplication in the updated DiaObjects.
        if self.testDataFrameIndex(diaCalResult.diaObjectCat):
            raise RuntimeError(
                "Duplicate DiaObjects (loaded + updated) created after "
                "DiaCalculation. This is unexpected behavior and should be "
                "reported. Exiting.")
        if self.testDataFrameIndex(diaCalResult.updatedDiaObjects):
            raise RuntimeError(
                "Duplicate DiaObjects (updated) created after "
                "DiaCalculation. This is unexpected behavior and should be "
                "reported. Exiting.")

        # Force photometer on the Difference and Calibrated exposures using
        # the new and updated DiaObject locations.
        diaForcedSources = self.diaForcedSource.run(
            diaCalResult.diaObjectCat,
            diaCalResult.updatedDiaObjects.loc[:, "diaObjectId"].to_numpy(),
            ccdExposureIdBits,
            exposure,
            diffIm)

        # Store DiaSources, updated DiaObjects, and DiaForcedSources in the
        # Apdb.
        self.apdb.store(
            exposure.getInfo().getVisitInfo().getDate(),
            diaCalResult.updatedDiaObjects,
            associatedDiaSources,
            diaForcedSources)

        if self.config.doPackageAlerts:
            if len(loaderResult.diaForcedSources) > 1:
                diaForcedSources = diaForcedSources.append(
                    loaderResult.diaForcedSources,
                    sort=True)
                if self.testDataFrameIndex(diaForcedSources):
                    self.log.warning(
                        "Duplicate DiaForcedSources created after merge with "
                        "history and new sources. This may cause downstream "
                        "problems. Dropping duplicates.")
                    # Drop duplicates via index and keep the first appearance.
                    # Reset due to the index shape being slightly different
                    # than expected.
                    diaForcedSources = diaForcedSources.groupby(
                        diaForcedSources.index).first()
                    diaForcedSources.reset_index(drop=True, inplace=True)
                    diaForcedSources.set_index(
                        ["diaObjectId", "diaForcedSourceId"],
                        drop=False,
                        inplace=True)
            self.alertPackager.run(associatedDiaSources,
                                   diaCalResult.diaObjectCat,
                                   loaderResult.diaSources,
                                   diaForcedSources,
                                   diffIm,
                                   warpedExposure,
                                   ccdExposureIdBits)

        return pipeBase.Struct(apdbMarker=self.config.apdb.value,
                               associatedDiaSources=associatedDiaSources,)
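
    # Example (hypothetical): how the Struct returned by run() maps onto the
    # Butler outputs declared in the connections above; everything else is
    # illustrative.
    #
    #     results.apdbMarker            # written as the "apdb_marker" dataset
    #     results.associatedDiaSources  # written as
    #                                   # "{fakesType}{coaddName}Diff_assocDiaSrc",
    #                                   # only when doWriteAssociatedSources is True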

    def createNewDiaObjects(self, unAssocDiaSources):
        """Loop through the set of DiaSources and create new DiaObjects
        for unassociated DiaSources.

        Parameters
        ----------
        unAssocDiaSources : `pandas.DataFrame`
            Set of DiaSources to create new DiaObjects from.

        Returns
        -------
        results : `lsst.pipe.base.Struct`
            Results struct containing:

            - ``diaSources`` : DiaSource catalog with updated DiaObject ids.
              (`pandas.DataFrame`)
            - ``newDiaObjects`` : Newly created DiaObjects from the
              unassociated DiaSources. (`pandas.DataFrame`)
            - ``nNewDiaObjects`` : Number of newly created DiaObjects. (`int`)
        """
        if len(unAssocDiaSources) == 0:
            tmpObj = self._initialize_dia_object(0)
            newDiaObjects = pd.DataFrame(data=[],
                                         columns=tmpObj.keys())
        else:
            newDiaObjects = unAssocDiaSources["diaSourceId"].apply(
                self._initialize_dia_object)
            unAssocDiaSources["diaObjectId"] = unAssocDiaSources["diaSourceId"]
        return pipeBase.Struct(diaSources=unAssocDiaSources,
                               newDiaObjects=newDiaObjects,
                               nNewDiaObjects=len(newDiaObjects))

    def _initialize_dia_object(self, objId):
        """Create a new DiaObject with values required to be initialized by
        the Apdb.

        Parameters
        ----------
        objId : `int`
            ``diaObjectId`` value for the new DiaObject.

        Returns
        -------
        diaObject : `dict`
            Newly created DiaObject with keys:

            ``diaObjectId``
                Unique DiaObjectId (`int`).
            ``pmParallaxNdata``
                Number of data points used for parallax calculation (`int`).
            ``nearbyObj1``
                Id of a nearby object in the Object table (`int`).
            ``nearbyObj2``
                Id of a nearby object in the Object table (`int`).
            ``nearbyObj3``
                Id of a nearby object in the Object table (`int`).
            ``?PSFluxNdata``
                Number of data points used to calculate point source flux
                summary statistics in each bandpass (`int`).
        """
        new_dia_object = {"diaObjectId": objId,
                          "pmParallaxNdata": 0,
                          "nearbyObj1": 0,
                          "nearbyObj2": 0,
                          "nearbyObj3": 0,
                          "flags": 0}
        for f in ["u", "g", "r", "i", "z", "y"]:
            new_dia_object["%sPSFluxNdata" % f] = 0
        return pd.Series(data=new_dia_object)
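
    # Example (hypothetical illustration): the Series returned above for
    # objId=1234, showing a subset of the keys with their zero-initialized
    # defaults:
    #
    #     diaObjectId        1234
    #     pmParallaxNdata       0
    #     nearbyObj1            0
    #     flags                 0
    #     uPSFluxNdata          0
    #     ...
    #     dtype: int64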

    def testDataFrameIndex(self, df):
        """Test the sorted DataFrame index for duplicates.

        Wrapped as a separate function to allow for mocking of this task in
        unit testing. The default mock return value for this test is `True`.

        Parameters
        ----------
        df : `pandas.DataFrame`
            DataFrame to test.

        Returns
        -------
        `bool`
            `True` if the DataFrame index contains duplicate entries.
        """
        return df.index.has_duplicates
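
    # Example (hypothetical, plain pandas, not from this module): the
    # duplicate-index check used above, shown standalone:
    #
    #     import pandas as pd
    #     df = pd.DataFrame({"x": [1, 2, 3]}, index=[10, 11, 11])
    #     df.index.has_duplicates  # True, because index value 11 repeats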

    def _add_association_meta_data(self,
                                   nUpdatedDiaObjects,
                                   nUnassociatedDiaObjects,
                                   nNewDiaObjects,
                                   nTotalSsObjects,
                                   nAssociatedSsObjects):
        """Store summaries of the association step in the task metadata.

        Parameters
        ----------
        nUpdatedDiaObjects : `int`
            Number of previous DiaObjects associated and updated in this
            ccdVisit.
        nUnassociatedDiaObjects : `int`
            Number of previous DiaObjects that were not associated or updated
            in this ccdVisit.
        nNewDiaObjects : `int`
            Number of newly created DiaObjects for this ccdVisit.
        nTotalSsObjects : `int`
            Number of SolarSystemObjects within the observable detector
            area.
        nAssociatedSsObjects : `int`
            Number of successfully associated SolarSystemObjects.
        """
        self.metadata.add('numUpdatedDiaObjects', nUpdatedDiaObjects)
        self.metadata.add('numUnassociatedDiaObjects', nUnassociatedDiaObjects)
        self.metadata.add('numNewDiaObjects', nNewDiaObjects)
        self.metadata.add('numTotalSolarSystemObjects', nTotalSsObjects)
        self.metadata.add('numAssociatedSsObjects', nAssociatedSsObjects)