# Coverage for python/lsst/ap/association/diaPipe.py : 28%
#
# Hot-keys on this page
# r m x p toggle line displays
# j k next/prev highlighted chunk
# 0 (zero) top of page
# 1 (one) first highlighted chunk
#
# LSST Data Management System
# Copyright 2008-2016 AURA/LSST.
#
# This product includes software developed by the
# LSST Project (http://www.lsst.org/).
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the LSST License Statement and
# the GNU General Public License along with this program. If not,
# see <https://www.lsstcorp.org/LegalNotices/>.
#

"""PipelineTask for associating DiaSources with previous DiaObjects.

Additionally performs forced photometry on the calibrated and difference
images at the updated locations of DiaObjects.

Currently loads directly from the Apdb rather than pre-loading.
"""
import os
import pandas as pd

import lsst.dax.apdb as daxApdb
from lsst.meas.base import DiaObjectCalculationTask
import lsst.pex.config as pexConfig
import lsst.pipe.base as pipeBase
import lsst.pipe.base.connectionTypes as connTypes

from lsst.ap.association import (
    AssociationTask,
    DiaForcedSourceTask,
    LoadDiaCatalogsTask,
    PackageAlertsTask)
from lsst.ap.association.ssoAssociation import SolarSystemAssociationTask

# Public names exported by this module.
__all__ = ("DiaPipelineConfig",
           "DiaPipelineTask",
           "DiaPipelineConnections")


class DiaPipelineConnections(
        pipeBase.PipelineTaskConnections,
        dimensions=("instrument", "visit", "detector"),
        defaultTemplates={"coaddName": "deep", "fakesType": ""}):
    """Butler connections for DiaPipelineTask.
    """
    diaSourceTable = connTypes.Input(
        doc="Catalog of calibrated DiaSources.",
        name="{fakesType}{coaddName}Diff_diaSrcTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )
    solarSystemObjectTable = connTypes.Input(
        doc="Catalog of SolarSolarSystem objects expected to be observable in "
            "this detectorVisit.",
        name="visitSsObjects",
        storageClass="DataFrame",
        dimensions=("instrument", "visit"),
    )
    diffIm = connTypes.Input(
        doc="Difference image on which the DiaSources were detected.",
        name="{fakesType}{coaddName}Diff_differenceExp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
    )
    exposure = connTypes.Input(
        doc="Calibrated exposure differenced with a template image during "
            "image differencing.",
        name="calexp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
    )
    warpedExposure = connTypes.Input(
        doc="Warped template used to create `subtractedExposure`. Not PSF "
            "matched.",
        dimensions=("instrument", "visit", "detector"),
        storageClass="ExposureF",
        name="{fakesType}{coaddName}Diff_warpedExp",
    )
    apdbMarker = connTypes.Output(
        doc="Marker dataset storing the configuration of the Apdb for each "
            "visit/detector. Used to signal the completion of the pipeline.",
        name="apdb_marker",
        storageClass="Config",
        dimensions=("instrument", "visit", "detector"),
    )
    associatedDiaSources = connTypes.Output(
        doc="Optional output storing the DiaSource catalog after matching, "
            "calibration, and standardization for insertation into the Apdb.",
        name="{fakesType}{coaddName}Diff_assocDiaSrc",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )

    def __init__(self, *, config=None):
        """Drop the optional connections that the config disables.

        Parameters
        ----------
        config : `DiaPipelineConfig`
            Task configuration; its ``doWriteAssociatedSources`` and
            ``doSolarSystemAssociation`` flags are read here.
        """
        super().__init__(config=config)

        # Optional output: only declared when the user asks for it.
        if not config.doWriteAssociatedSources:
            self.outputs.remove("associatedDiaSources")
        # Optional input: only required when SSO association is enabled.
        if not config.doSolarSystemAssociation:
            self.inputs.remove("solarSystemObjectTable")

    def adjustQuantum(self, inputs, outputs, label, dataId):
        """Override to make adjustments to `lsst.daf.butler.DatasetRef` objects
        in the `lsst.daf.butler.core.Quantum` during the graph generation stage
        of the activator.

        This implementation checks to make sure that the filters in the dataset
        are compatible with AP processing as set by the Apdb/DPDD schema.

        Parameters
        ----------
        inputs : `dict`
            Dictionary whose keys are an input (regular or prerequisite)
            connection name and whose values are a tuple of the connection
            instance and a collection of associated `DatasetRef` objects.
            The exact type of the nested collections is unspecified; it can be
            assumed to be multi-pass iterable and support `len` and ``in``, but
            it should not be mutated in place. In contrast, the outer
            dictionaries are guaranteed to be temporary copies that are true
            `dict` instances, and hence may be modified and even returned; this
            is especially useful for delegating to `super` (see notes below).
        outputs : `dict`
            Dict of output datasets, with the same structure as ``inputs``.
        label : `str`
            Label for this task in the pipeline (should be used in all
            diagnostic messages).
        dataId : `lsst.daf.butler.DataCoordinate`
            Data ID for this quantum in the pipeline (should be used in all
            diagnostic messages).

        Returns
        -------
        adjusted_inputs : `dict`
            Dict of the same form as ``inputs`` with updated containers of
            input `DatasetRef` objects. Connections that are not changed
            should not be returned at all. Datasets may only be removed, not
            added. Nested collections may be of any multi-pass iterable type,
            and the order of iteration will set the order of iteration within
            `PipelineTask.runQuantum`.
        adjusted_outputs : `dict`
            Dict of updated output datasets, with the same structure and
            interpretation as ``adjusted_inputs``.

        Raises
        ------
        ValueError
            Raised if the band of any ``diffIm`` dataset is not listed in
            ``DiaPipelineConfig.validBands``.
        ScalarError
            Raised if any `Input` or `PrerequisiteInput` connection has
            ``multiple`` set to `False`, but multiple datasets.
        NoWorkFound
            Raised to indicate that this quantum should not be run; not enough
            datasets were found for a regular `Input` connection, and the
            quantum should be pruned or skipped.
        FileNotFoundError
            Raised to cause QuantumGraph generation to fail (with the message
            included in this exception); not enough datasets were found for a
            `PrerequisiteInput` connection.
        """
        # Only the DatasetRefs are needed; the connection instance is ignored.
        _, refs = inputs["diffIm"]
        for ref in refs:
            if ref.dataId["band"] not in self.config.validBands:
                raise ValueError(
                    f"Requested '{ref.dataId['band']}' not in "
                    "DiaPipelineConfig.validBands. To process bands not in "
                    "the standard Rubin set (ugrizy) you must add the band to "
                    "the validBands list in DiaPipelineConfig and add the "
                    "appropriate columns to the Apdb schema.")
        return super().adjustQuantum(inputs, outputs, label, dataId)


class DiaPipelineConfig(pipeBase.PipelineTaskConfig,
                        pipelineConnections=DiaPipelineConnections):
    """Config for DiaPipelineTask.
    """
    coaddName = pexConfig.Field(
        doc="coadd name: typically one of deep, goodSeeing, or dcr",
        dtype=str,
        default="deep",
    )
    apdb = daxApdb.ApdbSql.makeField(
        doc="Database connection for storing associated DiaSources and "
            "DiaObjects. Must already be initialized.",
    )
    validBands = pexConfig.ListField(
        dtype=str,
        default=["u", "g", "r", "i", "z", "y"],
        doc="List of bands that are valid for AP processing. To process a "
            "band not on this list, the appropriate band specific columns "
            "must be added to the Apdb schema in dax_apdb.",
    )
    diaCatalogLoader = pexConfig.ConfigurableField(
        target=LoadDiaCatalogsTask,
        doc="Task to load DiaObjects and DiaSources from the Apdb.",
    )
    associator = pexConfig.ConfigurableField(
        target=AssociationTask,
        doc="Task used to associate DiaSources with DiaObjects.",
    )
    doSolarSystemAssociation = pexConfig.Field(
        dtype=bool,
        default=False,
        doc="Process SolarSystem objects through the pipeline.",
    )
    solarSystemAssociator = pexConfig.ConfigurableField(
        target=SolarSystemAssociationTask,
        doc="Task used to associate DiaSources with SolarSystemObjects.",
    )
    diaCalculation = pexConfig.ConfigurableField(
        target=DiaObjectCalculationTask,
        doc="Task to compute summary statistics for DiaObjects.",
    )
    diaForcedSource = pexConfig.ConfigurableField(
        target=DiaForcedSourceTask,
        doc="Task used for force photometer DiaObject locations in direct and "
            "difference images.",
    )
    alertPackager = pexConfig.ConfigurableField(
        target=PackageAlertsTask,
        doc="Subtask for packaging Ap data into alerts.",
    )
    doPackageAlerts = pexConfig.Field(
        dtype=bool,
        default=False,
        doc="Package Dia-data into serialized alerts for distribution and "
            "write them to disk.",
    )
    doWriteAssociatedSources = pexConfig.Field(
        dtype=bool,
        default=False,
        doc="Write out associated and SDMed DiaSources.",
    )

    def setDefaults(self):
        """Set the default Apdb options and DiaObject summary plugins."""
        self.apdb.dia_object_index = "baseline"
        self.apdb.dia_object_columns = []
        # The environment variable is left unexpanded in the stored path.
        self.apdb.extra_schema_file = os.path.join(
            "${AP_ASSOCIATION_DIR}",
            "data",
            "apdb-ap-pipe-schema-extra.yaml")
        self.diaCalculation.plugins = ["ap_meanPosition",
                                       "ap_nDiaSources",
                                       "ap_diaObjectFlag",
                                       "ap_meanFlux",
                                       "ap_percentileFlux",
                                       "ap_sigmaFlux",
                                       "ap_chi2Flux",
                                       "ap_madFlux",
                                       "ap_skewFlux",
                                       "ap_minMaxFlux",
                                       "ap_maxSlopeFlux",
                                       "ap_meanErrFlux",
                                       "ap_linearFit",
                                       "ap_stetsonJ",
                                       "ap_meanTotFlux",
                                       "ap_sigmaTotFlux"]


class DiaPipelineTask(pipeBase.PipelineTask):
    """Task for loading, associating and storing Difference Image Analysis
    (DIA) Objects and Sources.
    """
    ConfigClass = DiaPipelineConfig
    _DefaultName = "diaPipe"
    RunnerClass = pipeBase.ButlerInitializedTaskRunner

    def __init__(self, initInputs=None, **kwargs):
        """Create the Apdb handle and the configured subtasks.

        Parameters
        ----------
        initInputs : optional
            Accepted for interface compatibility; not used here.
        **kwargs
            Forwarded to the base-class constructor.
        """
        super().__init__(**kwargs)
        self.apdb = self.config.apdb.apply()
        self.makeSubtask("diaCatalogLoader")
        self.makeSubtask("associator")
        self.makeSubtask("diaCalculation")
        self.makeSubtask("diaForcedSource")
        # Optional subtasks are only built when enabled in the config.
        if self.config.doPackageAlerts:
            self.makeSubtask("alertPackager")
        if self.config.doSolarSystemAssociation:
            self.makeSubtask("solarSystemAssociator")

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        """Fetch the inputs, add data-ID derived values, run, and persist.

        Parameters
        ----------
        butlerQC
            Quantum context used to ``get`` the inputs and ``put`` the
            outputs; its ``quantum.dataId`` supplies the band and the packed
            visit/detector bit count.
        inputRefs
            References to the input datasets.
        outputRefs
            References to the output datasets.
        """
        inputs = butlerQC.get(inputRefs)
        # Only the bit count is needed; the packed id itself is unused.
        _, expBits = butlerQC.quantum.dataId.pack("visit_detector",
                                                  returnMaxBits=True)
        inputs["ccdExposureIdBits"] = expBits
        inputs["band"] = butlerQC.quantum.dataId["band"]
        # The connection is removed when SSO association is off, so supply
        # the positional argument ``run`` still expects.
        if not self.config.doSolarSystemAssociation:
            inputs["solarSystemObjectTable"] = None

        outputs = self.run(**inputs)

        butlerQC.put(outputs, outputRefs)

    @pipeBase.timeMethod
    def run(self,
            diaSourceTable,
            solarSystemObjectTable,
            diffIm,
            exposure,
            warpedExposure,
            ccdExposureIdBits,
            band):
        """Process DiaSources and DiaObjects.

        Load previous DiaObjects and their DiaSource history. Associate new
        DiaSources with previous DiaObjects. Run forced photometry at the
        updated DiaObject locations. Store the results in the Alert
        Production Database (Apdb).

        Parameters
        ----------
        diaSourceTable : `pandas.DataFrame`
            Newly detected DiaSources.
        solarSystemObjectTable : `pandas.DataFrame` or `None`
            Solar system objects passed to the solar system associator. Only
            used when ``config.doSolarSystemAssociation`` is set;
            ``runQuantum`` passes `None` otherwise.
        diffIm : `lsst.afw.image.ExposureF`
            Difference image exposure in which the sources in
            ``diaSourceTable`` were detected.
        exposure : `lsst.afw.image.ExposureF`
            Calibrated exposure differenced with a template to create
            ``diffIm``.
        warpedExposure : `lsst.afw.image.ExposureF`
            Template exposure used to create diffIm.
        ccdExposureIdBits : `int`
            Number of bits used for a unique ``ccdVisitId``.
        band : `str`
            The band in which the new DiaSources were detected.

        Returns
        -------
        results : `lsst.pipe.base.Struct`
            Results struct with components.

            - ``apdbMarker`` : Marker dataset to store in the Butler indicating
              that this ccdVisit has completed successfully.
              (`lsst.dax.apdb.ApdbConfig`)
            - ``associatedDiaSources`` : Catalog of newly associated
              DiaSources. (`pandas.DataFrame`)

        Raises
        ------
        RuntimeError
            Raised if duplicate index values are found in the merged
            DiaObjects, the merged DiaSource history, or either DiaObject
            catalog returned by the DiaObject calculation.
        """
        # Load the DiaObjects and DiaSource history.
        loaderResult = self.diaCatalogLoader.run(diffIm, self.apdb)

        # Associate new DiaSources with existing DiaObjects.
        assocResults = self.associator.run(diaSourceTable,
                                           loaderResult.diaObjects)

        # Create new DiaObjects from the DiaSources left unassociated,
        # after optionally trying to match them to solar system objects.
        if self.config.doSolarSystemAssociation:
            ssoAssocResult = self.solarSystemAssociator.run(
                assocResults.unAssocDiaSources,
                solarSystemObjectTable,
                diffIm)
            createResults = self.createNewDiaObjects(
                ssoAssocResult.unAssocDiaSources)
            associatedDiaSources = pd.concat(
                [assocResults.matchedDiaSources,
                 ssoAssocResult.ssoAssocDiaSources,
                 createResults.diaSources])
            nTotalSsObjects = ssoAssocResult.nTotalSsObjects
            nAssociatedSsObjects = ssoAssocResult.nAssociatedSsObjects
        else:
            createResults = self.createNewDiaObjects(
                assocResults.unAssocDiaSources)
            associatedDiaSources = pd.concat(
                [assocResults.matchedDiaSources,
                 createResults.diaSources])
            nTotalSsObjects = 0
            nAssociatedSsObjects = 0

        # Record the association counts in the task metadata.
        self._add_association_meta_data(assocResults.nUpdatedDiaObjects,
                                        assocResults.nUnassociatedDiaObjects,
                                        createResults.nNewDiaObjects,
                                        nTotalSsObjects,
                                        nAssociatedSsObjects)
        # Index the DiaSource catalog for this visit after all associations
        # have been made. Rows with diaObjectId == 0 are excluded from the
        # list of updated DiaObject ids.
        updatedDiaObjectIds = associatedDiaSources["diaObjectId"][
            associatedDiaSources["diaObjectId"] != 0].to_numpy()
        associatedDiaSources.set_index(["diaObjectId",
                                        "filterName",
                                        "diaSourceId"],
                                       drop=False,
                                       inplace=True)

        # Append new DiaObjects and DiaSources to their previous history.
        # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
        diaObjects = pd.concat(
            [loaderResult.diaObjects,
             createResults.newDiaObjects.set_index("diaObjectId",
                                                   drop=False)],
            sort=True)
        if self.testDataFrameIndex(diaObjects):
            raise RuntimeError(
                "Duplicate DiaObjects created after association. This is "
                "likely due to re-running data with an already populated "
                "Apdb. If this was not the case then there was an unexpected "
                "failure in Association while matching and creating new "
                "DiaObjects and should be reported. Exiting.")
        mergedDiaSourceHistory = pd.concat(
            [loaderResult.diaSources, associatedDiaSources],
            sort=True)
        # Test for DiaSource duplication first. If duplicates are found,
        # this likely means this is duplicate data being processed and sent
        # to the Apdb.
        if self.testDataFrameIndex(mergedDiaSourceHistory):
            raise RuntimeError(
                "Duplicate DiaSources found after association and merging "
                "with history. This is likely due to re-running data with an "
                "already populated Apdb. If this was not the case then there "
                "was an unexpected failure in Association while matching "
                "sources to objects, and should be reported. Exiting.")

        # Compute DiaObject Summary statistics from their full DiaSource
        # history.
        diaCalResult = self.diaCalculation.run(
            diaObjects,
            mergedDiaSourceHistory,
            updatedDiaObjectIds,
            [band])
        # Test for duplication in the updated DiaObjects.
        if self.testDataFrameIndex(diaCalResult.diaObjectCat):
            raise RuntimeError(
                "Duplicate DiaObjects (loaded + updated) created after "
                "DiaCalculation. This is unexpected behavior and should be "
                "reported. Exiting.")
        if self.testDataFrameIndex(diaCalResult.updatedDiaObjects):
            raise RuntimeError(
                "Duplicate DiaObjects (updated) created after "
                "DiaCalculation. This is unexpected behavior and should be "
                "reported. Exiting.")

        # Force photometer on the Difference and Calibrated exposures using
        # the new and updated DiaObject locations.
        diaForcedSources = self.diaForcedSource.run(
            diaCalResult.diaObjectCat,
            diaCalResult.updatedDiaObjects.loc[:, "diaObjectId"].to_numpy(),
            ccdExposureIdBits,
            exposure,
            diffIm)

        # Store DiaSources, updated DiaObjects, and DiaForcedSources in the
        # Apdb.
        self.apdb.store(
            exposure.getInfo().getVisitInfo().getDate(),
            diaCalResult.updatedDiaObjects,
            associatedDiaSources,
            diaForcedSources)

        if self.config.doPackageAlerts:
            # NOTE(review): ``> 1`` skips the merge when exactly one
            # historical DiaForcedSource was loaded; confirm whether
            # ``> 0`` was intended.
            if len(loaderResult.diaForcedSources) > 1:
                diaForcedSources = pd.concat(
                    [diaForcedSources, loaderResult.diaForcedSources],
                    sort=True)
                if self.testDataFrameIndex(diaForcedSources):
                    self.log.warn(
                        "Duplicate DiaForcedSources created after merge with "
                        "history and new sources. This may cause downstream "
                        "problems. Dropping duplicates.")
                    # Drop duplicates via index and keep the first appearance.
                    # Reset due to the index shape being slightly different
                    # than expected.
                    diaForcedSources = diaForcedSources.groupby(
                        diaForcedSources.index).first()
                    diaForcedSources.reset_index(drop=True, inplace=True)
                    diaForcedSources.set_index(
                        ["diaObjectId", "diaForcedSourceId"],
                        drop=False,
                        inplace=True)
            self.alertPackager.run(associatedDiaSources,
                                   diaCalResult.diaObjectCat,
                                   loaderResult.diaSources,
                                   diaForcedSources,
                                   diffIm,
                                   warpedExposure,
                                   ccdExposureIdBits)

        return pipeBase.Struct(apdbMarker=self.config.apdb.value,
                               associatedDiaSources=associatedDiaSources,)

    def createNewDiaObjects(self, unAssocDiaSources):
        """Loop through the set of DiaSources and create new DiaObjects
        for unassociated DiaSources.

        Parameters
        ----------
        unAssocDiaSources : `pandas.DataFrame`
            Set of DiaSources to create new DiaObjects from. Modified in
            place: its ``diaObjectId`` column is overwritten with
            ``diaSourceId``.

        Returns
        -------
        results : `lsst.pipe.base.Struct`
            Results struct containing:

            - ``diaSources`` : DiaSource catalog with updated DiaObject ids;
              the same object as ``unAssocDiaSources``.
              (`pandas.DataFrame`)
            - ``newDiaObjects`` : Newly created DiaObjects from the
              unassociated DiaSources. (`pandas.DataFrame`)
            - ``nNewDiaObjects`` : Number of newly created diaObjects.(`int`)
        """
        if len(unAssocDiaSources) == 0:
            # Build an empty frame that still carries the DiaObject columns.
            tmpObj = self._initialize_dia_object(0)
            newDiaObjects = pd.DataFrame(data=[],
                                         columns=tmpObj.keys())
        else:
            # Each new DiaObject takes the id of its seeding DiaSource.
            newDiaObjects = unAssocDiaSources["diaSourceId"].apply(
                self._initialize_dia_object)
        # NOTE(review): this writes into the caller's DataFrame; kept as-is
        # in case callers rely on the in-place update.
        unAssocDiaSources["diaObjectId"] = unAssocDiaSources["diaSourceId"]
        return pipeBase.Struct(diaSources=unAssocDiaSources,
                               newDiaObjects=newDiaObjects,
                               nNewDiaObjects=len(newDiaObjects))

    def _initialize_dia_object(self, objId):
        """Create a new DiaObject with values required to be initialized by the
        Apdb.

        Parameters
        ----------
        objId : `int`
            ``diaObjectId`` value for the new DiaObject.

        Returns
        -------
        diaObject : `pandas.Series`
            Newly created DiaObject with keys:

            ``diaObjectId``
                Unique DiaObjectId (`int`).
            ``pmParallaxNdata``
                Number of data points used for parallax calculation (`int`).
            ``nearbyObj1``
                Id of a nearbyObject in the Object table (`int`).
            ``nearbyObj2``
                Id of a nearbyObject in the Object table (`int`).
            ``nearbyObj3``
                Id of a nearbyObject in the Object table (`int`).
            ``flags``
                Initialized to zero (`int`).
            ``?PSFluxNdata``
                Number of data points used to calculate point source flux
                summary statistics in each bandpass (`int`).
        """
        new_dia_object = {"diaObjectId": objId,
                          "pmParallaxNdata": 0,
                          "nearbyObj1": 0,
                          "nearbyObj2": 0,
                          "nearbyObj3": 0,
                          "flags": 0}
        # One per-band counter column for each of the ugrizy bands.
        for f in ["u", "g", "r", "i", "z", "y"]:
            new_dia_object[f"{f}PSFluxNdata"] = 0
        return pd.Series(data=new_dia_object)

    def testDataFrameIndex(self, df):
        """Test the DataFrame index for duplicates.

        Wrapped as a separate function to allow for mocking of this task
        in unittesting. Default of a mock return for this test is True.

        Parameters
        ----------
        df : `pandas.DataFrame`
            DataFrame to test.

        Returns
        -------
        `bool`
            True if the DataFrame index contains duplicate values.
        """
        return df.index.has_duplicates

    def _add_association_meta_data(self,
                                   nUpdatedDiaObjects,
                                   nUnassociatedDiaObjects,
                                   nNewDiaObjects,
                                   nTotalSsObjects,
                                   nAssociatedSsObjects):
        """Store summaries of the association step in the task metadata.

        Parameters
        ----------
        nUpdatedDiaObjects : `int`
            Number of previous DiaObjects associated and updated in this
            ccdVisit.
        nUnassociatedDiaObjects : `int`
            Number of previous DiaObjects that were not associated or updated
            in this ccdVisit.
        nNewDiaObjects : `int`
            Number of newly created DiaObjects for this ccdVisit.
        nTotalSsObjects : `int`
            Number of SolarSystemObjects within the observable detector
            area.
        nAssociatedSsObjects : `int`
            Number of successfully associated SolarSystemObjects.
        """
        self.metadata.add('numUpdatedDiaObjects', nUpdatedDiaObjects)
        self.metadata.add('numUnassociatedDiaObjects', nUnassociatedDiaObjects)
        self.metadata.add('numNewDiaObjects', nNewDiaObjects)
        self.metadata.add('numTotalSolarSystemObjects', nTotalSsObjects)
        self.metadata.add('numAssociatedSsObjects', nAssociatedSsObjects)