Coverage for python/lsst/ap/association/diaPipe.py: 29%

#
# LSST Data Management System
# Copyright 2008-2016 AURA/LSST.
#
# This product includes software developed by the
# LSST Project (http://www.lsst.org/).
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the LSST License Statement and
# the GNU General Public License along with this program. If not,
# see <https://www.lsstcorp.org/LegalNotices/>.
#

"""PipelineTask for associating DiaSources with previous DiaObjects.

Additionally performs forced photometry on the calibrated and difference
images at the updated locations of DiaObjects.

Currently loads directly from the Apdb rather than pre-loading.
"""

import os

import pandas as pd

import lsst.dax.apdb as daxApdb
from lsst.meas.base import DiaObjectCalculationTask
import lsst.pex.config as pexConfig
import lsst.pipe.base as pipeBase
import lsst.pipe.base.connectionTypes as connTypes

from lsst.ap.association import (
    AssociationTask,
    DiaForcedSourceTask,
    LoadDiaCatalogsTask,
    PackageAlertsTask)
from lsst.ap.association.ssoAssociation import SolarSystemAssociationTask

__all__ = ("DiaPipelineConfig",
           "DiaPipelineTask",
           "DiaPipelineConnections")


class DiaPipelineConnections(
        pipeBase.PipelineTaskConnections,
        dimensions=("instrument", "visit", "detector"),
        defaultTemplates={"coaddName": "deep", "fakesType": ""}):
    """Butler connections for DiaPipelineTask.
    """
    diaSourceTable = connTypes.Input(
        doc="Catalog of calibrated DiaSources.",
        name="{fakesType}{coaddName}Diff_diaSrcTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )
    solarSystemObjectTable = connTypes.Input(
        doc="Catalog of SolarSystem objects expected to be observable in "
            "this detectorVisit.",
        name="visitSsObjects",
        storageClass="DataFrame",
        dimensions=("instrument", "visit"),
    )
    diffIm = connTypes.Input(
        doc="Difference image on which the DiaSources were detected.",
        name="{fakesType}{coaddName}Diff_differenceExp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
    )
    exposure = connTypes.Input(
        doc="Calibrated exposure differenced with a template image during "
            "image differencing.",
        name="calexp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
    )
    warpedExposure = connTypes.Input(
        doc="Warped template used to create `subtractedExposure`. Not PSF "
            "matched.",
        dimensions=("instrument", "visit", "detector"),
        storageClass="ExposureF",
        name="{fakesType}{coaddName}Diff_warpedExp",
    )
    apdbMarker = connTypes.Output(
        doc="Marker dataset storing the configuration of the Apdb for each "
            "visit/detector. Used to signal the completion of the pipeline.",
        name="apdb_marker",
        storageClass="Config",
        dimensions=("instrument", "visit", "detector"),
    )
    associatedDiaSources = connTypes.Output(
        doc="Optional output storing the DiaSource catalog after matching, "
            "calibration, and standardization for insertion into the Apdb.",
        name="{fakesType}{coaddName}Diff_assocDiaSrc",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )

    def __init__(self, *, config=None):
        super().__init__(config=config)

        if not config.doWriteAssociatedSources:
            self.outputs.remove("associatedDiaSources")
        if not config.doSolarSystemAssociation:
            self.inputs.remove("solarSystemObjectTable")

    def adjustQuantum(self, inputs, outputs, label, dataId):
        """Override to make adjustments to `lsst.daf.butler.DatasetRef`
        objects in the `lsst.daf.butler.core.Quantum` during the graph
        generation stage of the activator.

        This implementation checks to make sure that the filters in the
        dataset are compatible with AP processing as set by the Apdb/DPDD
        schema.

        Parameters
        ----------
        inputs : `dict`
            Dictionary whose keys are an input (regular or prerequisite)
            connection name and whose values are a tuple of the connection
            instance and a collection of associated `DatasetRef` objects.
            The exact type of the nested collections is unspecified; it can be
            assumed to be multi-pass iterable and support `len` and ``in``, but
            it should not be mutated in place. In contrast, the outer
            dictionaries are guaranteed to be temporary copies that are true
            `dict` instances, and hence may be modified and even returned; this
            is especially useful for delegating to `super` (see notes below).
        outputs : `dict`
            Dict of output datasets, with the same structure as ``inputs``.
        label : `str`
            Label for this task in the pipeline (should be used in all
            diagnostic messages).
        dataId : `lsst.daf.butler.DataCoordinate`
            Data ID for this quantum in the pipeline (should be used in all
            diagnostic messages).

        Returns
        -------
        adjusted_inputs : `dict`
            Dict of the same form as ``inputs`` with updated containers of
            input `DatasetRef` objects. Connections that are not changed
            should not be returned at all. Datasets may only be removed, not
            added. Nested collections may be of any multi-pass iterable type,
            and the order of iteration will set the order of iteration within
            `PipelineTask.runQuantum`.
        adjusted_outputs : `dict`
            Dict of updated output datasets, with the same structure and
            interpretation as ``adjusted_inputs``.

        Raises
        ------
        ScalarError
            Raised if any `Input` or `PrerequisiteInput` connection has
            ``multiple`` set to `False`, but multiple datasets are present.
        NoWorkFound
            Raised to indicate that this quantum should not be run; not enough
            datasets were found for a regular `Input` connection, and the
            quantum should be pruned or skipped.
        FileNotFoundError
            Raised to cause QuantumGraph generation to fail (with the message
            included in this exception); not enough datasets were found for a
            `PrerequisiteInput` connection.
        """
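        # Check that every difference-image input is in a band supported by
        # the Apdb/DPDD schema before allowing the quantum to run.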
        _, refs = inputs["diffIm"]
        for ref in refs:
            if ref.dataId["band"] not in self.config.validBands:
                raise ValueError(
                    f"Requested '{ref.dataId['band']}' not in "
                    "DiaPipelineConfig.validBands. To process bands not in "
                    "the standard Rubin set (ugrizy) you must add the band to "
                    "the validBands list in DiaPipelineConfig and add the "
                    "appropriate columns to the Apdb schema.")
        return super().adjustQuantum(inputs, outputs, label, dataId)


class DiaPipelineConfig(pipeBase.PipelineTaskConfig,
                        pipelineConnections=DiaPipelineConnections):
    """Config for DiaPipelineTask.
    """
    coaddName = pexConfig.Field(
        doc="coadd name: typically one of deep, goodSeeing, or dcr",
        dtype=str,
        default="deep",
    )
    apdb = daxApdb.ApdbSql.makeField(
        doc="Database connection for storing associated DiaSources and "
            "DiaObjects. Must already be initialized.",
    )
    validBands = pexConfig.ListField(
        dtype=str,
        default=["u", "g", "r", "i", "z", "y"],
        doc="List of bands that are valid for AP processing. To process a "
            "band not on this list, the appropriate band-specific columns "
            "must be added to the Apdb schema in dax_apdb.",
    )
    diaCatalogLoader = pexConfig.ConfigurableField(
        target=LoadDiaCatalogsTask,
        doc="Task to load DiaObjects and DiaSources from the Apdb.",
    )
    associator = pexConfig.ConfigurableField(
        target=AssociationTask,
        doc="Task used to associate DiaSources with DiaObjects.",
    )
    doSolarSystemAssociation = pexConfig.Field(
        dtype=bool,
        default=False,
        doc="Process SolarSystem objects through the pipeline.",
    )
    solarSystemAssociator = pexConfig.ConfigurableField(
        target=SolarSystemAssociationTask,
        doc="Task used to associate DiaSources with SolarSystemObjects.",
    )
    diaCalculation = pexConfig.ConfigurableField(
        target=DiaObjectCalculationTask,
        doc="Task to compute summary statistics for DiaObjects.",
    )
    diaForcedSource = pexConfig.ConfigurableField(
        target=DiaForcedSourceTask,
        doc="Task used to force photometer DiaObject locations in direct and "
            "difference images.",
    )
    alertPackager = pexConfig.ConfigurableField(
        target=PackageAlertsTask,
        doc="Subtask for packaging Ap data into alerts.",
    )
    doPackageAlerts = pexConfig.Field(
        dtype=bool,
        default=False,
        doc="Package Dia-data into serialized alerts for distribution and "
            "write them to disk.",
    )
    doWriteAssociatedSources = pexConfig.Field(
        dtype=bool,
        default=False,
        doc="Write out associated and SDMed DiaSources.",
    )

    def setDefaults(self):
        self.apdb.dia_object_index = "baseline"
        self.apdb.dia_object_columns = []
        self.apdb.extra_schema_file = os.path.join(
            "${AP_ASSOCIATION_DIR}",
            "data",
            "apdb-ap-pipe-schema-extra.yaml")
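        # Summary-statistic plugins to run on each DiaObject's DiaSource
        # history.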
        self.diaCalculation.plugins = ["ap_meanPosition",
                                       "ap_nDiaSources",
                                       "ap_diaObjectFlag",
                                       "ap_meanFlux",
                                       "ap_percentileFlux",
                                       "ap_sigmaFlux",
                                       "ap_chi2Flux",
                                       "ap_madFlux",
                                       "ap_skewFlux",
                                       "ap_minMaxFlux",
                                       "ap_maxSlopeFlux",
                                       "ap_meanErrFlux",
                                       "ap_linearFit",
                                       "ap_stetsonJ",
                                       "ap_meanTotFlux",
                                       "ap_sigmaTotFlux"]


class DiaPipelineTask(pipeBase.PipelineTask):
    """Task for loading, associating and storing Difference Image Analysis
    (DIA) Objects and Sources.
    """
    ConfigClass = DiaPipelineConfig
    _DefaultName = "diaPipe"
    RunnerClass = pipeBase.ButlerInitializedTaskRunner

    def __init__(self, initInputs=None, **kwargs):
        super().__init__(**kwargs)
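        # Build the Apdb connection from its config; the database itself must
        # already exist and be initialized.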
        self.apdb = self.config.apdb.apply()
        self.makeSubtask("diaCatalogLoader")
        self.makeSubtask("associator")
        self.makeSubtask("diaCalculation")
        self.makeSubtask("diaForcedSource")
        if self.config.doPackageAlerts:
            self.makeSubtask("alertPackager")
        if self.config.doSolarSystemAssociation:
            self.makeSubtask("solarSystemAssociator")

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
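        # Pack the visit and detector into a single integer exposure ID and
        # record how many bits the packed ccdVisitId requires.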
        expId, expBits = butlerQC.quantum.dataId.pack("visit_detector",
                                                      returnMaxBits=True)
        inputs["ccdExposureIdBits"] = expBits
        inputs["band"] = butlerQC.quantum.dataId["band"]
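        # The solarSystemObjectTable connection is removed when solar system
        # association is disabled, so pass an explicit placeholder to run().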
        if not self.config.doSolarSystemAssociation:
            inputs["solarSystemObjectTable"] = None

        outputs = self.run(**inputs)

        butlerQC.put(outputs, outputRefs)

    @pipeBase.timeMethod
    def run(self,
            diaSourceTable,
            solarSystemObjectTable,
            diffIm,
            exposure,
            warpedExposure,
            ccdExposureIdBits,
            band):
        """Process DiaSources and DiaObjects.

        Load previous DiaObjects and their DiaSource history. Calibrate the
        values in ``diaSourceTable``. Associate new DiaSources with previous
        DiaObjects. Run forced photometry at the updated DiaObject locations.
        Store the results in the Alert Production Database (Apdb).

        Parameters
        ----------
        diaSourceTable : `pandas.DataFrame`
            Newly detected DiaSources.
        solarSystemObjectTable : `pandas.DataFrame` or `None`
            Catalog of SolarSystem objects expected to be observable in this
            detectorVisit, or `None` if solar system association is disabled.
        diffIm : `lsst.afw.image.ExposureF`
            Difference image exposure in which the sources in
            ``diaSourceTable`` were detected.
        exposure : `lsst.afw.image.ExposureF`
            Calibrated exposure differenced with a template to create
            ``diffIm``.
        warpedExposure : `lsst.afw.image.ExposureF`
            Template exposure used to create ``diffIm``.
        ccdExposureIdBits : `int`
            Number of bits used for a unique ``ccdVisitId``.
        band : `str`
            The band in which the new DiaSources were detected.

        Returns
        -------
        results : `lsst.pipe.base.Struct`
            Results struct with components.

            - ``apdbMarker`` : Marker dataset to store in the Butler
              indicating that this ccdVisit has completed successfully.
              (`lsst.dax.apdb.ApdbConfig`)
            - ``associatedDiaSources`` : Catalog of newly associated
              DiaSources. (`pandas.DataFrame`)
        """
        # Load the DiaObjects and DiaSource history.
        loaderResult = self.diaCatalogLoader.run(diffIm, self.apdb)

        # Associate new DiaSources with existing DiaObjects.
        assocResults = self.associator.run(diaSourceTable,
                                           loaderResult.diaObjects)
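        # If enabled, match the remaining unassociated DiaSources against the
        # expected solar system objects before creating new DiaObjects from
        # whatever is left over.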
        if self.config.doSolarSystemAssociation:
            ssoAssocResult = self.solarSystemAssociator.run(
                assocResults.unAssocDiaSources, solarSystemObjectTable)
            createResults = self.createNewDiaObjects(
                ssoAssocResult.unAssocDiaSources)
            associatedDiaSources = pd.concat(
                [assocResults.matchedDiaSources,
                 ssoAssocResult.ssoAssocDiaSources,
                 createResults.diaSources])
        else:
            createResults = self.createNewDiaObjects(
                assocResults.unAssocDiaSources)
            associatedDiaSources = pd.concat(
                [assocResults.matchedDiaSources,
                 createResults.diaSources])

        # Record the association statistics in the task metadata.
        self._add_association_meta_data(assocResults.nUpdatedDiaObjects,
                                        assocResults.nUnassociatedDiaObjects,
                                        createResults.nNewDiaObjects)

        # Index the DiaSource catalog for this visit after all associations
        # have been made.
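        # DiaObject ids of 0 mark sources that were not associated with any
        # DiaObject; exclude them from the summary-statistic update below.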
        updatedDiaObjectIds = associatedDiaSources["diaObjectId"][
            associatedDiaSources["diaObjectId"] != 0].to_numpy()
        associatedDiaSources.set_index(["diaObjectId",
                                        "filterName",
                                        "diaSourceId"],
                                       drop=False,
                                       inplace=True)

        # Append new DiaObjects and DiaSources to their previous history.
        diaObjects = loaderResult.diaObjects.append(
            createResults.newDiaObjects.set_index("diaObjectId", drop=False),
            sort=True)
        if self.testDataFrameIndex(diaObjects):
            raise RuntimeError(
                "Duplicate DiaObjects created after association. This is "
                "likely due to re-running data with an already populated "
                "Apdb. If this was not the case then there was an unexpected "
                "failure in Association while matching and creating new "
                "DiaObjects and should be reported. Exiting.")
        mergedDiaSourceHistory = loaderResult.diaSources.append(
            associatedDiaSources,
            sort=True)
        # Test for DiaSource duplication first. If duplicates are found,
        # this likely means this is duplicate data being processed and sent
        # to the Apdb.
        if self.testDataFrameIndex(mergedDiaSourceHistory):
            raise RuntimeError(
                "Duplicate DiaSources found after association and merging "
                "with history. This is likely due to re-running data with an "
                "already populated Apdb. If this was not the case then there "
                "was an unexpected failure in Association while matching "
                "sources to objects, and should be reported. Exiting.")

        # Compute DiaObject summary statistics from their full DiaSource
        # history.
        diaCalResult = self.diaCalculation.run(
            diaObjects,
            mergedDiaSourceHistory,
            updatedDiaObjectIds,
            [band])
        # Test for duplication in the updated DiaObjects.
        if self.testDataFrameIndex(diaCalResult.diaObjectCat):
            raise RuntimeError(
                "Duplicate DiaObjects (loaded + updated) created after "
                "DiaCalculation. This is unexpected behavior and should be "
                "reported. Exiting.")
        if self.testDataFrameIndex(diaCalResult.updatedDiaObjects):
            raise RuntimeError(
                "Duplicate DiaObjects (updated) created after "
                "DiaCalculation. This is unexpected behavior and should be "
                "reported. Exiting.")

        # Force photometer on the difference and calibrated exposures using
        # the new and updated DiaObject locations.
        diaForcedSources = self.diaForcedSource.run(
            diaCalResult.diaObjectCat,
            diaCalResult.updatedDiaObjects.loc[:, "diaObjectId"].to_numpy(),
            ccdExposureIdBits,
            exposure,
            diffIm)

        # Store DiaSources, updated DiaObjects, and DiaForcedSources in the
        # Apdb.
        self.apdb.store(
            exposure.getInfo().getVisitInfo().getDate(),
            diaCalResult.updatedDiaObjects,
            associatedDiaSources,
            diaForcedSources)

        if self.config.doPackageAlerts:
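            # Alerts carry the full forced-photometry history, so append the
            # previously loaded DiaForcedSources before packaging.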
            if len(loaderResult.diaForcedSources) > 1:
                diaForcedSources = diaForcedSources.append(
                    loaderResult.diaForcedSources,
                    sort=True)
            if self.testDataFrameIndex(diaForcedSources):
                self.log.warn(
                    "Duplicate DiaForcedSources created after merge with "
                    "history and new sources. This may cause downstream "
                    "problems. Dropping duplicates.")
                # Drop duplicates via index and keep the first appearance.
                # Reset due to the index shape being slightly different than
                # expected.
                diaForcedSources = diaForcedSources.groupby(
                    diaForcedSources.index).first()
                diaForcedSources.reset_index(drop=True, inplace=True)
                diaForcedSources.set_index(
                    ["diaObjectId", "diaForcedSourceId"],
                    drop=False,
                    inplace=True)
            self.alertPackager.run(associatedDiaSources,
                                   diaCalResult.diaObjectCat,
                                   loaderResult.diaSources,
                                   diaForcedSources,
                                   diffIm,
                                   warpedExposure,
                                   ccdExposureIdBits)

        return pipeBase.Struct(apdbMarker=self.config.apdb.value,
                               associatedDiaSources=associatedDiaSources)

    def createNewDiaObjects(self, unAssocDiaSources):
        """Loop through the set of DiaSources and create new DiaObjects
        for unassociated DiaSources.

        Parameters
        ----------
        unAssocDiaSources : `pandas.DataFrame`
            Set of DiaSources to create new DiaObjects from.

        Returns
        -------
        results : `lsst.pipe.base.Struct`
            Results struct containing:

            - ``diaSources`` : DiaSource catalog with updated DiaObject ids.
              (`pandas.DataFrame`)
            - ``newDiaObjects`` : Newly created DiaObjects from the
              unassociated DiaSources. (`pandas.DataFrame`)
            - ``nNewDiaObjects`` : Number of newly created DiaObjects. (`int`)
        """
        if len(unAssocDiaSources) == 0:
            tmpObj = self._initialize_dia_object(0)
            newDiaObjects = pd.DataFrame(data=[],
                                         columns=tmpObj.keys())
        else:
            newDiaObjects = unAssocDiaSources["diaSourceId"].apply(
                self._initialize_dia_object)
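            # New DiaObjects adopt the diaSourceId of their first detection as
            # their diaObjectId.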
            unAssocDiaSources["diaObjectId"] = unAssocDiaSources["diaSourceId"]
        return pipeBase.Struct(diaSources=unAssocDiaSources,
                               newDiaObjects=newDiaObjects,
                               nNewDiaObjects=len(newDiaObjects))

    def _initialize_dia_object(self, objId):
        """Create a new DiaObject with values required to be initialized by
        the Apdb.

        Parameters
        ----------
        objId : `int`
            ``diaObjectId`` value for the new DiaObject.

        Returns
        -------
        diaObject : `dict`
            Newly created DiaObject with keys:

            ``diaObjectId``
                Unique DiaObjectId (`int`).
            ``pmParallaxNdata``
                Number of data points used for parallax calculation (`int`).
            ``nearbyObj1``
                Id of a nearby Object in the Object table (`int`).
            ``nearbyObj2``
                Id of a nearby Object in the Object table (`int`).
            ``nearbyObj3``
                Id of a nearby Object in the Object table (`int`).
            ``flags``
                DiaObject flags, initialized to 0 (`int`).
            ``?PSFluxNdata``
                Number of data points used to calculate point source flux
                summary statistics in each bandpass (`int`).
        """
        new_dia_object = {"diaObjectId": objId,
                          "pmParallaxNdata": 0,
                          "nearbyObj1": 0,
                          "nearbyObj2": 0,
                          "nearbyObj3": 0,
                          "flags": 0}
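        # Add a per-band counter of point-source flux measurements for every
        # band in the Apdb schema.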
        for f in ["u", "g", "r", "i", "z", "y"]:
            new_dia_object["%sPSFluxNdata" % f] = 0
        return pd.Series(data=new_dia_object)

    def testDataFrameIndex(self, df):
        """Test the sorted DataFrame index for duplicates.

        Wrapped as a separate function to allow for mocking of this task in
        unit testing. The default mock return value for this test is `True`.

        Parameters
        ----------
        df : `pandas.DataFrame`
            DataFrame to test.

        Returns
        -------
        `bool`
            `True` if the DataFrame contains duplicate rows.
        """
        return df.index.has_duplicates

    def _add_association_meta_data(self,
                                   nUpdatedDiaObjects,
                                   nUnassociatedDiaObjects,
                                   nNewDiaObjects):
        """Store summaries of the association step in the task metadata.

        Parameters
        ----------
        nUpdatedDiaObjects : `int`
            Number of previous DiaObjects associated and updated in this
            ccdVisit.
        nUnassociatedDiaObjects : `int`
            Number of previous DiaObjects that were not associated or updated
            in this ccdVisit.
        nNewDiaObjects : `int`
            Number of newly created DiaObjects for this ccdVisit.
        """
        self.metadata.add('numUpdatedDiaObjects', nUpdatedDiaObjects)
        self.metadata.add('numUnassociatedDiaObjects', nUnassociatedDiaObjects)
        self.metadata.add('numNewDiaObjects', nNewDiaObjects)