Coverage for python/lsst/ap/association/diaPipe.py : 28%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#
2# LSST Data Management System
3# Copyright 2008-2016 AURA/LSST.
4#
5# This product includes software developed by the
6# LSST Project (http://www.lsst.org/).
7#
8# This program is free software: you can redistribute it and/or modify
9# it under the terms of the GNU General Public License as published by
10# the Free Software Foundation, either version 3 of the License, or
11# (at your option) any later version.
12#
13# This program is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16# GNU General Public License for more details.
17#
18# You should have received a copy of the LSST License Statement and
19# the GNU General Public License along with this program. If not,
20# see <https://www.lsstcorp.org/LegalNotices/>.
21#
23"""PipelineTask for associating DiaSources with previous DiaObjects.
25Additionally performs forced photometry on the calibrated and difference
26images at the updated locations of DiaObjects.
28Currently loads directly from the Apdb rather than pre-loading.
29"""
31import os
32import pandas as pd
34import lsst.dax.apdb as daxApdb
35from lsst.meas.base import DiaObjectCalculationTask
36import lsst.pex.config as pexConfig
37import lsst.pipe.base as pipeBase
38import lsst.pipe.base.connectionTypes as connTypes
40from lsst.ap.association import (
41 AssociationTask,
42 DiaForcedSourceTask,
43 LoadDiaCatalogsTask,
44 make_dia_object_schema,
45 make_dia_source_schema,
46 PackageAlertsTask)
# Public API of this module.
__all__ = (
    "DiaPipelineConfig",
    "DiaPipelineTask",
    "DiaPipelineConnections",
)
class DiaPipelineConnections(
        pipeBase.PipelineTaskConnections,
        dimensions=("instrument", "visit", "detector"),
        defaultTemplates={"coaddName": "deep", "fakesType": ""}):
    """Butler connections for DiaPipelineTask.
    """
    diaSourceTable = connTypes.Input(
        doc="Catalog of calibrated DiaSources.",
        name="{fakesType}{coaddName}Diff_diaSrcTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )
    diffIm = connTypes.Input(
        doc="Difference image on which the DiaSources were detected.",
        name="{fakesType}{coaddName}Diff_differenceExp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
    )
    exposure = connTypes.Input(
        doc="Calibrated exposure differenced with a template image during "
            "image differencing.",
        name="calexp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
    )
    warpedExposure = connTypes.Input(
        doc="Warped template used to create `subtractedExposure`. Not PSF "
            "matched.",
        dimensions=("instrument", "visit", "detector"),
        storageClass="ExposureF",
        name="{fakesType}{coaddName}Diff_warpedExp",
    )
    apdbMarker = connTypes.Output(
        doc="Marker dataset storing the configuration of the Apdb for each "
            "visit/detector. Used to signal the completion of the pipeline.",
        name="apdb_marker",
        storageClass="Config",
        dimensions=("instrument", "visit", "detector"),
    )
    associatedDiaSources = connTypes.Output(
        doc="Optional output storing the DiaSource catalog after matching, "
            "calibration, and standardization for insertion into the Apdb.",
        name="{fakesType}{coaddName}Diff_assocDiaSrc",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )

    def __init__(self, *, config=None):
        super().__init__(config=config)

        # The associated-source catalog is an optional output; drop the
        # connection entirely when the config disables it.
        if not config.doWriteAssociatedSources:
            self.outputs.remove("associatedDiaSources")

    def adjustQuantum(self, inputs, outputs, label, dataId):
        """Override to make adjustments to `lsst.daf.butler.DatasetRef` objects
        in the `lsst.daf.butler.core.Quantum` during the graph generation stage
        of the activator.

        This implementation checks to make sure that the filters in the dataset
        are compatible with AP processing as set by the Apdb/DPDD schema.

        Parameters
        ----------
        inputs : `dict`
            Dictionary whose keys are an input (regular or prerequisite)
            connection name and whose values are a tuple of the connection
            instance and a collection of associated `DatasetRef` objects.
            The exact type of the nested collections is unspecified; it can be
            assumed to be multi-pass iterable and support `len` and ``in``, but
            it should not be mutated in place.  In contrast, the outer
            dictionaries are guaranteed to be temporary copies that are true
            `dict` instances, and hence may be modified and even returned; this
            is especially useful for delegating to `super` (see notes below).
        outputs : `dict`
            Dict of output datasets, with the same structure as ``inputs``.
        label : `str`
            Label for this task in the pipeline (should be used in all
            diagnostic messages).
        dataId : `lsst.daf.butler.DataCoordinate`
            Data ID for this quantum in the pipeline (should be used in all
            diagnostic messages).

        Returns
        -------
        adjusted_inputs : `dict`
            Dict of the same form as ``inputs`` with updated containers of
            input `DatasetRef` objects.  Connections that are not changed
            should not be returned at all.  Datasets may only be removed, not
            added.  Nested collections may be of any multi-pass iterable type,
            and the order of iteration will set the order of iteration within
            `PipelineTask.runQuantum`.
        adjusted_outputs : `dict`
            Dict of updated output datasets, with the same structure and
            interpretation as ``adjusted_inputs``.

        Raises
        ------
        ScalarError
            Raised if any `Input` or `PrerequisiteInput` connection has
            ``multiple`` set to `False`, but multiple datasets.
        NoWorkFound
            Raised to indicate that this quantum should not be run; not enough
            datasets were found for a regular `Input` connection, and the
            quantum should be pruned or skipped.
        FileNotFoundError
            Raised to cause QuantumGraph generation to fail (with the message
            included in this exception); not enough datasets were found for a
            `PrerequisiteInput` connection.
        """
        # Reject any difference-image band the Apdb schema has no columns for.
        _, refs = inputs["diffIm"]
        for ref in refs:
            if ref.dataId["band"] not in self.config.validBands:
                raise ValueError(
                    f"Requested '{ref.dataId['band']}' not in "
                    "DiaPipelineConfig.validBands. To process bands not in "
                    "the standard Rubin set (ugrizy) you must add the band to "
                    "the validBands list in DiaPipelineConfig and add the "
                    "appropriate columns to the Apdb schema.")
        return super().adjustQuantum(inputs, outputs, label, dataId)
class DiaPipelineConfig(pipeBase.PipelineTaskConfig,
                        pipelineConnections=DiaPipelineConnections):
    """Config for DiaPipelineTask.
    """
    coaddName = pexConfig.Field(
        doc="coadd name: typically one of deep, goodSeeing, or dcr",
        dtype=str,
        default="deep",
    )
    apdb = pexConfig.ConfigurableField(
        target=daxApdb.Apdb,
        ConfigClass=daxApdb.ApdbConfig,
        doc="Database connection for storing associated DiaSources and "
            "DiaObjects. Must already be initialized.",
    )
    validBands = pexConfig.ListField(
        dtype=str,
        default=["u", "g", "r", "i", "z", "y"],
        doc="List of bands that are valid for AP processing. To process a "
            "band not on this list, the appropriate band specific columns "
            "must be added to the Apdb schema in dax_apdb.",
    )
    diaCatalogLoader = pexConfig.ConfigurableField(
        target=LoadDiaCatalogsTask,
        doc="Task to load DiaObjects and DiaSources from the Apdb.",
    )
    associator = pexConfig.ConfigurableField(
        target=AssociationTask,
        doc="Task used to associate DiaSources with DiaObjects.",
    )
    diaCalculation = pexConfig.ConfigurableField(
        target=DiaObjectCalculationTask,
        doc="Task to compute summary statistics for DiaObjects.",
    )
    diaForcedSource = pexConfig.ConfigurableField(
        target=DiaForcedSourceTask,
        doc="Task used for force photometer DiaObject locations in direct and "
            "difference images.",
    )
    alertPackager = pexConfig.ConfigurableField(
        target=PackageAlertsTask,
        doc="Subtask for packaging Ap data into alerts.",
    )
    doPackageAlerts = pexConfig.Field(
        dtype=bool,
        default=False,
        doc="Package Dia-data into serialized alerts for distribution and "
            "write them to disk.",
    )
    doWriteAssociatedSources = pexConfig.Field(
        dtype=bool,
        default=False,
        doc="Write out associated and SDMed DiaSources.",
    )

    def setDefaults(self):
        self.apdb.dia_object_index = "baseline"
        self.apdb.dia_object_columns = []
        self.apdb.extra_schema_file = os.path.join(
            "${AP_ASSOCIATION_DIR}",
            "data",
            "apdb-ap-pipe-schema-extra.yaml")
        self.diaCalculation.plugins = ["ap_meanPosition",
                                       "ap_HTMIndex",
                                       "ap_nDiaSources",
                                       "ap_diaObjectFlag",
                                       "ap_meanFlux",
                                       "ap_percentileFlux",
                                       "ap_sigmaFlux",
                                       "ap_chi2Flux",
                                       "ap_madFlux",
                                       "ap_skewFlux",
                                       "ap_minMaxFlux",
                                       "ap_maxSlopeFlux",
                                       "ap_meanErrFlux",
                                       "ap_linearFit",
                                       "ap_stetsonJ",
                                       "ap_meanTotFlux",
                                       "ap_sigmaTotFlux"]

    def validate(self):
        pexConfig.Config.validate(self)
        # Test for the plugin's presence first: indexing
        # ``plugins["ap_HTMIndex"]`` when the plugin is disabled would raise
        # a bare KeyError before reaching the informative error below.
        if "ap_HTMIndex" not in self.diaCalculation.plugins:
            raise ValueError("DiaPipe requires the ap_HTMIndex plugin "
                             "be enabled for proper insertion into the Apdb.")
        if self.diaCatalogLoader.htmLevel != \
                self.diaCalculation.plugins["ap_HTMIndex"].htmLevel:
            raise ValueError("HTM index level in LoadDiaCatalogsTask must be "
                             "equal to HTMIndexDiaCalculationPlugin index "
                             "level.")
class DiaPipelineTask(pipeBase.PipelineTask):
    """Task for loading, associating and storing Difference Image Analysis
    (DIA) Objects and Sources.
    """
    ConfigClass = DiaPipelineConfig
    _DefaultName = "diaPipe"
    RunnerClass = pipeBase.ButlerInitializedTaskRunner

    def __init__(self, initInputs=None, **kwargs):
        super().__init__(**kwargs)
        # Apdb connection must already be initialized; we only attach schemas.
        self.apdb = self.config.apdb.apply(
            afw_schemas=dict(DiaObject=make_dia_object_schema(),
                             DiaSource=make_dia_source_schema()))
        self.makeSubtask("diaCatalogLoader")
        self.makeSubtask("associator")
        self.makeSubtask("diaCalculation")
        self.makeSubtask("diaForcedSource")
        if self.config.doPackageAlerts:
            self.makeSubtask("alertPackager")

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        # Pack visit+detector into a unique exposure id; only the bit width
        # is needed downstream (for unique forced-source ids).
        expId, expBits = butlerQC.quantum.dataId.pack("visit_detector",
                                                      returnMaxBits=True)
        inputs["ccdExposureIdBits"] = expBits
        inputs["band"] = butlerQC.quantum.dataId["band"]

        outputs = self.run(**inputs)

        butlerQC.put(outputs, outputRefs)

    @pipeBase.timeMethod
    def run(self,
            diaSourceTable,
            diffIm,
            exposure,
            warpedExposure,
            ccdExposureIdBits,
            band):
        """Process DiaSources and DiaObjects.

        Load previous DiaObjects and their DiaSource history. Calibrate the
        values in the diaSourceCat. Associate new DiaSources with previous
        DiaObjects. Run forced photometry at the updated DiaObject locations.
        Store the results in the Alert Production Database (Apdb).

        Parameters
        ----------
        diaSourceTable : `pandas.DataFrame`
            Newly detected DiaSources.
        diffIm : `lsst.afw.image.ExposureF`
            Difference image exposure in which the sources in ``diaSourceCat``
            were detected.
        exposure : `lsst.afw.image.ExposureF`
            Calibrated exposure differenced with a template to create
            ``diffIm``.
        warpedExposure : `lsst.afw.image.ExposureF`
            Template exposure used to create diffIm.
        ccdExposureIdBits : `int`
            Number of bits used for a unique ``ccdVisitId``.
        band : `str`
            The band in which the new DiaSources were detected.

        Returns
        -------
        results : `lsst.pipe.base.Struct`
            Results struct with components.

            - ``apdbMarker`` : Marker dataset to store in the Butler indicating
              that this ccdVisit has completed successfully.
              (`lsst.dax.apdb.ApdbConfig`)
            - ``associatedDiaSources`` : Catalog of newly associated
              DiaSources. (`pandas.DataFrame`)

        Raises
        ------
        RuntimeError
            Raised if duplicate DiaObjects or DiaSources are found after
            association/merging, which usually indicates re-processing of
            data already stored in the Apdb.
        """
        self.log.info("Running DiaPipeline...")
        # Put the SciencePipelines through a SDMification step and return
        # calibrated columns with the expected output database names.

        # Load the DiaObjects and DiaSource history.
        loaderResult = self.diaCatalogLoader.run(diffIm, self.apdb)

        # Associate new DiaSources with existing DiaObjects.
        assocResults = self.associator.run(diaSourceTable,
                                           loaderResult.diaObjects)

        # Create new DiaObjects from unassociated diaSources.
        createResults = self.createNewDiaObjects(assocResults.diaSources)
        self._add_association_meta_data(assocResults.nUpdatedDiaObjects,
                                        assocResults.nUnassociatedDiaObjects,
                                        len(createResults.newDiaObjects))

        # Index the DiaSource catalog for this visit after all associations
        # have been made.
        updatedDiaObjectIds = createResults.diaSources["diaObjectId"][
            createResults.diaSources["diaObjectId"] != 0].to_numpy()
        diaSources = createResults.diaSources.set_index(["diaObjectId",
                                                         "filterName",
                                                         "diaSourceId"],
                                                        drop=False)

        # Append new DiaObjects and DiaSources to their previous history.
        diaObjects = loaderResult.diaObjects.append(
            createResults.newDiaObjects.set_index("diaObjectId", drop=False),
            sort=True)
        if self.testDataFrameIndex(diaObjects):
            raise RuntimeError(
                "Duplicate DiaObjects created after association. This is "
                "likely due to re-running data with an already populated "
                "Apdb. If this was not the case then there was an unexpected "
                "failure in Association while matching and creating new "
                "DiaObjects and should be reported. Exiting.")
        mergedDiaSourceHistory = loaderResult.diaSources.append(
            diaSources,
            sort=True)
        # Test for DiaSource duplication first. If duplicates are found,
        # this likely means this is duplicate data being processed and sent
        # to the Apdb.
        if self.testDataFrameIndex(mergedDiaSourceHistory):
            raise RuntimeError(
                "Duplicate DiaSources found after association and merging "
                "with history. This is likely due to re-running data with an "
                "already populated Apdb. If this was not the case then there "
                "was an unexpected failure in Association while matching "
                "sources to objects, and should be reported. Exiting.")

        # Compute DiaObject Summary statistics from their full DiaSource
        # history.
        diaCalResult = self.diaCalculation.run(
            diaObjects,
            mergedDiaSourceHistory,
            updatedDiaObjectIds,
            [band])
        # Test for duplication in the updated DiaObjects.
        if self.testDataFrameIndex(diaCalResult.diaObjectCat):
            raise RuntimeError(
                "Duplicate DiaObjects (loaded + updated) created after "
                "DiaCalculation. This is unexpected behavior and should be "
                "reported. Exiting.")
        if self.testDataFrameIndex(diaCalResult.updatedDiaObjects):
            raise RuntimeError(
                "Duplicate DiaObjects (updated) created after "
                "DiaCalculation. This is unexpected behavior and should be "
                "reported. Exiting.")

        # Force photometer on the Difference and Calibrated exposures using
        # the new and updated DiaObject locations.
        diaForcedSources = self.diaForcedSource.run(
            diaCalResult.diaObjectCat,
            diaCalResult.updatedDiaObjects.loc[:, "diaObjectId"].to_numpy(),
            ccdExposureIdBits,
            exposure,
            diffIm)

        # Store DiaSources and updated DiaObjects in the Apdb.
        self.apdb.storeDiaSources(diaSources)
        self.apdb.storeDiaObjects(
            diaCalResult.updatedDiaObjects,
            exposure.getInfo().getVisitInfo().getDate().toPython())
        self.apdb.storeDiaForcedSources(diaForcedSources)

        if self.config.doPackageAlerts:
            if len(loaderResult.diaForcedSources) > 1:
                diaForcedSources = diaForcedSources.append(
                    loaderResult.diaForcedSources,
                    sort=True)
            if self.testDataFrameIndex(diaForcedSources):
                self.log.warning(
                    "Duplicate DiaForcedSources created after merge with "
                    "history and new sources. This may cause downstream "
                    "problems. Dropping duplicates.")
                # Drop duplicates via index and keep the first appearance.
                # Reset due to the index shape being slight different than
                # expected.
                diaForcedSources = diaForcedSources.groupby(
                    diaForcedSources.index).first()
                diaForcedSources.reset_index(drop=True, inplace=True)
                diaForcedSources.set_index(
                    ["diaObjectId", "diaForcedSourceId"],
                    drop=False,
                    inplace=True)
            self.alertPackager.run(diaSources,
                                   diaCalResult.diaObjectCat,
                                   loaderResult.diaSources,
                                   diaForcedSources,
                                   diffIm,
                                   warpedExposure,
                                   ccdExposureIdBits)

        return pipeBase.Struct(apdbMarker=self.config.apdb.value,
                               associatedDiaSources=diaSources)

    def createNewDiaObjects(self, diaSources):
        """Loop through the set of DiaSources and create new DiaObjects
        for unassociated DiaSources.

        Parameters
        ----------
        diaSources : `pandas.DataFrame`
            Set of DiaSources to create new DiaObjects from.

        Returns
        -------
        results : `lsst.pipe.base.Struct`
            Results struct containing:

            - ``diaSources`` : DiaSource catalog with updated DiaObject ids.
              (`pandas.DataFrame`)
            - ``newDiaObjects`` : Newly created DiaObjects from the
              unassociated DiaSources. (`pandas.DataFrame`)
        """
        newDiaObjectsList = []
        for idx, diaSource in diaSources.iterrows():
            # diaObjectId == 0 marks a DiaSource that Association left
            # unmatched; seed a new DiaObject with the DiaSource's own id.
            if diaSource["diaObjectId"] == 0:
                newDiaObjectsList.append(
                    self._initialize_dia_object(diaSource["diaSourceId"]))
                diaSources.loc[idx, "diaObjectId"] = diaSource["diaSourceId"]
        if len(newDiaObjectsList) > 0:
            newDiaObjects = pd.DataFrame(data=newDiaObjectsList)
        else:
            # Build an empty frame that still carries the full DiaObject
            # column set so downstream appends keep a consistent schema.
            tmpObj = self._initialize_dia_object(0)
            newDiaObjects = pd.DataFrame(data=newDiaObjectsList,
                                         columns=tmpObj.keys())
        return pipeBase.Struct(diaSources=diaSources,
                               newDiaObjects=newDiaObjects)

    def _initialize_dia_object(self, objId):
        """Create a new DiaObject with values required to be initialized by the
        Ppdb.

        Parameters
        ----------
        objId : `int`
            ``diaObjectId`` value for the of the new DiaObject.

        Returns
        -------
        diaObject : `dict`
            Newly created DiaObject with keys:

            ``diaObjectId``
                Unique DiaObjectId (`int`).
            ``pmParallaxNdata``
                Number of data points used for parallax calculation (`int`).
            ``nearbyObj1``
                Id of the a nearbyObject in the Object table (`int`).
            ``nearbyObj2``
                Id of the a nearbyObject in the Object table (`int`).
            ``nearbyObj3``
                Id of the a nearbyObject in the Object table (`int`).
            ``?PSFluxData``
                Number of data points used to calculate point source flux
                summary statistics in each bandpass (`int`).
        """
        new_dia_object = {"diaObjectId": objId,
                          "pmParallaxNdata": 0,
                          "nearbyObj1": 0,
                          "nearbyObj2": 0,
                          "nearbyObj3": 0,
                          "flags": 0}
        for f in ["u", "g", "r", "i", "z", "y"]:
            new_dia_object["%sPSFluxNdata" % f] = 0
        return new_dia_object

    def testDataFrameIndex(self, df):
        """Test the sorted DataFrame index for duplicates.

        Wrapped as a separate function to allow for mocking of the this task
        in unittesting. Default of a mock return for this test is True.

        Parameters
        ----------
        df : `pandas.DataFrame`
            DataFrame to text.

        Returns
        -------
        `bool`
            True if DataFrame contains duplicate rows.
        """
        return df.index.has_duplicates

    def _add_association_meta_data(self,
                                   nUpdatedDiaObjects,
                                   nUnassociatedDiaObjects,
                                   nNewDiaObjects):
        """Store summaries of the association step in the task metadata.

        Parameters
        ----------
        nUpdatedDiaObjects : `int`
            Number of previous DiaObjects associated and updated in this
            ccdVisit.
        nUnassociatedDiaObjects : `int`
            Number of previous DiaObjects that were not associated or updated
            in this ccdVisit.
        nNewDiaObjects : `int`
            Number of newly created DiaObjects for this ccdVisit.
        """
        self.metadata.add('numUpdatedDiaObjects', nUpdatedDiaObjects)
        self.metadata.add('numUnassociatedDiaObjects', nUnassociatedDiaObjects)
        self.metadata.add('numNewDiaObjects', nNewDiaObjects)