# Coverage-report artifact: python/lsst/ap/association/diaPipe.py (33% line coverage)
1#
2# LSST Data Management System
3# Copyright 2008-2016 AURA/LSST.
4#
5# This product includes software developed by the
6# LSST Project (http://www.lsst.org/).
7#
8# This program is free software: you can redistribute it and/or modify
9# it under the terms of the GNU General Public License as published by
10# the Free Software Foundation, either version 3 of the License, or
11# (at your option) any later version.
12#
13# This program is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16# GNU General Public License for more details.
17#
18# You should have received a copy of the LSST License Statement and
19# the GNU General Public License along with this program. If not,
20# see <https://www.lsstcorp.org/LegalNotices/>.
21#
23"""PipelineTask for associating DiaSources with previous DiaObjects.
25Additionally performs forced photometry on the calibrated and difference
26images at the updated locations of DiaObjects.
28Currently loads directly from the Apdb rather than pre-loading.
29"""
31import os
33import lsst.dax.apdb as daxApdb
34from lsst.meas.base import DiaObjectCalculationTask
35import lsst.pex.config as pexConfig
36import lsst.pipe.base as pipeBase
37import lsst.pipe.base.connectionTypes as connTypes
39from lsst.ap.association import (
40 AssociationTask,
41 DiaForcedSourceTask,
42 LoadDiaCatalogsTask,
43 make_dia_object_schema,
44 make_dia_source_schema,
45 PackageAlertsTask)
47__all__ = ("DiaPipelineConfig",
48 "DiaPipelineTask",
49 "DiaPipelineConnections")
class DiaPipelineConnections(
        pipeBase.PipelineTaskConnections,
        dimensions=("instrument", "visit", "detector"),
        defaultTemplates={"coaddName": "deep", "fakesType": ""}):
    """Butler connections for DiaPipelineTask.

    Inputs are the per-detector difference-image products; outputs are the
    Apdb completion marker and, optionally (``doWriteAssociatedSources``),
    the catalog of DiaSources after association.
    """
    diaSourceTable = connTypes.Input(
        doc="Catalog of calibrated DiaSources.",
        name="{fakesType}{coaddName}Diff_diaSrcTable",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )
    diffIm = connTypes.Input(
        doc="Difference image on which the DiaSources were detected.",
        name="{fakesType}{coaddName}Diff_differenceExp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
    )
    exposure = connTypes.Input(
        doc="Calibrated exposure differenced with a template image during "
            "image differencing.",
        name="calexp",
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
    )
    warpedExposure = connTypes.Input(
        doc="Warped template used to create `subtractedExposure`. Not PSF "
            "matched.",
        dimensions=("instrument", "visit", "detector"),
        storageClass="ExposureF",
        name="{fakesType}{coaddName}Diff_warpedExp",
    )
    apdbMarker = connTypes.Output(
        doc="Marker dataset storing the configuration of the Apdb for each "
            "visit/detector. Used to signal the completion of the pipeline.",
        name="apdb_marker",
        storageClass="Config",
        dimensions=("instrument", "visit", "detector"),
    )
    associatedDiaSources = connTypes.Output(
        doc="Optional output storing the DiaSource catalog after matching, "
            "calibration, and standardization for insertation into the Apdb.",
        name="{fakesType}{coaddName}Diff_assocDiaSrc",
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
    )

    def __init__(self, *, config=None):
        super().__init__(config=config)

        # The associated-source catalog is optional; remove the connection
        # entirely so the butler does not expect the dataset to be written.
        if not config.doWriteAssociatedSources:
            self.outputs.remove("associatedDiaSources")

    def adjustQuantum(self, inputs, outputs, label, dataId):
        """Override to make adjustments to `lsst.daf.butler.DatasetRef` objects
        in the `lsst.daf.butler.core.Quantum` during the graph generation stage
        of the activator.

        This implementation checks to make sure that the filters in the dataset
        are compatible with AP processing as set by the Apdb/DPDD schema.

        Parameters
        ----------
        inputs : `dict`
            Dictionary whose keys are an input (regular or prerequisite)
            connection name and whose values are a tuple of the connection
            instance and a collection of associated `DatasetRef` objects.
            The exact type of the nested collections is unspecified; it can be
            assumed to be multi-pass iterable and support `len` and ``in``, but
            it should not be mutated in place. In contrast, the outer
            dictionaries are guaranteed to be temporary copies that are true
            `dict` instances, and hence may be modified and even returned; this
            is especially useful for delegating to `super` (see notes below).
        outputs : `dict`
            Dict of output datasets, with the same structure as ``inputs``.
        label : `str`
            Label for this task in the pipeline (should be used in all
            diagnostic messages).
        dataId : `lsst.daf.butler.DataCoordinate`
            Data ID for this quantum in the pipeline (should be used in all
            diagnostic messages).

        Returns
        -------
        adjusted_inputs : `dict`
            Dict of the same form as ``inputs`` with updated containers of
            input `DatasetRef` objects. Connections that are not changed
            should not be returned at all. Datasets may only be removed, not
            added. Nested collections may be of any multi-pass iterable type,
            and the order of iteration will set the order of iteration within
            `PipelineTask.runQuantum`.
        adjusted_outputs : `dict`
            Dict of updated output datasets, with the same structure and
            interpretation as ``adjusted_inputs``.

        Raises
        ------
        ValueError
            Raised if a ``diffIm`` input has a band that is not listed in
            ``DiaPipelineConfig.validBands`` (the Apdb schema has no columns
            for it).
        ScalarError
            Raised if any `Input` or `PrerequisiteInput` connection has
            ``multiple`` set to `False`, but multiple datasets.
        NoWorkFound
            Raised to indicate that this quantum should not be run; not enough
            datasets were found for a regular `Input` connection, and the
            quantum should be pruned or skipped.
        FileNotFoundError
            Raised to cause QuantumGraph generation to fail (with the message
            included in this exception); not enough datasets were found for a
            `PrerequisiteInput` connection.
        """
        # Reject unsupported bands during graph generation rather than
        # failing mid-pipeline when writing to the Apdb.
        _, refs = inputs["diffIm"]
        for ref in refs:
            if ref.dataId["band"] not in self.config.validBands:
                raise ValueError(
                    f"Requested '{ref.dataId['band']}' not in "
                    "DiaPipelineConfig.validBands. To process bands not in "
                    "the standard Rubin set (ugrizy) you must add the band to "
                    "the validBands list in DiaPipelineConfig and add the "
                    "appropriate columns to the Apdb schema.")
        return super().adjustQuantum(inputs, outputs, label, dataId)
class DiaPipelineConfig(pipeBase.PipelineTaskConfig,
                        pipelineConnections=DiaPipelineConnections):
    """Config for DiaPipelineTask.
    """
    coaddName = pexConfig.Field(
        doc="coadd name: typically one of deep, goodSeeing, or dcr",
        dtype=str,
        default="deep",
    )
    apdb = pexConfig.ConfigurableField(
        target=daxApdb.Apdb,
        ConfigClass=daxApdb.ApdbConfig,
        doc="Database connection for storing associated DiaSources and "
            "DiaObjects. Must already be initialized.",
    )
    validBands = pexConfig.ListField(
        dtype=str,
        default=["u", "g", "r", "i", "z", "y"],
        doc="List of bands that are valid for AP processing. To process a "
            "band not on this list, the appropriate band specific columns "
            "must be added to the Apdb schema in dax_apdb.",
    )
    diaCatalogLoader = pexConfig.ConfigurableField(
        target=LoadDiaCatalogsTask,
        doc="Task to load DiaObjects and DiaSources from the Apdb.",
    )
    associator = pexConfig.ConfigurableField(
        target=AssociationTask,
        doc="Task used to associate DiaSources with DiaObjects.",
    )
    diaCalculation = pexConfig.ConfigurableField(
        target=DiaObjectCalculationTask,
        doc="Task to compute summary statistics for DiaObjects.",
    )
    diaForcedSource = pexConfig.ConfigurableField(
        target=DiaForcedSourceTask,
        doc="Task used for force photometer DiaObject locations in direct and "
            "difference images.",
    )
    alertPackager = pexConfig.ConfigurableField(
        target=PackageAlertsTask,
        doc="Subtask for packaging Ap data into alerts.",
    )
    doPackageAlerts = pexConfig.Field(
        dtype=bool,
        default=False,
        doc="Package Dia-data into serialized alerts for distribution and "
            "write them to disk.",
    )
    doWriteAssociatedSources = pexConfig.Field(
        dtype=bool,
        default=False,
        doc="Write out associated and SDMed DiaSources.",
    )

    def setDefaults(self):
        """Set defaults appropriate for AP pipeline use of the Apdb and
        enable the full set of DiaObject summary-statistic plugins.
        """
        self.apdb.dia_object_index = "baseline"
        self.apdb.dia_object_columns = []
        # Extra columns beyond the baseline Apdb schema required by AP.
        self.apdb.extra_schema_file = os.path.join(
            "${AP_ASSOCIATION_DIR}",
            "data",
            "apdb-ap-pipe-schema-extra.yaml")
        self.diaCalculation.plugins = ["ap_meanPosition",
                                       "ap_HTMIndex",
                                       "ap_nDiaSources",
                                       "ap_diaObjectFlag",
                                       "ap_meanFlux",
                                       "ap_percentileFlux",
                                       "ap_sigmaFlux",
                                       "ap_chi2Flux",
                                       "ap_madFlux",
                                       "ap_skewFlux",
                                       "ap_minMaxFlux",
                                       "ap_maxSlopeFlux",
                                       "ap_meanErrFlux",
                                       "ap_linearFit",
                                       "ap_stetsonJ",
                                       "ap_meanTotFlux",
                                       "ap_sigmaTotFlux"]

    def validate(self):
        """Validate the config.

        Raises
        ------
        ValueError
            Raised if the ``ap_HTMIndex`` plugin is disabled, or if its HTM
            level disagrees with the one used by ``diaCatalogLoader``.
        """
        pexConfig.Config.validate(self)
        # Check plugin membership BEFORE indexing into the plugin registry;
        # otherwise a missing plugin raises an uninformative KeyError below
        # instead of the intended ValueError.
        if "ap_HTMIndex" not in self.diaCalculation.plugins:
            raise ValueError("DiaPipe requires the ap_HTMIndex plugin "
                             "be enabled for proper insertion into the Apdb.")
        if self.diaCatalogLoader.htmLevel != \
                self.diaCalculation.plugins["ap_HTMIndex"].htmLevel:
            raise ValueError("HTM index level in LoadDiaCatalogsTask must be "
                             "equal to HTMIndexDiaCalculationPlugin index "
                             "level.")
class DiaPipelineTask(pipeBase.PipelineTask):
    """Task for loading, associating and storing Difference Image Analysis
    (DIA) Objects and Sources.
    """
    ConfigClass = DiaPipelineConfig
    _DefaultName = "diaPipe"
    RunnerClass = pipeBase.ButlerInitializedTaskRunner

    def __init__(self, initInputs=None, **kwargs):
        super().__init__(**kwargs)
        # Hand the afw schemas to the Apdb layer so it knows the full
        # DiaObject/DiaSource column set when creating/validating tables.
        self.apdb = self.config.apdb.apply(
            afw_schemas=dict(DiaObject=make_dia_object_schema(),
                             DiaSource=make_dia_source_schema()))
        self.makeSubtask("diaCatalogLoader")
        self.makeSubtask("associator")
        self.makeSubtask("diaCalculation")
        self.makeSubtask("diaForcedSource")
        if self.config.doPackageAlerts:
            self.makeSubtask("alertPackager")

    def runQuantum(self, butlerQC, inputRefs, outputRefs):
        inputs = butlerQC.get(inputRefs)
        # Only the bit width of the packed visit+detector id is needed by
        # run(); the packed id itself is unused here.
        expId, expBits = butlerQC.quantum.dataId.pack("visit_detector",
                                                      returnMaxBits=True)
        inputs["ccdExposureIdBits"] = expBits
        inputs["band"] = butlerQC.quantum.dataId["band"]

        outputs = self.run(**inputs)

        butlerQC.put(outputs, outputRefs)

    @pipeBase.timeMethod
    def run(self,
            diaSourceTable,
            diffIm,
            exposure,
            warpedExposure,
            ccdExposureIdBits,
            band):
        """Process DiaSources and DiaObjects.

        Load previous DiaObjects and their DiaSource history. Calibrate the
        values in the diaSourceCat. Associate new DiaSources with previous
        DiaObjects. Run forced photometry at the updated DiaObject locations.
        Store the results in the Alert Production Database (Apdb).

        Parameters
        ----------
        diaSourceTable : `pandas.DataFrame`
            Newly detected DiaSources.
        diffIm : `lsst.afw.image.ExposureF`
            Difference image exposure in which the sources in ``diaSourceCat``
            were detected.
        exposure : `lsst.afw.image.ExposureF`
            Calibrated exposure differenced with a template to create
            ``diffIm``.
        warpedExposure : `lsst.afw.image.ExposureF`
            Template exposure used to create diffIm.
        ccdExposureIdBits : `int`
            Number of bits used for a unique ``ccdVisitId``.
        band : `str`
            The band in which the new DiaSources were detected.

        Returns
        -------
        results : `lsst.pipe.base.Struct`
            Results struct with components.

            - ``apdbMarker`` : Marker dataset to store in the Butler indicating
              that this ccdVisit has completed successfully.
              (`lsst.dax.apdb.ApdbConfig`)
            - ``associatedDiaSources`` : Catalog of newly associated
              DiaSources. (`pandas.DataFrame`)

        Raises
        ------
        RuntimeError
            Raised if duplicate DiaSources or DiaObjects are produced by
            association or DiaObject calculation.
        """
        self.log.info("Running DiaPipeline...")
        # Put the SciencePipelines through a SDMification step and return
        # calibrated columns with the expect output database names.

        # Load the DiaObjects and DiaSource history.
        loaderResult = self.diaCatalogLoader.run(diffIm, self.apdb)

        # Associate new DiaSources with existing DiaObjects and update
        # DiaObject summary statistics using the full DiaSource history.
        assocResults = self.associator.run(diaSourceTable,
                                           loaderResult.diaObjects,
                                           loaderResult.diaSources)

        mergedDiaSourceHistory = loaderResult.diaSources.append(
            assocResults.diaSources,
            sort=True)
        # Test for DiaSource duplication first. If duplicates are found,
        # this likely means this is duplicate data being processed and sent
        # to the Apdb.
        if self.testDataFrameIndex(mergedDiaSourceHistory):
            raise RuntimeError(
                "Duplicate DiaSources found after association and merging "
                "with history. This is likely due to re-running data with an "
                "already populated Apdb. If this was not the case then there "
                "was an unexpected failure in Association while matching "
                "sources to objects, and should be reported. Exiting.")

        diaCalResult = self.diaCalculation.run(
            assocResults.diaObjects,
            mergedDiaSourceHistory,
            assocResults.matchedDiaObjectIds,
            [band])
        if self.testDataFrameIndex(diaCalResult.diaObjectCat):
            raise RuntimeError(
                "Duplicate DiaObjects (loaded + updated) created after "
                "DiaCalculation. This is unexpected behavior and should be "
                "reported. Exiting.")
        if self.testDataFrameIndex(diaCalResult.updatedDiaObjects):
            raise RuntimeError(
                "Duplicate DiaObjects (updated) created after "
                "DiaCalculation. This is unexpected behavior and should be "
                "reported. Exiting.")

        # Force photometer on the Difference and Calibrated exposures using
        # the new and updated DiaObject locations.
        diaForcedSources = self.diaForcedSource.run(
            diaCalResult.diaObjectCat,
            diaCalResult.updatedDiaObjects.loc[:, "diaObjectId"].to_numpy(),
            ccdExposureIdBits,
            exposure,
            diffIm)

        # Store DiaSources and updated DiaObjects in the Apdb.
        self.apdb.storeDiaSources(assocResults.diaSources)
        self.apdb.storeDiaObjects(
            diaCalResult.updatedDiaObjects,
            exposure.getInfo().getVisitInfo().getDate().toPython())
        self.apdb.storeDiaForcedSources(diaForcedSources)

        if self.config.doPackageAlerts:
            # Merge any non-empty forced-source history into the alert
            # payload. (Was ``> 1``, which incorrectly skipped a single-row
            # history.)
            if len(loaderResult.diaForcedSources) > 0:
                diaForcedSources = diaForcedSources.append(
                    loaderResult.diaForcedSources,
                    sort=True)
            if self.testDataFrameIndex(diaForcedSources):
                self.log.warn(
                    "Duplicate DiaForcedSources created after merge with "
                    "history and new sources. This may cause downstream "
                    "problems. Dropping duplicates.")
                # Drop duplicates via index and keep the first appearance.
                # Reset due to the index shape being slight different than
                # expected.
                diaForcedSources = diaForcedSources.groupby(
                    diaForcedSources.index).first()
                diaForcedSources.reset_index(drop=True, inplace=True)
                diaForcedSources.set_index(
                    ["diaObjectId", "diaForcedSourceId"],
                    drop=False,
                    inplace=True)
            self.alertPackager.run(assocResults.diaSources,
                                   diaCalResult.diaObjectCat,
                                   loaderResult.diaSources,
                                   diaForcedSources,
                                   diffIm,
                                   warpedExposure,
                                   ccdExposureIdBits)

        return pipeBase.Struct(apdbMarker=self.config.apdb.value,
                               associatedDiaSources=assocResults.diaSources)

    def testDataFrameIndex(self, df):
        """Test the sorted DataFrame index for duplicates.

        Wrapped as a separate function to allow for mocking of the this task
        in unittesting. Default of a mock return for this test is True.

        Parameters
        ----------
        df : `pandas.DataFrame`
            DataFrame to test.

        Returns
        -------
        `bool`
            True if DataFrame contains duplicate rows.
        """
        return df.index.has_duplicates