Coverage for python/lsst/ap/pipe/ap_pipe.py : 26%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#
2# This file is part of ap_pipe.
3#
4# Developed for the LSST Data Management System.
5# This product includes software developed by the LSST Project
6# (http://www.lsst.org).
7# See the COPYRIGHT file at the top-level directory of this distribution
8# for details of code ownership.
9#
10# This program is free software: you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation, either version 3 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program. If not, see <http://www.gnu.org/licenses/>.
22#
24__all__ = ["ApPipeConfig", "ApPipeTask"]
26import warnings
28from sqlalchemy.exc import OperationalError, ProgrammingError
30import lsst.pex.config as pexConfig
31import lsst.pipe.base as pipeBase
33from lsst.pipe.tasks.processCcd import ProcessCcdTask
34from lsst.pipe.tasks.imageDifference import ImageDifferenceTask
35from lsst.ap.association import DiaPipelineTask
36from lsst.ap.pipe.apPipeParser import ApPipeParser
37from lsst.ap.pipe.apPipeTaskRunner import ApPipeTaskRunner
class ApPipeConfig(pexConfig.Config):
    """Configuration for `ApPipeTask`.

    Holds one configurable subtask field per pipeline stage (single-frame
    processing, image differencing, and DiaSource/DiaObject association).
    """

    ccdProcessor = pexConfig.ConfigurableField(
        doc="Task used to perform basic image reduction and characterization.",
        target=ProcessCcdTask,
    )
    differencer = pexConfig.ConfigurableField(
        doc="Task used to do image subtraction and DiaSource detection.",
        target=ImageDifferenceTask,
    )
    diaPipe = pexConfig.ConfigurableField(
        doc="Pipeline task for loading/store DiaSources and DiaObjects and "
            "spatially associating them.",
        target=DiaPipelineTask,
    )

    def setDefaults(self):
        """Apply the overrides that ap_pipe wants on top of the subtask defaults.
        """
        imageDiff = self.differencer

        # Decorrelation is always preferred here (it may eventually become
        # the ImageDifferenceTask default); the detection threshold below is
        # needed whenever decorrelation is on.
        imageDiff.doDecorrelation = True
        imageDiff.detection.thresholdValue = 5.0

        # There are no source catalogs for templates, so do not try to
        # select sources from them.
        imageDiff.doSelectSources = False

    def validate(self):
        """Check that the differencer produces everything association needs.

        Raises
        ------
        ValueError
            Raised if ``differencer.doMeasurement``,
            ``differencer.doWriteSources``, or
            ``differencer.doWriteSubtractedExp`` is false. The flags are
            checked in that order and the first failure is reported.
        """
        super().validate()

        # (differencer flag, error raised when the flag is false). The flags
        # are looked up one at a time so that checking stops at the first
        # failure, exactly like a chain of ``if`` statements.
        requiredFlags = (
            ("doMeasurement",
             "Source association needs diaSource fluxes [differencer.doMeasurement]."),
            ("doWriteSources",
             "Source association needs diaSource catalogs [differencer.doWriteSources]."),
            ("doWriteSubtractedExp",
             "Source association needs difference exposures "
             "[differencer.doWriteSubtractedExp]."),
        )
        for flagName, complaint in requiredFlags:
            if not getattr(self.differencer, flagName):
                raise ValueError(complaint)
class ApPipeTask(pipeBase.CmdLineTask):
    """Command-line task representing the entire AP pipeline.

    ``ApPipeTask`` processes raw DECam images from basic processing through
    source association. Other observatories will be supported in the future.

    ``ApPipeTask`` can be run from the command line, but it can also be called
    from other pipeline code. It provides public methods for executing each
    major step of the pipeline by itself.

    Parameters
    ----------
    butler : `lsst.daf.persistence.Butler`
        A Butler providing access to the science, calibration, and (unless
        ``config.differencer.getTemplate`` is overridden) template data to
        be processed. Its output repository must be both readable
        and writable.
    """

    ConfigClass = ApPipeConfig
    RunnerClass = ApPipeTaskRunner
    _DefaultName = "apPipe"

    def __init__(self, butler, *args, **kwargs):
        super().__init__(*args, **kwargs)

        self.makeSubtask("ccdProcessor", butler=butler)
        self.makeSubtask("differencer", butler=butler)
        # diaPipe is built from the differencer's output schema, so it must
        # be created after the differencer subtask exists.
        self.makeSubtask("diaPipe", initInputs={"diaSourceSchema": self.differencer.schema})

    @pipeBase.timeMethod
    def runDataRef(self, rawRef, templateIds=None, reuse=None):
        """Execute the ap_pipe pipeline on a single image.

        Parameters
        ----------
        rawRef : `lsst.daf.persistence.ButlerDataRef`
            A reference to the raw data to process.
        templateIds : `list` of `dict`, optional
            A list of parsed data IDs for templates to use. Only used if
            ``config.differencer`` is configured to do so. ``differencer`` or
            its subtasks may restrict the allowed IDs.
        reuse : `list` of `str`, optional
            The names of all subtasks that may be skipped if their output is
            present. Defaults to skipping nothing.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Result struct with components:

            - l1Database : handle for accessing the final association database, conforming to
                `ap_association`'s DB access API
            - ccdProcessor : output of `config.ccdProcessor.runDataRef` (`lsst.pipe.base.Struct` or `None`).
            - differencer : output of `config.differencer.runDataRef` (`lsst.pipe.base.Struct` or `None`).
            - diaPipe : output of `config.diaPipe.run` (`lsst.pipe.base.Struct` or `None`).

            A component is `None` when the corresponding step was skipped
            because of ``reuse``.

        Raises
        ------
        RuntimeError
            Raised if the association step fails with a
            `sqlalchemy.exc.OperationalError` or
            `sqlalchemy.exc.ProgrammingError`; the original exception is
            chained as the cause.
        """
        if reuse is None:
            reuse = []

        # Work around mismatched HDU lists for raw and processed data
        calexpId = rawRef.dataId.copy()
        if 'hdu' in calexpId:
            del calexpId['hdu']
        calexpRef = rawRef.getButler().dataRef("calexp", dataId=calexpId)

        # Ensure that templateIds make it through basic data reduction
        # TODO: treat as independent jobs (may need SuperTask framework?)
        if templateIds is not None:
            for templateId in templateIds:
                # templateId is typically visit-only; consider only the same raft/CCD/etc. as rawRef
                rawTemplateRef = _siblingRef(rawRef, "raw", templateId)
                calexpTemplateRef = _siblingRef(calexpRef, "calexp", templateId)
                if "ccdProcessor" not in reuse or not calexpTemplateRef.datasetExists("calexp", write=True):
                    self.runProcessCcd(rawTemplateRef)

        if "ccdProcessor" in reuse and calexpRef.datasetExists("calexp", write=True):
            self.log.info("ProcessCcd has already been run for {0}, skipping...".format(rawRef.dataId))
            processResults = None
        else:
            processResults = self.runProcessCcd(rawRef)

        diffType = self.config.differencer.coaddName
        if "differencer" in reuse and calexpRef.datasetExists(diffType + "Diff_diaSrc", write=True):
            self.log.info("DiffIm has already been run for {0}, skipping...".format(calexpRef.dataId))
            diffImResults = None
        else:
            diffImResults = self.runDiffIm(calexpRef, templateIds)

        # The warning is issued whenever diaPipe reuse is requested, whether
        # or not this particular image ends up being skipped. It does not
        # touch the database, so it stays outside the try block below.
        reuseAssociation = "diaPipe" in reuse
        if reuseAssociation:
            warnings.warn(
                "Reusing association results for some images while rerunning "
                "others may change the associations. If exact reproducibility "
                "matters, please clear the association database and run "
                "ap_pipe.py with --reuse-output-from=differencer to redo all "
                "association results consistently.")

        try:
            if reuseAssociation and calexpRef.datasetExists("apdb_marker", write=True):
                message = "DiaPipeline has already been run for {0}, skipping...".format(calexpRef.dataId)
                self.log.info(message)
                diaPipeResults = None
            else:
                diaPipeResults = self.runAssociation(calexpRef)
        except (OperationalError, ProgrammingError) as e:
            # Don't use lsst.pipe.base.TaskError because it mixes poorly with exception chaining
            raise RuntimeError("Database query failed; did you call make_apdb.py first?") from e

        return pipeBase.Struct(
            l1Database=self.diaPipe.apdb,
            ccdProcessor=processResults or None,
            differencer=diffImResults or None,
            # runAssociation wraps the diaPipe output; expose only the task's own results here.
            diaPipe=diaPipeResults.taskResults if diaPipeResults else None
        )

    @pipeBase.timeMethod
    def runProcessCcd(self, sensorRef):
        """Perform ISR with ingested images and calibrations via processCcd.

        The output repository associated with ``sensorRef`` will be populated with the
        usual post-ISR data (bkgd, calexp, icExp, icSrc, postISR).

        Parameters
        ----------
        sensorRef : `lsst.daf.persistence.ButlerDataRef`
            Data reference for raw data.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Output of `config.ccdProcessor.runDataRef`.

        Notes
        -----
        The input repository corresponding to ``sensorRef`` must already contain the refcats.
        """
        self.log.info("Running ProcessCcd...")
        return self.ccdProcessor.runDataRef(sensorRef)

    @pipeBase.timeMethod
    def runDiffIm(self, sensorRef, templateIds=None):
        """Do difference imaging with a template and a science image.

        The output repository associated with ``sensorRef`` will be populated with difference images
        and catalogs of detected sources (diaSrc, diffexp, and metadata files).

        Parameters
        ----------
        sensorRef : `lsst.daf.persistence.ButlerDataRef`
            Data reference for multiple dataset types, both input and output.
        templateIds : `list` of `dict`, optional
            A list of parsed data IDs for templates to use. Only used if
            ``config.differencer`` is configured to do so. ``differencer`` or
            its subtasks may restrict the allowed IDs.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Output of `config.differencer.runDataRef`.
        """
        self.log.info("Running ImageDifference...")
        return self.differencer.runDataRef(sensorRef, templateIdList=templateIds)

    @pipeBase.timeMethod
    def runAssociation(self, sensorRef):
        """Do source association.

        This method writes an ``apdb_marker`` dataset once ``config.diaPipe.run``
        has returned for the current exposure.

        Parameters
        ----------
        sensorRef : `lsst.daf.persistence.ButlerDataRef`
            Data reference for multiple input dataset types.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Result struct with components:

            - l1Database : the ``apdb`` attribute of the ``diaPipe`` subtask, i.e. the handle to
                the association database used for this run.
            - taskResults : output of `config.diaPipe.run` (`lsst.pipe.base.Struct`).
        """
        diffType = self.config.differencer.coaddName

        results = self.diaPipe.run(
            diaSourceCat=sensorRef.get(diffType + "Diff_diaSrc"),
            diffIm=sensorRef.get(diffType + "Diff_differenceExp"),
            exposure=sensorRef.get("calexp"),
            ccdExposureIdBits=sensorRef.get("ccdExposureId_bits"))

        # apdb_marker triggers metrics processing; let them try to read
        # something even if association failed
        # NOTE(review): this line is only reached when diaPipe.run returns
        # without raising, so "failed" above can only mean a failure that
        # diaPipe.run itself absorbs -- confirm that is the intent.
        sensorRef.put(results.apdb_marker, "apdb_marker")

        return pipeBase.Struct(
            l1Database=self.diaPipe.apdb,
            taskResults=results
        )

    @classmethod
    def _makeArgumentParser(cls):
        """A parser that can handle extra arguments for ap_pipe.
        """
        return ApPipeParser(name=cls._DefaultName)
def _siblingRef(original, datasetType, dataId):
    """Build a dataRef that is a close relative of an existing one.

    Useful when the desired data ID differs from a known one in only a key
    or two: the result is more specific than what expanding the partial
    data ID on its own would give.

    Parameters
    ----------
    original : `lsst.daf.persistence.ButlerDataRef`
        A dataRef related to the desired one. Assumed to represent a unique dataset.
    datasetType : `str`
        The desired type of the new dataRef. Must be compatible
        with ``original``.
    dataId : `dict` from `str` to any
        A possibly partial data ID for the new dataRef. Any properties left
        unspecified shall be copied from ``original``.

    Returns
    -------
    dataRef : `lsst.daf.persistence.ButlerDataRef`
        A dataRef to the same butler as ``original``, but of type
        ``datasetType``, whose data ID is ``original.dataId`` with the
        entries of ``dataId`` taking precedence.
    """
    # The entries of ``dataId`` are passed as keyword overrides on top of the
    # original ID; the merging itself is left to the butler.
    return original.getButler().dataRef(datasetType, dataId=original.dataId, **dataId)