from __future__ import absolute_import, division, print_function

import os

from argparse import ArgumentError
from builtins import zip

from lsst.pex.config import Config, Field, ConfigurableField
from lsst.pipe.base import ArgumentParser, TaskRunner
from lsst.pipe.tasks.multiBand import (MergeDetectionsTask,
                                       MeasureMergedCoaddSourcesTask,
                                       MergeMeasurementsTask,)
from lsst.ctrl.pool.parallel import BatchPoolTask
from lsst.ctrl.pool.pool import Pool, abortOnError
from lsst.meas.base.references import MultiBandReferencesTask
from lsst.meas.base.forcedPhotCoadd import ForcedPhotCoaddTask
from lsst.pipe.drivers.utils import getDataRef
from lsst.pipe.tasks.coaddBase import CoaddDataIdContainer

import lsst.afw.table as afwTable
24 """!Make self.refList from self.idList
26 It's difficult to make a data reference that merely points to an entire
27 tract: there is no data product solely at the tract level. Instead, we
28 generate a list of data references for patches within the tract.
30 @param namespace namespace object that is the result of an argument parser
32 datasetType = namespace.config.coaddName +
"Coadd_calexp"
34 def getPatchRefList(tract):
35 return [namespace.butler.dataRef(datasetType=datasetType,
37 filter=dataId[
"filter"],
38 patch=
"%d,%d" % patch.getIndex())
42 for dataId
in self.idList:
45 if "filter" not in dataId:
46 raise ArgumentError(
None,
"--id must include 'filter'")
48 skymap = self.getSkymap(namespace, datasetType)
51 tractId = dataId[
"tract"]
52 if tractId
not in tractRefs:
53 tractRefs[tractId] = []
55 tractRefs[tractId].append(namespace.butler.dataRef(datasetType=datasetType,
59 patch=dataId[
'patch']))
61 tractRefs[tractId] += getPatchRefList(skymap[tractId])
63 tractRefs = dict((tract.getId(), tractRefs.get(tract.getId(), []) + getPatchRefList(tract))
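
# Example of the expansion performed above (hypothetical data IDs):
#   --id tract=8766 filter=HSC-I            -> one dataRef per patch in tract 8766
#   --id tract=8766 patch=1,2 filter=HSC-I  -> just the named patch
#   --id filter=HSC-I                       -> every patch of every tract in the skymap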


class MultiBandDriverConfig(Config):
    coaddName = Field(dtype=str, default="deep", doc="Name of coadd")
    mergeCoaddDetections = ConfigurableField(target=MergeDetectionsTask, doc="Merge detections")
    measureCoaddSources = ConfigurableField(target=MeasureMergedCoaddSourcesTask,
                                            doc="Measure merged detections")
    mergeCoaddMeasurements = ConfigurableField(target=MergeMeasurementsTask, doc="Merge measurements")
    forcedPhotCoadd = ConfigurableField(target=ForcedPhotCoaddTask,
                                        doc="Forced measurement on coadded images")
    clobberDetections = Field(dtype=bool, default=False, doc="Clobber existing detections?")
    clobberMergedDetections = Field(dtype=bool, default=False, doc="Clobber existing merged detections?")
    clobberMeasurements = Field(dtype=bool, default=False, doc="Clobber existing measurements?")
    clobberMergedMeasurements = Field(dtype=bool, default=False,
                                      doc="Clobber existing merged measurements?")
    clobberForcedPhotometry = Field(dtype=bool, default=False, doc="Clobber existing forced photometry?")
    reprocessing = Field(
        dtype=bool, default=False,
        doc=("Are we reprocessing?\n\n"
             "This exists as a workaround for large deblender footprints causing large memory use "
             "and/or very slow processing. We refuse to deblend those footprints when running on a "
             "cluster and return to reprocess on a machine with larger memory or more time "
             "if we consider those footprints important to recover."),
    )

    def setDefaults(self):
        Config.setDefaults(self)
        self.forcedPhotCoadd.references.retarget(MultiBandReferencesTask)

    def validate(self):
        for subtask in ("mergeCoaddDetections", "measureCoaddSources",
                        "mergeCoaddMeasurements", "forcedPhotCoadd"):
            coaddName = getattr(self, subtask).coaddName
            if coaddName != self.coaddName:
                raise RuntimeError("%s.coaddName (%s) doesn't match root coaddName (%s)" %
                                   (subtask, coaddName, self.coaddName))
112 """TaskRunner for running MultiBandTask
114 This is similar to the lsst.pipe.base.ButlerInitializedTaskRunner,
115 except that we have a list of data references instead of a single
116 data reference being passed to the Task.run.
120 """A variant of the base version that passes a butler argument to the task's constructor
121 parsedCmd or args must be specified.
123 if parsedCmd
is not None:
124 butler = parsedCmd.butler
125 elif args
is not None:
126 dataRefList, kwargs = args
127 butler = dataRefList[0].butlerSubset.butler
129 raise RuntimeError(
"parsedCmd or args must be specified")
130 return self.TaskClass(config=self.config, log=self.log, butler=butler)
134 """Unpickle something by calling a factory"""
135 return factory(*args, **kwargs)
139 """Multi-node driver for multiband processing"""
140 ConfigClass = MultiBandDriverConfig
141 _DefaultName =
"multiBandDriver"
142 RunnerClass = MultiBandDriverTaskRunner
144 def __init__(self, butler=None, schema=None, refObjLoader=None, **kwargs):
146 @param[in] butler: the butler can be used to retrieve schema or passed to the refObjLoader constructor
147 in case it is needed.
148 @param[in] schema: the schema of the source detection catalog used as input.
149 @param[in] refObjLoader: an instance of LoadReferenceObjectsTasks that supplies an external reference
150 catalog. May be None if the butler argument is provided or all steps requiring a reference
151 catalog are disabled.
153 BatchPoolTask.__init__(self, **kwargs)
155 assert butler
is not None,
"Butler not provided"
156 schema = butler.get(self.config.coaddName +
157 "Coadd_det_schema", immediate=
True).schema
159 self.makeSubtask(
"mergeCoaddDetections", schema=schema)
160 self.makeSubtask(
"measureCoaddSources", schema=afwTable.Schema(self.mergeCoaddDetections.schema),
161 peakSchema=afwTable.Schema(
162 self.mergeCoaddDetections.merged.getPeakSchema()),
163 refObjLoader=refObjLoader, butler=butler)
164 self.makeSubtask(
"mergeCoaddMeasurements", schema=afwTable.Schema(
165 self.measureCoaddSources.schema))
166 self.makeSubtask(
"forcedPhotCoadd", refSchema=afwTable.Schema(
167 self.mergeCoaddMeasurements.schema))

    def __reduce__(self):
        """Pickler"""
        return unpickle, (self.__class__, [], dict(config=self.config, name=self._name,
                                                   parentTask=self._parentTask, log=self.log,
                                                   butler=self.butler))
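
    # Sketch of the pickling path (uses only names defined in this module):
    # pickle calls __reduce__, which arranges for the module-level unpickle()
    # to rebuild the task as unpickle(MultiBandDriverTask, [], dict(config=...,
    # ..., butler=...)), re-running __init__ on the receiving worker process.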

    @classmethod
    def _makeArgumentParser(cls, *args, **kwargs):
        kwargs.pop("doBatch", False)
        parser = ArgumentParser(name=cls._DefaultName, *args, **kwargs)
        parser.add_id_argument("--id", "deepCoadd", help="data ID, e.g. --id tract=12345 patch=1,2",
                               ContainerClass=TractDataIdContainer)
        return parser
185 """!Return walltime request for batch job
187 @param time: Requested time per iteration
188 @param parsedCmd: Results of argument parsing
189 @param numCores: Number of cores
192 for refList
in parsedCmd.id.refList:
193 numTargets += len(refList)
194 return time*numTargets/float(numCpus)
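
    # Worked example (illustrative numbers): with time=600 s per data reference,
    # 300 patch references across all filters, and numCpus=100, the requested
    # walltime is 600*300/100 = 1800 s.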

    @abortOnError
    def run(self, patchRefList):
        """!Run multiband processing on coadds

        Only the master node runs this method.

        No real MPI communication (scatter/gather) takes place: all I/O goes
        through the disk. We want the intermediate stages on disk, and the
        component Tasks are implemented around this, so we just follow suit.

        @param patchRefList: Data references to run measurement
        """
        for patchRef in patchRefList:
            if patchRef:
                butler = patchRef.getButler()
                break
        else:
            raise RuntimeError("No valid patches")
        pool = Pool("all")
        pool.storeSet(butler=butler)

        # Only consider patches for which both the coadd and its detections exist
        patchRefList = [patchRef for patchRef in patchRefList if
                        patchRef.datasetExists(self.config.coaddName + "Coadd_calexp") and
                        patchRef.datasetExists(self.config.coaddName + "Coadd_det")]
        dataIdList = [patchRef.dataId for patchRef in patchRefList]

        # Group data references by patch; all must belong to the same tract
        patches = {}
        tract = None
        for patchRef in patchRefList:
            dataId = patchRef.dataId
            if tract is None:
                tract = dataId["tract"]
            else:
                assert tract == dataId["tract"]
            patch = dataId["patch"]
            if patch not in patches:
                patches[patch] = []
            patches[patch].append(dataId)

        pool.map(self.runMergeDetections, patches.values())

        # Measure the merged detections; each result says whether the patch
        # needs reprocessing because of large deblender footprints
        reprocessed = pool.map(self.runMeasureMerged, dataIdList)

        if self.config.reprocessing:
            patchReprocessing = {}
            for dataId, reprocess in zip(dataIdList, reprocessed):
                patchId = dataId["patch"]
                patchReprocessing[patchId] = patchReprocessing.get(patchId, False) or reprocess
            # Persist the determination, to make error recovery easier
            reprocessDataset = self.config.coaddName + "Coadd_multibandReprocessing"
            for patchId in patchReprocessing:
                dataId = dict(tract=tract, patch=patchId)
                if patchReprocessing[patchId]:
                    filename = butler.get(reprocessDataset + "_filename", dataId)[0]
                    open(filename, 'a').close()  # Touch the file to mark the patch
                elif butler.datasetExists(reprocessDataset, dataId):
                    # We must have failed mid-reprocessing before; start over
                    patchReprocessing[patchId] = True

        # Only process patches that have been identified as needing it
        pool.map(self.runMergeMeasurements, [idList for patchId, idList in patches.items() if
                                             not self.config.reprocessing or patchReprocessing[patchId]])
        pool.map(self.runForcedPhot, [dataId1 for dataId1 in dataIdList if
                                      not self.config.reprocessing or
                                      patchReprocessing[dataId1["patch"]]])

        # Remove the persisted reprocessing determination
        if self.config.reprocessing:
            for patchId in patchReprocessing:
                if not patchReprocessing[patchId]:
                    continue
                dataId = dict(tract=tract, patch=patchId)
                filename = butler.get(reprocessDataset + "_filename", dataId)[0]
                os.unlink(filename)
303 """!Run detection merging on a patch
305 Only slave nodes execute this method.
307 @param cache: Pool cache, containing butler
308 @param dataIdList: List of data identifiers for the patch in different filters
310 with self.logOperation(
"merge detections from %s" % (dataIdList,)):
311 dataRefList = [
getDataRef(cache.butler, dataId, self.config.coaddName +
"Coadd_calexp")
for
312 dataId
in dataIdList]
313 if (
not self.config.clobberMergedDetections
and
314 dataRefList[0].datasetExists(self.config.coaddName +
"Coadd_mergeDet")):
316 self.mergeCoaddDetections.run(dataRefList)
319 """!Run measurement on a patch for a single filter
321 Only slave nodes execute this method.
323 @param cache: Pool cache, with butler
324 @param dataId: Data identifier for patch
325 @return whether the patch requires reprocessing.
327 with self.logOperation(
"measurement on %s" % (dataId,)):
329 self.config.coaddName +
"Coadd_calexp")
331 if (
not self.config.clobberMeasurements
and
332 dataRef.datasetExists(self.config.coaddName +
"Coadd_meas")):
333 if not self.config.reprocessing:
336 catalog = dataRef.get(self.config.coaddName +
"Coadd_meas")
337 bigFlag = catalog[
"deblend.parent-too-big"]
338 numOldBig = bigFlag.sum()
340 self.log.info(
"No large footprints in %s" %
343 numNewBig = sum((self.measureCoaddSources.deblend.isLargeFootprint(src.getFootprint())
for
344 src
in catalog[bigFlag]))
345 if numNewBig == numOldBig:
346 self.log.info(
"All %d formerly large footprints continue to be large in %s" %
347 (numOldBig, dataRef.dataId,))
349 self.log.info(
"Found %d large footprints to be reprocessed in %s" %
350 (numOldBig - numNewBig, dataRef.dataId))
353 self.measureCoaddSources.run(dataRef)
357 """!Run measurement merging on a patch
359 Only slave nodes execute this method.
361 @param cache: Pool cache, containing butler
362 @param dataIdList: List of data identifiers for the patch in different filters
364 with self.logOperation(
"merge measurements from %s" % (dataIdList,)):
365 dataRefList = [
getDataRef(cache.butler, dataId, self.config.coaddName +
"Coadd_calexp")
for
366 dataId
in dataIdList]
367 if (
not self.config.clobberMergedMeasurements
and
368 not self.config.reprocessing
and
369 dataRefList[0].datasetExists(self.config.coaddName +
"Coadd_ref")):
371 self.mergeCoaddMeasurements.run(dataRefList)
374 """!Run forced photometry on a patch for a single filter
376 Only slave nodes execute this method.
378 @param cache: Pool cache, with butler
379 @param dataId: Data identifier for patch
381 with self.logOperation(
"forced photometry on %s" % (dataId,)):
383 self.config.coaddName +
"Coadd_calexp")
384 if (
not self.config.clobberForcedPhotometry
and
385 not self.config.reprocessing
and
386 dataRef.datasetExists(self.config.coaddName +
"Coadd_forced_src")):
388 self.forcedPhotCoadd.run(dataRef)
391 """We don't collect any metadata, so skip"""