from __future__ import absolute_import, division, print_function

import os

from argparse import ArgumentError
from builtins import zip

from lsst.pex.config import Config, Field, ConfigurableField
from lsst.pipe.base import ArgumentParser, TaskRunner
from lsst.pipe.tasks.multiBand import (DetectCoaddSourcesTask,
                                       MergeDetectionsTask,
                                       MeasureMergedCoaddSourcesTask,
                                       MergeMeasurementsTask,)
from lsst.ctrl.pool.parallel import BatchPoolTask
from lsst.ctrl.pool.pool import Pool, abortOnError, NODE
from lsst.meas.base.references import MultiBandReferencesTask
from lsst.meas.base.forcedPhotCoadd import ForcedPhotCoaddTask
from lsst.pipe.tasks.coaddBase import CoaddDataIdContainer
# getDataRef(butler, dataId, datasetType="raw") is assumed to come from the
# package's utils module, as in lsst.pipe.drivers
from lsst.pipe.drivers.utils import getDataRef
import lsst.afw.table as afwTable
26 """!Make self.refList from self.idList 28 It's difficult to make a data reference that merely points to an entire 29 tract: there is no data product solely at the tract level. Instead, we 30 generate a list of data references for patches within the tract. 32 @param namespace namespace object that is the result of an argument parser 34 datasetType = namespace.config.coaddName +
"Coadd_calexp" 36 def getPatchRefList(tract):
37 return [namespace.butler.dataRef(datasetType=datasetType,
39 filter=dataId[
"filter"],
40 patch=
"%d,%d" % patch.getIndex())
44 for dataId
in self.idList:
47 if "filter" not in dataId:
48 raise ArgumentError(
None,
"--id must include 'filter'")
50 skymap = self.getSkymap(namespace, datasetType)
53 tractId = dataId[
"tract"]
54 if tractId
not in tractRefs:
55 tractRefs[tractId] = []
57 tractRefs[tractId].append(namespace.butler.dataRef(datasetType=datasetType,
61 patch=dataId[
'patch']))
63 tractRefs[tractId] += getPatchRefList(skymap[tractId])
65 tractRefs = dict((tract.getId(), tractRefs.get(tract.getId(), []) + getPatchRefList(tract))
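
# For illustration only (tract, filter and patch values below are
# hypothetical): given "--id tract=8766 filter=HSC-I", the container above
# expands the single tract-level ID into one data reference per patch, e.g.
#
#     [namespace.butler.dataRef(datasetType="deepCoadd_calexp", tract=8766,
#                               filter="HSC-I", patch="0,0"),
#      namespace.butler.dataRef(datasetType="deepCoadd_calexp", tract=8766,
#                               filter="HSC-I", patch="0,1"),
#      ...]
#
# so no tract-level data product is ever required.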


class MultiBandDriverConfig(Config):
    coaddName = Field(dtype=str, default="deep", doc="Name of coadd")
    detectCoaddSources = ConfigurableField(target=DetectCoaddSourcesTask,
                                           doc="Detect sources on coadd")
    mergeCoaddDetections = ConfigurableField(
        target=MergeDetectionsTask, doc="Merge detections")
    measureCoaddSources = ConfigurableField(target=MeasureMergedCoaddSourcesTask,
                                            doc="Measure merged detections")
    mergeCoaddMeasurements = ConfigurableField(
        target=MergeMeasurementsTask, doc="Merge measurements")
    forcedPhotCoadd = ConfigurableField(target=ForcedPhotCoaddTask,
                                        doc="Forced measurement on coadded images")
    clobberDetections = Field(
        dtype=bool, default=False, doc="Clobber existing detections?")
    clobberMergedDetections = Field(
        dtype=bool, default=False, doc="Clobber existing merged detections?")
    clobberMeasurements = Field(
        dtype=bool, default=False, doc="Clobber existing measurements?")
    clobberMergedMeasurements = Field(
        dtype=bool, default=False, doc="Clobber existing merged measurements?")
    clobberForcedPhotometry = Field(
        dtype=bool, default=False, doc="Clobber existing forced photometry?")
    reprocessing = Field(
        dtype=bool, default=False,
        doc=("Are we reprocessing?\n\n"
             "This exists as a workaround for large deblender footprints causing large memory use "
             "and/or very slow processing. We refuse to deblend those footprints when running on a cluster "
             "and return to reprocess on a machine with larger memory or more time "
             "if we consider those footprints important to recover."))

    def setDefaults(self):
        Config.setDefaults(self)
        self.forcedPhotCoadd.references.retarget(MultiBandReferencesTask)

    def validate(self):
        for subtask in ("mergeCoaddDetections", "measureCoaddSources",
                        "mergeCoaddMeasurements", "forcedPhotCoadd"):
            coaddName = getattr(self, subtask).coaddName
            if coaddName != self.coaddName:
                raise RuntimeError("%s.coaddName (%s) doesn't match root coaddName (%s)" %
                                   (subtask, coaddName, self.coaddName))
116 """TaskRunner for running MultiBandTask 118 This is similar to the lsst.pipe.base.ButlerInitializedTaskRunner, 119 except that we have a list of data references instead of a single 120 data reference being passed to the Task.run. 124 """A variant of the base version that passes a butler argument to the task's constructor 125 parsedCmd or args must be specified. 127 if parsedCmd
is not None:
128 butler = parsedCmd.butler
129 elif args
is not None:
130 dataRefList, kwargs = args
131 butler = dataRefList[0].butlerSubset.butler
133 raise RuntimeError(
"parsedCmd or args must be specified")
134 return self.TaskClass(config=self.config, log=self.log, butler=butler)
138 """Unpickle something by calling a factory""" 139 return factory(*args, **kwargs)
143 """Multi-node driver for multiband processing""" 144 ConfigClass = MultiBandDriverConfig
145 _DefaultName =
"multiBandDriver" 146 RunnerClass = MultiBandDriverTaskRunner

    def __init__(self, butler=None, schema=None, refObjLoader=None, **kwargs):
        """!
        @param[in] butler: the butler can be used to retrieve schema or passed to the refObjLoader constructor
            in case it is needed.
        @param[in] schema: the schema of the source detection catalog used as input.
        @param[in] refObjLoader: an instance of LoadReferenceObjectsTasks that supplies an external reference
            catalog.  May be None if the butler argument is provided or all steps requiring a reference
            catalog are disabled.
        """
        BatchPoolTask.__init__(self, **kwargs)
        if schema is None:
            assert butler is not None, "Butler not provided"
            schema = butler.get(self.config.coaddName +
                                "Coadd_det_schema", immediate=True).schema
        self.butler = butler
        self.makeSubtask("detectCoaddSources")
        self.makeSubtask("mergeCoaddDetections", schema=schema)
        self.makeSubtask("measureCoaddSources", schema=afwTable.Schema(self.mergeCoaddDetections.schema),
                         peakSchema=afwTable.Schema(
                             self.mergeCoaddDetections.merged.getPeakSchema()),
                         refObjLoader=refObjLoader, butler=butler)
        self.makeSubtask("mergeCoaddMeasurements", schema=afwTable.Schema(
            self.measureCoaddSources.schema))
        self.makeSubtask("forcedPhotCoadd", refSchema=afwTable.Schema(
            self.mergeCoaddMeasurements.schema))

    def __reduce__(self):
        """Pickler"""
        return unpickle, (self.__class__, [], dict(config=self.config, name=self._name,
                                                   parentTask=self._parentTask, log=self.log,
                                                   butler=self.butler))

    @classmethod
    def _makeArgumentParser(cls, *args, **kwargs):
        kwargs.pop("doBatch", False)
        parser = ArgumentParser(name=cls._DefaultName, *args, **kwargs)
        parser.add_id_argument("--id", "deepCoadd", help="data ID, e.g. --id tract=12345 patch=1,2",
                               ContainerClass=TractDataIdContainer)
        return parser
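
    # For illustration only: a typical invocation built on this parser might
    # look like the following (repository path, rerun name, batch options and
    # data IDs are all hypothetical):
    #
    #     multiBandDriver.py /path/to/repo --rerun myRerun \
    #         --id tract=12345 filter=HSC-G^HSC-R^HSC-I \
    #         --batch-type=smp --cores 16
    #
    # where "^" separates multiple values for a single --id key.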
190 """!Return walltime request for batch job 192 @param time: Requested time per iteration 193 @param parsedCmd: Results of argument parsing 194 @param numCores: Number of cores 197 for refList
in parsedCmd.id.refList:
198 numTargets += len(refList)
199 return time*numTargets/float(numCpus)
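
    # Worked example of the formula above (numbers hypothetical): with
    # time=600 s per patch-filter, 81 patches in 3 filters (numTargets=243)
    # and numCpus=27, the batch request is 600*243/27 = 5400 s.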

    @abortOnError
    def run(self, patchRefList):
        """!Run multiband processing on coadds

        Only the master node runs this method.

        No real MPI communication (scatter/gather) takes place: all I/O goes
        through the disk. We want the intermediate stages on disk, and the
        component Tasks are implemented around this, so we just follow suit.

        @param patchRefList: Data references to run measurement
        """
        for patchRef in patchRefList:
            if patchRef:
                butler = patchRef.getButler()
                break
        else:
            raise RuntimeError("No valid patches")
        pool = Pool("all")
        pool.cacheClear()
        pool.storeSet(butler=butler)

        # Run detection on any patches that still need it: the coadd exists
        # but the detection outputs do not
        detectionList = [patchRef for patchRef in patchRefList if not
                         patchRef.datasetExists(self.config.coaddName + "Coadd_calexp") and
                         patchRef.datasetExists(self.config.coaddName + "Coadd")]
        pool.map(self.runDetection, detectionList)

        # Only continue with patches for which the detection stage is complete
        patchRefList = [patchRef for patchRef in patchRefList if
                        patchRef.datasetExists(self.config.coaddName + "Coadd_calexp") and
                        patchRef.datasetExists(self.config.coaddName + "Coadd_det")]
        dataIdList = [patchRef.dataId for patchRef in patchRefList]

        # Group the data identifiers by patch
        patches = {}
        tract = None
        for patchRef in patchRefList:
            dataId = patchRef.dataId
            if tract is None:
                tract = dataId["tract"]
            else:
                assert tract == dataId["tract"]  # all patches must share one tract
            patch = dataId["patch"]
            if patch not in patches:
                patches[patch] = []
            patches[patch].append(dataId)

        pool.map(self.runMergeDetections, patches.values())

        # Measure sources on each patch+filter; each result records whether
        # the patch contains large footprints that the deblender skipped and
        # that may need reprocessing
        reprocessed = pool.map(self.runMeasureMerged, dataIdList)

        if self.config.reprocessing:
            patchReprocessing = {}
            for dataId, reprocess in zip(dataIdList, reprocessed):
                patchId = dataId["patch"]
                patchReprocessing[patchId] = patchReprocessing.get(
                    patchId, False) or reprocess
            # Persist the determination as a touch-file, to make error
            # recovery easier: the decision otherwise exists only in memory
            reprocessDataset = self.config.coaddName + "Coadd_multibandReprocessing"
            for patchId in patchReprocessing:
                dataId = dict(tract=tract, patch=patchId)
                if patchReprocessing[patchId]:
                    filename = butler.get(
                        reprocessDataset + "_filename", dataId)[0]
                    open(filename, 'a').close()  # Touch file
                elif butler.datasetExists(reprocessDataset, dataId):
                    # A touch-file left by a previous run means we failed
                    # part-way through reprocessing this patch: redo it
                    patchReprocessing[patchId] = True

        # Only merge and force-photometer patches that are being (re)processed
        pool.map(self.runMergeMeasurements, [idList for patchId, idList in patches.items() if
                                             not self.config.reprocessing or patchReprocessing[patchId]])
        pool.map(self.runForcedPhot, [dataId1 for dataId1 in dataIdList if
                                      not self.config.reprocessing or
                                      patchReprocessing[dataId1["patch"]]])

        # Remove the persisted reprocessing determination now that we're done
        if self.config.reprocessing:
            for patchId in patchReprocessing:
                if not patchReprocessing[patchId]:
                    continue
                dataId = dict(tract=tract, patch=patchId)
                filename = butler.get(
                    reprocessDataset + "_filename", dataId)[0]
                os.unlink(filename)
323 """! Run detection on a patch 325 Only slave nodes execute this method. 327 @param cache: Pool cache, containing butler 328 @param patchRef: Patch on which to do detection 330 with self.logOperation(
"do detections on {}".format(patchRef.dataId)):
331 idFactory = self.detectCoaddSources.makeIdFactory(patchRef)
332 coadd = patchRef.get(self.config.coaddName +
"Coadd",
334 detResults = self.detectCoaddSources.
runDetection(coadd, idFactory)
335 self.detectCoaddSources.write(coadd, detResults, patchRef)
338 """!Run detection merging on a patch 340 Only slave nodes execute this method. 342 @param cache: Pool cache, containing butler 343 @param dataIdList: List of data identifiers for the patch in different filters 345 with self.logOperation(
"merge detections from %s" % (dataIdList,)):
346 dataRefList = [
getDataRef(cache.butler, dataId, self.config.coaddName +
"Coadd_calexp")
for 347 dataId
in dataIdList]
348 if (
not self.config.clobberMergedDetections
and 349 dataRefList[0].datasetExists(self.config.coaddName +
"Coadd_mergeDet")):
351 self.mergeCoaddDetections.
run(dataRefList)
354 """!Run measurement on a patch for a single filter 356 Only slave nodes execute this method. 358 @param cache: Pool cache, with butler 359 @param dataId: Data identifier for patch 360 @return whether the patch requires reprocessing. 362 with self.logOperation(
"measurement on %s" % (dataId,)):
364 self.config.coaddName +
"Coadd_calexp")
366 if (
not self.config.clobberMeasurements
and 367 dataRef.datasetExists(self.config.coaddName +
"Coadd_meas")):
368 if not self.config.reprocessing:
371 catalog = dataRef.get(self.config.coaddName +
"Coadd_meas")
372 bigFlag = catalog[
"deblend.parent-too-big"]
373 numOldBig = bigFlag.sum()
375 self.log.info(
"No large footprints in %s" %
378 numNewBig = sum((self.measureCoaddSources.deblend.isLargeFootprint(src.getFootprint())
for 379 src
in catalog[bigFlag]))
380 if numNewBig == numOldBig:
381 self.log.info(
"All %d formerly large footprints continue to be large in %s" %
382 (numOldBig, dataRef.dataId,))
384 self.log.info(
"Found %d large footprints to be reprocessed in %s" %
385 (numOldBig - numNewBig, dataRef.dataId))
388 self.measureCoaddSources.
run(dataRef)
392 """!Run measurement merging on a patch 394 Only slave nodes execute this method. 396 @param cache: Pool cache, containing butler 397 @param dataIdList: List of data identifiers for the patch in different filters 399 with self.logOperation(
"merge measurements from %s" % (dataIdList,)):
400 dataRefList = [
getDataRef(cache.butler, dataId, self.config.coaddName +
"Coadd_calexp")
for 401 dataId
in dataIdList]
402 if (
not self.config.clobberMergedMeasurements
and 403 not self.config.reprocessing
and 404 dataRefList[0].datasetExists(self.config.coaddName +
"Coadd_ref")):
406 self.mergeCoaddMeasurements.
run(dataRefList)
409 """!Run forced photometry on a patch for a single filter 411 Only slave nodes execute this method. 413 @param cache: Pool cache, with butler 414 @param dataId: Data identifier for patch 416 with self.logOperation(
"forced photometry on %s" % (dataId,)):
418 self.config.coaddName +
"Coadd_calexp")
419 if (
not self.config.clobberForcedPhotometry
and 420 not self.config.reprocessing
and 421 dataRef.datasetExists(self.config.coaddName +
"Coadd_forced_src")):
423 self.forcedPhotCoadd.
run(dataRef)
426 """We don't collect any metadata, so skip"""