from __future__ import absolute_import, division, print_function

import os

from argparse import ArgumentError
from builtins import zip

import lsst.afw.table as afwTable
from lsst.coadd.utils.coaddDataIdContainer import CoaddDataIdContainer
from lsst.ctrl.pool.parallel import BatchPoolTask
from lsst.ctrl.pool.pool import Pool, abortOnError
from lsst.meas.base.forcedPhotCoadd import ForcedPhotCoaddTask
from lsst.pex.config import Config, Field, ConfigurableField
from lsst.pipe.base import ArgumentParser, TaskRunner
from lsst.pipe.drivers.utils import getDataRef
from lsst.pipe.tasks.multiBand import (DetectCoaddSourcesTask,
                                       MergeDetectionsTask,
                                       MeasureMergedCoaddSourcesTask,
                                       MergeMeasurementsTask,)
26 """!Make self.refList from self.idList 28 It's difficult to make a data reference that merely points to an entire 29 tract: there is no data product solely at the tract level. Instead, we 30 generate a list of data references for patches within the tract. 32 @param namespace namespace object that is the result of an argument parser 34 datasetType = namespace.config.coaddName +
"Coadd_calexp" 36 def getPatchRefList(tract):
37 return [namespace.butler.dataRef(datasetType=datasetType,
39 filter=dataId[
"filter"],
40 patch=
"%d,%d" % patch.getIndex())
44 for dataId
in self.idList:
47 if "filter" not in dataId:
48 raise ArgumentError(
None,
"--id must include 'filter'")
50 skymap = self.getSkymap(namespace, datasetType)
53 tractId = dataId[
"tract"]
54 if tractId
not in tractRefs:
55 tractRefs[tractId] = []
57 tractRefs[tractId].append(namespace.butler.dataRef(datasetType=datasetType,
61 patch=dataId[
'patch']))
63 tractRefs[tractId] += getPatchRefList(skymap[tractId])
65 tractRefs = dict((tract.getId(), tractRefs.get(tract.getId(), []) + getPatchRefList(tract))
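
# Illustrative sketch only (not part of the pipeline): the container above expands a
# tract-level data ID into one data reference per patch.  With a hypothetical 2x2 tract
# and a single filter, the expansion produces per-patch data IDs like these:
#
#     patch_indices = [(0, 0), (0, 1), (1, 0), (1, 1)]          # hypothetical patch grid
#     data_ids = [dict(tract=9813, filter="HSC-I", patch="%d,%d" % index)
#                 for index in patch_indices]
#     # -> [{'tract': 9813, 'filter': 'HSC-I', 'patch': '0,0'}, ...]
#
# Each such data ID is then turned into a butler data reference for the
# coaddName + "Coadd_calexp" dataset, exactly as getPatchRefList() does above.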


class MultiBandDriverConfig(Config):
    coaddName = Field(dtype=str, default="deep", doc="Name of coadd")
    doDetection = Field(dtype=bool, default=False,
                        doc="Re-run detection? (requires *Coadd dataset to have been written)")
    detectCoaddSources = ConfigurableField(target=DetectCoaddSourcesTask,
                                           doc="Detect sources on coadd")
    mergeCoaddDetections = ConfigurableField(target=MergeDetectionsTask, doc="Merge detections")
    measureCoaddSources = ConfigurableField(target=MeasureMergedCoaddSourcesTask,
                                            doc="Measure merged detections")
    mergeCoaddMeasurements = ConfigurableField(target=MergeMeasurementsTask, doc="Merge measurements")
    forcedPhotCoadd = ConfigurableField(target=ForcedPhotCoaddTask,
                                        doc="Forced measurement on coadded images")
    reprocessing = Field(
        dtype=bool, default=False,
        doc=("Are we reprocessing?\n\n"
             "This exists as a workaround for large deblender footprints causing large memory use "
             "and/or very slow processing. We refuse to deblend those footprints when running on a "
             "cluster and return to reprocess on a machine with larger memory or more time "
             "if we consider those footprints important to recover."),
    )
    def setDefaults(self):
        Config.setDefaults(self)

    def validate(self):
        for subtask in ("mergeCoaddDetections", "measureCoaddSources",
                        "mergeCoaddMeasurements", "forcedPhotCoadd"):
            coaddName = getattr(self, subtask).coaddName
            if coaddName != self.coaddName:
                raise RuntimeError("%s.coaddName (%s) doesn't match root coaddName (%s)" %
                                   (subtask, coaddName, self.coaddName))
108 """TaskRunner for running MultiBandTask 110 This is similar to the lsst.pipe.base.ButlerInitializedTaskRunner, 111 except that we have a list of data references instead of a single 112 data reference being passed to the Task.run, and we pass the results 113 of the '--reuse-outputs-from' command option to the Task constructor. 116 def __init__(self, TaskClass, parsedCmd, doReturnResults=False):
117 TaskRunner.__init__(self, TaskClass, parsedCmd, doReturnResults)
121 """A variant of the base version that passes a butler argument to the task's constructor 122 parsedCmd or args must be specified. 124 if parsedCmd
is not None:
125 butler = parsedCmd.butler
126 elif args
is not None:
127 dataRefList, kwargs = args
128 butler = dataRefList[0].butlerSubset.butler
130 raise RuntimeError(
"parsedCmd or args must be specified")
131 return self.TaskClass(config=self.config, log=self.log, butler=butler, reuse=self.
reuse)
135 """Unpickle something by calling a factory""" 136 return factory(*args, **kwargs)
140 """Multi-node driver for multiband processing""" 141 ConfigClass = MultiBandDriverConfig
142 _DefaultName =
"multiBandDriver" 143 RunnerClass = MultiBandDriverTaskRunner
145 def __init__(self, butler=None, schema=None, refObjLoader=None, reuse=tuple(), **kwargs):
147 @param[in] butler: the butler can be used to retrieve schema or passed to the refObjLoader constructor 148 in case it is needed. 149 @param[in] schema: the schema of the source detection catalog used as input. 150 @param[in] refObjLoader: an instance of LoadReferenceObjectsTasks that supplies an external reference 151 catalog. May be None if the butler argument is provided or all steps requiring a reference 152 catalog are disabled. 154 BatchPoolTask.__init__(self, **kwargs)
156 assert butler
is not None,
"Butler not provided" 157 schema = butler.get(self.config.coaddName +
158 "Coadd_det_schema", immediate=
True).schema
161 self.makeSubtask(
"detectCoaddSources")
162 self.makeSubtask(
"mergeCoaddDetections", schema=schema)
163 self.makeSubtask(
"measureCoaddSources", schema=afwTable.Schema(self.mergeCoaddDetections.schema),
164 peakSchema=afwTable.Schema(
165 self.mergeCoaddDetections.merged.getPeakSchema()),
166 refObjLoader=refObjLoader, butler=butler)
167 self.makeSubtask(
"mergeCoaddMeasurements", schema=afwTable.Schema(
168 self.measureCoaddSources.schema))
169 self.makeSubtask(
"forcedPhotCoadd", refSchema=afwTable.Schema(
170 self.mergeCoaddMeasurements.schema))
    def __reduce__(self):
        """Pickler"""
        return unpickle, (self.__class__, [], dict(config=self.config, name=self._name,
                                                   parentTask=self._parentTask, log=self.log,
                                                   butler=self.butler, reuse=self.reuse))
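
# Illustrative sketch only: unpickle() plus __reduce__() implement the standard factory
# pattern for pickling objects whose constructors need keyword arguments.  A minimal,
# self-contained analogue using only the standard library (names here are hypothetical):
#
#     import pickle
#
#     def rebuild(factory, args, kwargs):
#         return factory(*args, **kwargs)
#
#     class Widget(object):
#         def __init__(self, name="w"):
#             self.name = name
#         def __reduce__(self):
#             return rebuild, (self.__class__, (), dict(name=self.name))
#
#     assert pickle.loads(pickle.dumps(Widget(name="spam"))).name == "spam"
#
# The driver task does the same thing so that slave processes created by the pool can
# reconstruct the task with its config, log, butler and reuse settings.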
    @classmethod
    def _makeArgumentParser(cls, *args, **kwargs):
        kwargs.pop("doBatch", False)
        parser = ArgumentParser(name=cls._DefaultName, *args, **kwargs)
        parser.add_id_argument("--id", "deepCoadd",
                               help="data ID, e.g. --id tract=12345 patch=1,2",
                               ContainerClass=TractDataIdContainer)
        parser.addReuseOption(["detectCoaddSources", "mergeCoaddDetections", "measureCoaddSources",
                               "mergeCoaddMeasurements", "forcedPhotCoadd"])
        return parser
190 """!Return walltime request for batch job 192 @param time: Requested time per iteration 193 @param parsedCmd: Results of argument parsing 194 @param numCores: Number of cores 197 for refList
in parsedCmd.id.refList:
198 numTargets += len(refList)
199 return time*numTargets/float(numCpus)
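
# Worked example (hypothetical numbers): with time=600 s per data reference, a tract
# covering 81 patches observed in 5 filters gives numTargets = 405, so on 32 cores the
# requested walltime is 600 * 405 / 32 = 7593.75 s, a little over two hours.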
203 """!Run multiband processing on coadds 205 Only the master node runs this method. 207 No real MPI communication (scatter/gather) takes place: all I/O goes 208 through the disk. We want the intermediate stages on disk, and the 209 component Tasks are implemented around this, so we just follow suit. 211 @param patchRefList: Data references to run measurement 213 for patchRef
in patchRefList:
215 butler = patchRef.getButler()
218 raise RuntimeError(
"No valid patches")
221 pool.storeSet(butler=butler)
236 if self.config.doDetection:
238 for patchRef
in patchRefList:
239 if (
"detectCoaddSources" in self.
reuse and 240 patchRef.datasetExists(self.config.coaddName +
"Coadd_calexp", write=
True)):
241 self.log.info(
"Skipping detectCoaddSources for %s; output already exists." % patchRef.dataId)
243 if not patchRef.datasetExists(self.config.coaddName +
"Coadd"):
244 self.log.debug(
"Not processing %s; required input %sCoadd missing." %
245 (patchRef.dataId, self.config.coaddName))
247 detectionList.append(patchRef)
251 patchRefList = [patchRef
for patchRef
in patchRefList
if 252 patchRef.datasetExists(self.config.coaddName +
"Coadd_calexp")
and 253 patchRef.datasetExists(self.config.coaddName +
"Coadd_det", write=self.config.doDetection)]
254 dataIdList = [patchRef.dataId
for patchRef
in patchRefList]
259 for patchRef
in patchRefList:
260 dataId = patchRef.dataId
262 tract = dataId[
"tract"]
264 assert tract == dataId[
"tract"]
266 patch = dataId[
"patch"]
267 if patch
not in patches:
269 patches[patch].append(dataId)
298 if self.config.reprocessing:
299 patchReprocessing = {}
300 for dataId, reprocess
in zip(dataIdList, reprocessed):
301 patchId = dataId[
"patch"]
302 patchReprocessing[patchId] = patchReprocessing.get(
303 patchId,
False)
or reprocess
305 reprocessDataset = self.config.coaddName +
"Coadd_multibandReprocessing" 306 for patchId
in patchReprocessing:
307 if not patchReprocessing[patchId]:
309 dataId = dict(tract=tract, patch=patchId)
310 if patchReprocessing[patchId]:
311 filename = butler.get(
312 reprocessDataset +
"_filename", dataId)[0]
313 open(filename,
'a').close()
314 elif butler.datasetExists(reprocessDataset, dataId):
317 patchReprocessing[patchId] =
True 321 not self.config.reprocessing
or patchReprocessing[patchId]])
322 pool.map(self.
runForcedPhot, [dataId1
for dataId1
in dataIdList
if not self.config.reprocessing
or 323 patchReprocessing[dataId[
"patch"]]])
326 if self.config.reprocessing:
327 for patchId
in patchReprocessing:
328 if not patchReprocessing[patchId]:
330 dataId = dict(tract=tract, patch=patchId)
331 filename = butler.get(
332 reprocessDataset +
"_filename", dataId)[0]
336 """! Run detection on a patch 338 Only slave nodes execute this method. 340 @param cache: Pool cache, containing butler 341 @param patchRef: Patch on which to do detection 343 with self.
logOperation(
"do detections on {}".format(patchRef.dataId)):
344 idFactory = self.detectCoaddSources.makeIdFactory(patchRef)
345 coadd = patchRef.get(self.config.coaddName +
"Coadd",
347 expId = int(patchRef.get(self.config.coaddName +
"CoaddId"))
348 self.detectCoaddSources.emptyMetadata()
349 detResults = self.detectCoaddSources.run(coadd, idFactory, expId=expId)
350 self.detectCoaddSources.write(coadd, detResults, patchRef)
354 """!Run detection merging on a patch 356 Only slave nodes execute this method. 358 @param cache: Pool cache, containing butler 359 @param dataIdList: List of data identifiers for the patch in different filters 361 with self.
logOperation(
"merge detections from %s" % (dataIdList,)):
362 dataRefList = [
getDataRef(cache.butler, dataId, self.config.coaddName +
"Coadd_calexp")
for 363 dataId
in dataIdList]
364 if (
"mergeCoaddDetections" in self.
reuse and 365 dataRefList[0].datasetExists(self.config.coaddName +
"Coadd_mergeDet", write=
True)):
366 self.log.info(
"Skipping mergeCoaddDetections for %s; output already exists." %
367 dataRefList[0].dataId)
369 self.mergeCoaddDetections.
runDataRef(dataRefList)
372 """!Run measurement on a patch for a single filter 374 Only slave nodes execute this method. 376 @param cache: Pool cache, with butler 377 @param dataId: Data identifier for patch 378 @return whether the patch requires reprocessing. 380 with self.
logOperation(
"measurement on %s" % (dataId,)):
382 self.config.coaddName +
"Coadd_calexp")
384 if (
"measureCoaddSources" in self.
reuse and 385 dataRef.datasetExists(self.config.coaddName +
"Coadd_meas", write=
True)):
386 if not self.config.reprocessing:
387 self.log.info(
"Skipping measureCoaddSources for %s; output already exists" % dataId)
390 catalog = dataRef.get(self.config.coaddName +
"Coadd_meas")
391 bigFlag = catalog[
"deblend.parent-too-big"]
392 numOldBig = bigFlag.sum()
394 self.log.info(
"No large footprints in %s" %
397 numNewBig = sum((self.measureCoaddSources.deblend.isLargeFootprint(src.getFootprint())
for 398 src
in catalog[bigFlag]))
399 if numNewBig == numOldBig:
400 self.log.info(
"All %d formerly large footprints continue to be large in %s" %
401 (numOldBig, dataRef.dataId,))
403 self.log.info(
"Found %d large footprints to be reprocessed in %s" %
404 (numOldBig - numNewBig, dataRef.dataId))
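
# Worked example (hypothetical numbers) of the reprocessing test above: if the existing
# catalog has numOldBig = 5 sources flagged "deblend.parent-too-big" and, with the current
# more permissive deblender limits, only numNewBig = 3 of them would still be considered
# large, then 2 footprints can now be deblended, measurement is re-run and the method
# returns True; if all 5 remain large (numNewBig == numOldBig) the patch is left alone and
# the method returns False.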
411 """!Run measurement merging on a patch 413 Only slave nodes execute this method. 415 @param cache: Pool cache, containing butler 416 @param dataIdList: List of data identifiers for the patch in different filters 418 with self.
logOperation(
"merge measurements from %s" % (dataIdList,)):
419 dataRefList = [
getDataRef(cache.butler, dataId, self.config.coaddName +
"Coadd_calexp")
for 420 dataId
in dataIdList]
421 if (
"mergeCoaddMeasurements" in self.
reuse and 422 not self.config.reprocessing
and 423 dataRefList[0].datasetExists(self.config.coaddName +
"Coadd_ref", write=
True)):
424 self.log.info(
"Skipping mergeCoaddMeasurements for %s; output already exists" %
425 dataRefList[0].dataId)
427 self.mergeCoaddMeasurements.
runDataRef(dataRefList)
430 """!Run forced photometry on a patch for a single filter 432 Only slave nodes execute this method. 434 @param cache: Pool cache, with butler 435 @param dataId: Data identifier for patch 437 with self.
logOperation(
"forced photometry on %s" % (dataId,)):
439 self.config.coaddName +
"Coadd_calexp")
440 if (
"forcedPhotCoadd" in self.
reuse and 441 not self.config.reprocessing
and 442 dataRef.datasetExists(self.config.coaddName +
"Coadd_forced_src", write=
True)):
443 self.log.info(
"Skipping forcedPhotCoadd for %s; output already exists" % dataId)
448 """We don't collect any metadata, so skip"""