1 from __future__
import absolute_import, division, print_function
3 from argparse
import ArgumentError
5 from builtins
import zip
11 DeblendCoaddSourcesTask,
12 MeasureMergedCoaddSourcesTask,
13 MergeMeasurementsTask,)
27 """!Make self.refList from self.idList 29 It's difficult to make a data reference that merely points to an entire 30 tract: there is no data product solely at the tract level. Instead, we 31 generate a list of data references for patches within the tract. 33 @param namespace namespace object that is the result of an argument parser 35 datasetType = namespace.config.coaddName +
"Coadd_calexp" 37 def getPatchRefList(tract):
38 return [namespace.butler.dataRef(datasetType=datasetType,
40 filter=dataId[
"filter"],
41 patch=
"%d,%d" % patch.getIndex())
45 for dataId
in self.idList:
48 if "filter" not in dataId:
49 raise ArgumentError(
None,
"--id must include 'filter'")
51 skymap = self.getSkymap(namespace, datasetType)
54 tractId = dataId[
"tract"]
55 if tractId
not in tractRefs:
56 tractRefs[tractId] = []
58 tractRefs[tractId].append(namespace.butler.dataRef(datasetType=datasetType,
62 patch=dataId[
'patch']))
64 tractRefs[tractId] += getPatchRefList(skymap[tractId])
66 tractRefs = dict((tract.getId(), tractRefs.get(tract.getId(), []) + getPatchRefList(tract))
73 coaddName = Field(dtype=str, default=
"deep", doc=
"Name of coadd")
74 doDetection = Field(dtype=bool, default=
False,
75 doc=
"Re-run detection? (requires *Coadd dataset to have been written)")
76 detectCoaddSources = ConfigurableField(target=DetectCoaddSourcesTask,
77 doc=
"Detect sources on coadd")
78 mergeCoaddDetections = ConfigurableField(
79 target=MergeDetectionsTask, doc=
"Merge detections")
80 deblendCoaddSources = ConfigurableField(target=DeblendCoaddSourcesTask, doc=
"Deblend merged detections")
81 measureCoaddSources = ConfigurableField(target=MeasureMergedCoaddSourcesTask,
82 doc=
"Measure merged and (optionally) deblended detections")
83 mergeCoaddMeasurements = ConfigurableField(
84 target=MergeMeasurementsTask, doc=
"Merge measurements")
85 forcedPhotCoadd = ConfigurableField(target=ForcedPhotCoaddTask,
86 doc=
"Forced measurement on coadded images")
88 dtype=bool, default=
False,
89 doc=(
"Are we reprocessing?\n\n" 90 "This exists as a workaround for large deblender footprints causing large memory use " 91 "and/or very slow processing. We refuse to deblend those footprints when running on a cluster " 92 "and return to reprocess on a machine with larger memory or more time " 93 "if we consider those footprints important to recover."),
97 Config.setDefaults(self)
101 for subtask
in (
"mergeCoaddDetections",
"deblendCoaddSources",
"measureCoaddSources",
102 "mergeCoaddMeasurements",
"forcedPhotCoadd"):
103 coaddName = getattr(self, subtask).coaddName
105 raise RuntimeError(
"%s.coaddName (%s) doesn't match root coaddName (%s)" %
110 """TaskRunner for running MultiBandTask 112 This is similar to the lsst.pipe.base.ButlerInitializedTaskRunner, 113 except that we have a list of data references instead of a single 114 data reference being passed to the Task.run, and we pass the results 115 of the '--reuse-outputs-from' command option to the Task constructor. 118 def __init__(self, TaskClass, parsedCmd, doReturnResults=False):
119 TaskRunner.__init__(self, TaskClass, parsedCmd, doReturnResults)
def makeTask(self, parsedCmd=None, args=None):
    """A variant of the base version that passes a butler argument to the task's constructor

    parsedCmd or args must be specified.

    @param parsedCmd: parsed command line; when given, its ``butler`` attribute is used
    @param args: ``(dataRefList, kwargs)`` pair; consulted only when parsedCmd is None,
        in which case the butler is taken from the first data reference
    @return result of calling ``self.TaskClass`` with config, log, butler and reuse
    @throw RuntimeError if neither parsedCmd nor args is supplied
    """
    if parsedCmd is not None:
        butler = parsedCmd.butler
    elif args is not None:
        # Only the data references are needed here; the kwargs half is unused.
        dataRefList, kwargs = args
        butler = dataRefList[0].butlerSubset.butler
    else:
        raise RuntimeError("parsedCmd or args must be specified")
    return self.TaskClass(config=self.config, log=self.log, butler=butler, reuse=self.reuse)
def unpickle(factory, args, kwargs):
    """Unpickle something by calling a factory

    @param factory: callable that rebuilds the object
    @param args: sequence of positional arguments, unpacked into the call
    @param kwargs: mapping of keyword arguments, unpacked into the call
    @return whatever ``factory(*args, **kwargs)`` returns
    """
    return factory(*args, **kwargs)
142 """Multi-node driver for multiband processing""" 143 ConfigClass = MultiBandDriverConfig
144 _DefaultName =
"multiBandDriver" 145 RunnerClass = MultiBandDriverTaskRunner
147 def __init__(self, butler=None, schema=None, refObjLoader=None, reuse=tuple(), **kwargs):
149 @param[in] butler: the butler can be used to retrieve schema or passed to the refObjLoader constructor 150 in case it is needed. 151 @param[in] schema: the schema of the source detection catalog used as input. 152 @param[in] refObjLoader: an instance of LoadReferenceObjectsTasks that supplies an external reference 153 catalog. May be None if the butler argument is provided or all steps requiring a reference 154 catalog are disabled. 156 BatchPoolTask.__init__(self, **kwargs)
158 assert butler
is not None,
"Butler not provided" 159 schema = butler.get(self.config.coaddName +
160 "Coadd_det_schema", immediate=
True).schema
163 self.makeSubtask(
"detectCoaddSources")
164 self.makeSubtask(
"mergeCoaddDetections", schema=schema)
165 if self.config.measureCoaddSources.inputCatalog.startswith(
"deblended"):
169 if self.config.deblendCoaddSources.simultaneous:
170 if self.config.deblendCoaddSources.multiBandDeblend.conserveFlux:
172 if self.config.deblendCoaddSources.multiBandDeblend.saveTemplates:
177 err =
"Measurement input '{0}' is not in the list of deblender output catalogs '{1}'" 180 self.makeSubtask(
"deblendCoaddSources",
181 schema=afwTable.Schema(self.mergeCoaddDetections.schema),
182 peakSchema=afwTable.Schema(self.mergeCoaddDetections.merged.getPeakSchema()),
184 measureInputSchema = afwTable.Schema(self.deblendCoaddSources.schema)
186 measureInputSchema = afwTable.Schema(self.mergeCoaddDetections.schema)
187 self.makeSubtask(
"measureCoaddSources", schema=measureInputSchema,
188 peakSchema=afwTable.Schema(
189 self.mergeCoaddDetections.merged.getPeakSchema()),
190 refObjLoader=refObjLoader, butler=butler)
191 self.makeSubtask(
"mergeCoaddMeasurements", schema=afwTable.Schema(
192 self.measureCoaddSources.schema))
193 self.makeSubtask(
"forcedPhotCoadd", refSchema=afwTable.Schema(
194 self.mergeCoaddMeasurements.schema))
198 return unpickle, (self.__class__, [], dict(config=self.config, name=self._name,
199 parentTask=self._parentTask, log=self.log,
203 def _makeArgumentParser(cls, *args, **kwargs):
204 kwargs.pop(
"doBatch",
False)
205 parser = ArgumentParser(name=cls.
_DefaultName, *args, **kwargs)
206 parser.add_id_argument(
"--id",
"deepCoadd", help=
"data ID, e.g. --id tract=12345 patch=1,2",
207 ContainerClass=TractDataIdContainer)
208 parser.addReuseOption([
"detectCoaddSources",
"mergeCoaddDetections",
"measureCoaddSources",
209 "mergeCoaddMeasurements",
"forcedPhotCoadd"])
# NOTE(review): the first parameter is ``cls``, so this is presumably decorated
# with @classmethod on a line the listing dropped -- confirm before applying.
def batchWallTime(cls, time, parsedCmd, numCpus):
    """!Return walltime request for batch job

    @param time: Requested time per iteration
    @param parsedCmd: Results of argument parsing
    @param numCpus: Number of CPUs the targets are divided across
    @return ``time`` multiplied by the total number of entries over all lists in
        ``parsedCmd.id.refList``, divided by ``numCpus`` (float division)
    """
    # Every entry in every reference list counts as one target.
    numTargets = sum(len(refList) for refList in parsedCmd.id.refList)
    return time*numTargets/float(numCpus)
227 """!Run multiband processing on coadds 229 Only the master node runs this method. 231 No real MPI communication (scatter/gather) takes place: all I/O goes 232 through the disk. We want the intermediate stages on disk, and the 233 component Tasks are implemented around this, so we just follow suit. 235 @param patchRefList: Data references to run measurement 237 for patchRef
in patchRefList:
239 butler = patchRef.getButler()
242 raise RuntimeError(
"No valid patches")
245 pool.storeSet(butler=butler)
260 if self.config.doDetection:
262 for patchRef
in patchRefList:
263 if (
"detectCoaddSources" in self.
reuse and 264 patchRef.datasetExists(self.config.coaddName +
"Coadd_calexp", write=
True)):
265 self.log.info(
"Skipping detectCoaddSources for %s; output already exists." % patchRef.dataId)
267 if not patchRef.datasetExists(self.config.coaddName +
"Coadd"):
268 self.log.debug(
"Not processing %s; required input %sCoadd missing." %
269 (patchRef.dataId, self.config.coaddName))
271 detectionList.append(patchRef)
275 patchRefList = [patchRef
for patchRef
in patchRefList
if 276 patchRef.datasetExists(self.config.coaddName +
"Coadd_calexp")
and 277 patchRef.datasetExists(self.config.coaddName +
"Coadd_det", write=self.config.doDetection)]
278 dataIdList = [patchRef.dataId
for patchRef
in patchRefList]
283 for patchRef
in patchRefList:
284 dataId = patchRef.dataId
286 tract = dataId[
"tract"]
288 assert tract == dataId[
"tract"]
290 patch = dataId[
"patch"]
291 if patch
not in patches:
293 patches[patch].append(dataId)
322 if self.config.reprocessing:
323 patchReprocessing = {}
324 for dataId, reprocess
in zip(dataIdList, reprocessed):
325 patchId = dataId[
"patch"]
326 patchReprocessing[patchId] = patchReprocessing.get(
327 patchId,
False)
or reprocess
329 reprocessDataset = self.config.coaddName +
"Coadd_multibandReprocessing" 330 for patchId
in patchReprocessing:
331 if not patchReprocessing[patchId]:
333 dataId = dict(tract=tract, patch=patchId)
334 if patchReprocessing[patchId]:
335 filename = butler.get(
336 reprocessDataset +
"_filename", dataId)[0]
337 open(filename,
'a').close()
338 elif butler.datasetExists(reprocessDataset, dataId):
341 patchReprocessing[patchId] =
True 344 pool.map(self.
runMeasurements, [dataId1
for dataId1
in dataIdList
if not self.config.reprocessing
or 345 patchReprocessing[dataId1[
"patch"]]])
347 not self.config.reprocessing
or patchReprocessing[patchId]])
348 pool.map(self.
runForcedPhot, [dataId1
for dataId1
in dataIdList
if not self.config.reprocessing
or 349 patchReprocessing[dataId1[
"patch"]]])
352 if self.config.reprocessing:
353 for patchId
in patchReprocessing:
354 if not patchReprocessing[patchId]:
356 dataId = dict(tract=tract, patch=patchId)
357 filename = butler.get(
358 reprocessDataset +
"_filename", dataId)[0]
362 """! Run detection on a patch 364 Only slave nodes execute this method. 366 @param cache: Pool cache, containing butler 367 @param patchRef: Patch on which to do detection 369 with self.
logOperation(
"do detections on {}".format(patchRef.dataId)):
370 idFactory = self.detectCoaddSources.makeIdFactory(patchRef)
371 coadd = patchRef.get(self.config.coaddName +
"Coadd",
373 expId = int(patchRef.get(self.config.coaddName +
"CoaddId"))
374 self.detectCoaddSources.emptyMetadata()
375 detResults = self.detectCoaddSources.run(coadd, idFactory, expId=expId)
376 self.detectCoaddSources.write(coadd, detResults, patchRef)
def runMergeDetections(self, cache, dataIdList):
    """!Run detection merging on a patch

    Only slave nodes execute this method.

    @param cache: Pool cache, containing butler
    @param dataIdList: List of data identifiers for the patch in different filters
    """
    with self.logOperation("merge detections from %s" % (dataIdList,)):
        dataRefList = [getDataRef(cache.butler, dataId, self.config.coaddName + "Coadd_calexp")
                       for dataId in dataIdList]
        if ("mergeCoaddDetections" in self.reuse and
                dataRefList[0].datasetExists(self.config.coaddName + "Coadd_mergeDet", write=True)):
            self.log.info("Skipping mergeCoaddDetections for %s; output already exists." %
                          dataRefList[0].dataId)
            # Reuse was requested and the merged output is present: skip the merge.
            return
        self.mergeCoaddDetections.runDataRef(dataRefList)
398 """Run the deblender on a list of dataId's 400 Only slave nodes execute this method. 405 Pool cache with butler. 407 Data identifier for patch in each band. 412 whether the patch requires reprocessing. 414 with self.
logOperation(
"deblending %s" % (dataIdList,)):
415 dataRefList = [
getDataRef(cache.butler, dataId, self.config.coaddName +
"Coadd_calexp")
for 416 dataId
in dataIdList]
418 if (
"deblendCoaddSources" in self.
reuse and 419 dataRef.datasetExists(self.config.coaddName + self.
measurementInput, write=
True)):
420 if not self.config.reprocessing:
421 self.log.info(
"Skipping deblendCoaddSources for %s; output already exists" % dataIdList)
425 bigFlag = catalog[
"deblend.parent-too-big"]
426 numOldBig = bigFlag.sum()
428 self.log.info(
"No large footprints in %s" %
431 numNewBig = sum((self.deblendCoaddSources.isLargeFootprint(src.getFootprint())
for 432 src
in catalog[bigFlag]))
433 if numNewBig == numOldBig:
434 self.log.info(
"All %d formerly large footprints continue to be large in %s" %
435 (numOldBig, dataRefList[0].dataId,))
437 self.log.info(
"Found %d large footprints to be reprocessed in %s" %
438 (numOldBig - numNewBig, [dataRef.dataId
for dataRef
in dataRefList]))
441 self.deblendCoaddSources.
runDataRef(dataRefList)
445 """Run measurement on a patch for a single filter 447 Only slave nodes execute this method. 452 Pool cache, with butler 454 Data identifier for patch 456 with self.
logOperation(
"measurements on %s" % (dataId,)):
458 self.config.coaddName +
"Coadd_calexp")
459 if (
"measureCoaddSources" in self.
reuse and 460 not self.config.reprocessing
and 461 dataRef.datasetExists(self.config.coaddName +
"Coadd_meas", write=
True)):
462 self.log.info(
"Skipping measuretCoaddSources for %s; output already exists" % dataId)
def runMergeMeasurements(self, cache, dataIdList):
    """!Run measurement merging on a patch

    Only slave nodes execute this method.

    @param cache: Pool cache, containing butler
    @param dataIdList: List of data identifiers for the patch in different filters
    """
    with self.logOperation("merge measurements from %s" % (dataIdList,)):
        dataRefList = [getDataRef(cache.butler, dataId, self.config.coaddName + "Coadd_calexp")
                       for dataId in dataIdList]
        # Existing output is only reused when this run is not a reprocessing pass.
        if ("mergeCoaddMeasurements" in self.reuse and
                not self.config.reprocessing and
                dataRefList[0].datasetExists(self.config.coaddName + "Coadd_ref", write=True)):
            self.log.info("Skipping mergeCoaddMeasurements for %s; output already exists" %
                          dataRefList[0].dataId)
            return
        self.mergeCoaddMeasurements.runDataRef(dataRefList)
486 """!Run forced photometry on a patch for a single filter 488 Only slave nodes execute this method. 490 @param cache: Pool cache, with butler 491 @param dataId: Data identifier for patch 493 with self.
logOperation(
"forced photometry on %s" % (dataId,)):
495 self.config.coaddName +
"Coadd_calexp")
496 if (
"forcedPhotCoadd" in self.
reuse and 497 not self.config.reprocessing
and 498 dataRef.datasetExists(self.config.coaddName +
"Coadd_forced_src", write=
True)):
499 self.log.info(
"Skipping forcedPhotCoadd for %s; output already exists" % dataId)
def writeMetadata(self, dataRef):
    """We don't collect any metadata, so skip

    @param dataRef: data reference; accepted but not used
    @return None
    """
def unpickle(factory, args, kwargs)
def runDataRef(self, patchRefList)
Run multiband processing on coadds.
def __init__(self, butler=None, schema=None, refObjLoader=None, reuse=tuple(), **kwargs)
def writeMetadata(self, dataRef)
def runDeblendMerged(self, cache, dataIdList)
def getDataRef(butler, dataId, datasetType="raw")
def runMeasurements(self, cache, dataId)
def runForcedPhot(self, cache, dataId)
Run forced photometry on a patch for a single filter.
def batchWallTime(cls, time, parsedCmd, numCpus)
Return walltime request for batch job.
def __init__(self, TaskClass, parsedCmd, doReturnResults=False)
def logOperation(self, operation, catch=False, trace=True)
def makeDataRefList(self, namespace)
Make self.refList from self.idList.
def runDetection(self, cache, patchRef)
Run detection on a patch.
def runMergeDetections(self, cache, dataIdList)
Run detection merging on a patch.
def makeTask(self, parsedCmd=None, args=None)
def runMergeMeasurements(self, cache, dataIdList)
Run measurement merging on a patch.