from __future__ import absolute_import, division, print_function

import os

from builtins import zip

import lsst.afw.table as afwTable
from lsst.ctrl.pool.parallel import BatchPoolTask
from lsst.ctrl.pool.pool import Pool, abortOnError
from lsst.meas.base.forcedPhotCoadd import ForcedPhotCoaddTask
from lsst.meas.base.references import MultiBandReferencesTask
from lsst.pex.config import Config, Field, ConfigurableField
from lsst.pipe.base import ArgumentParser, TaskRunner
from lsst.pipe.drivers.utils import getDataRef, TractDataIdContainer
from lsst.pipe.tasks.multiBand import (DetectCoaddSourcesTask,
                                       MergeDetectionsTask,
                                       DeblendCoaddSourcesTask,
                                       MeasureMergedCoaddSourcesTask,
                                       MergeMeasurementsTask,)

class MultiBandDriverConfig(Config):
    coaddName = Field(dtype=str, default="deep", doc="Name of coadd")
    doDetection = Field(dtype=bool, default=False,
                        doc="Re-run detection? (requires *Coadd dataset to have been written)")
    detectCoaddSources = ConfigurableField(target=DetectCoaddSourcesTask,
                                           doc="Detect sources on coadd")
    mergeCoaddDetections = ConfigurableField(target=MergeDetectionsTask,
                                             doc="Merge detections")
    deblendCoaddSources = ConfigurableField(target=DeblendCoaddSourcesTask,
                                            doc="Deblend merged detections")
    measureCoaddSources = ConfigurableField(target=MeasureMergedCoaddSourcesTask,
                                            doc="Measure merged and (optionally) deblended detections")
    mergeCoaddMeasurements = ConfigurableField(target=MergeMeasurementsTask,
                                               doc="Merge measurements")
    forcedPhotCoadd = ConfigurableField(target=ForcedPhotCoaddTask,
                                        doc="Forced measurement on coadded images")
    reprocessing = Field(
        dtype=bool, default=False,
        doc=("Are we reprocessing?\n\n"
             "This exists as a workaround for large deblender footprints causing large memory use "
             "and/or very slow processing. We refuse to deblend those footprints when running on a cluster "
             "and return to reprocess on a machine with larger memory or more time "
             "if we consider those footprints important to recover."),
    )

    def setDefaults(self):
        Config.setDefaults(self)
        self.forcedPhotCoadd.references.retarget(MultiBandReferencesTask)

    def validate(self):
        for subtask in ("mergeCoaddDetections", "deblendCoaddSources", "measureCoaddSources",
                        "mergeCoaddMeasurements", "forcedPhotCoadd"):
            coaddName = getattr(self, subtask).coaddName
            if coaddName != self.coaddName:
                raise RuntimeError("%s.coaddName (%s) doesn't match root coaddName (%s)" %
                                   (subtask, coaddName, self.coaddName))
60 """TaskRunner for running MultiBandTask 62 This is similar to the lsst.pipe.base.ButlerInitializedTaskRunner, 63 except that we have a list of data references instead of a single 64 data reference being passed to the Task.run, and we pass the results 65 of the '--reuse-outputs-from' command option to the Task constructor. 68 def __init__(self, TaskClass, parsedCmd, doReturnResults=False):
69 TaskRunner.__init__(self, TaskClass, parsedCmd, doReturnResults)
73 """A variant of the base version that passes a butler argument to the task's constructor 74 parsedCmd or args must be specified. 76 if parsedCmd
is not None:
77 butler = parsedCmd.butler
78 elif args
is not None:
79 dataRefList, kwargs = args
80 butler = dataRefList[0].butlerSubset.butler
82 raise RuntimeError(
"parsedCmd or args must be specified")
83 return self.TaskClass(config=self.config, log=self.log, butler=butler, reuse=self.
reuse)
87 """Unpickle something by calling a factory""" 88 return factory(*args, **kwargs)
92 """Multi-node driver for multiband processing""" 93 ConfigClass = MultiBandDriverConfig
94 _DefaultName =
"multiBandDriver" 95 RunnerClass = MultiBandDriverTaskRunner

    def __init__(self, butler=None, schema=None, refObjLoader=None, reuse=tuple(), **kwargs):
        """!
        @param[in] butler: the butler can be used to retrieve schema or passed to the refObjLoader
            constructor in case it is needed.
        @param[in] schema: the schema of the source detection catalog used as input.
        @param[in] refObjLoader: an instance of LoadReferenceObjectsTasks that supplies an external
            reference catalog. May be None if the butler argument is provided or all steps requiring
            a reference catalog are disabled.
        """
        BatchPoolTask.__init__(self, **kwargs)
        if schema is None:
            assert butler is not None, "Butler not provided"
            schema = butler.get(self.config.coaddName + "Coadd_det_schema", immediate=True).schema
        self.butler = butler
        self.reuse = tuple(reuse)
        self.makeSubtask("detectCoaddSources")
        self.makeSubtask("mergeCoaddDetections", schema=schema)
        if self.config.measureCoaddSources.inputCatalog.startswith("deblended"):
            # Check that the deblender will produce the catalog that measurement expects as input
            self.measurementInput = self.config.measureCoaddSources.inputCatalog
            self.deblenderOutput = []
            if self.config.deblendCoaddSources.simultaneous:
                self.deblenderOutput.append("deblendedModel")
            else:
                self.deblenderOutput.append("deblendedFlux")
            if self.measurementInput not in self.deblenderOutput:
                err = "Measurement input '{0}' is not in the list of deblender output catalogs '{1}'"
                raise ValueError(err.format(self.measurementInput, self.deblenderOutput))
            self.makeSubtask("deblendCoaddSources",
                             schema=afwTable.Schema(self.mergeCoaddDetections.schema),
                             peakSchema=afwTable.Schema(self.mergeCoaddDetections.merged.getPeakSchema()),
                             butler=butler)
            measureInputSchema = afwTable.Schema(self.deblendCoaddSources.schema)
        else:
            measureInputSchema = afwTable.Schema(self.mergeCoaddDetections.schema)
        self.makeSubtask("measureCoaddSources", schema=measureInputSchema,
                         peakSchema=afwTable.Schema(self.mergeCoaddDetections.merged.getPeakSchema()),
                         refObjLoader=refObjLoader, butler=butler)
        self.makeSubtask("mergeCoaddMeasurements",
                         schema=afwTable.Schema(self.measureCoaddSources.schema))
        self.makeSubtask("forcedPhotCoadd",
                         refSchema=afwTable.Schema(self.mergeCoaddMeasurements.schema))
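
    # Schema flow between the subtasks built above: each stage consumes the
    # catalog schema produced by the previous one:
    #     detectCoaddSources -> mergeCoaddDetections -> [deblendCoaddSources]
    #         -> measureCoaddSources -> mergeCoaddMeasurements -> forcedPhotCoadd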

    def __reduce__(self):
        """Pickler"""
        return unpickle, (self.__class__, [], dict(config=self.config, name=self._name,
                                                   parentTask=self._parentTask, log=self.log,
                                                   butler=self.butler, reuse=self.reuse))

    @classmethod
    def _makeArgumentParser(cls, *args, **kwargs):
        kwargs.pop("doBatch", False)
        parser = ArgumentParser(name=cls._DefaultName, *args, **kwargs)
        parser.add_id_argument("--id", "deepCoadd",
                               help="data ID, e.g. --id tract=12345 patch=1,2",
                               ContainerClass=TractDataIdContainer)
        parser.addReuseOption(["detectCoaddSources", "mergeCoaddDetections", "measureCoaddSources",
                               "mergeCoaddMeasurements", "forcedPhotCoadd", "deblendCoaddSources"])
        return parser
161 """!Return walltime request for batch job 163 @param time: Requested time per iteration 164 @param parsedCmd: Results of argument parsing 165 @param numCores: Number of cores 168 for refList
in parsedCmd.id.refList:
169 numTargets += len(refList)
170 return time*numTargets/float(numCpus)
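
    # Worked example (hypothetical numbers): 3 filters with 4 patch
    # references each give numTargets = 12; with time = 600 s per iteration
    # and numCpus = 6, the walltime request is 600 * 12 / 6 = 1200 s.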
174 """!Run multiband processing on coadds 176 Only the master node runs this method. 178 No real MPI communication (scatter/gather) takes place: all I/O goes 179 through the disk. We want the intermediate stages on disk, and the 180 component Tasks are implemented around this, so we just follow suit. 182 @param patchRefList: Data references to run measurement 184 for patchRef
in patchRefList:
186 butler = patchRef.getButler()
189 raise RuntimeError(
"No valid patches")
192 pool.storeSet(butler=butler)
        # Re-run detection only if explicitly requested; otherwise we have no
        # way to tell reliably whether any detections have been run already.
        if self.config.doDetection:
            detectionList = []
            for patchRef in patchRefList:
                if ("detectCoaddSources" in self.reuse and
                        patchRef.datasetExists(self.config.coaddName + "Coadd_calexp", write=True)):
                    self.log.info("Skipping detectCoaddSources for %s; output already exists." %
                                  patchRef.dataId)
                    continue
                if not patchRef.datasetExists(self.config.coaddName + "Coadd"):
                    self.log.debug("Not processing %s; required input %sCoadd missing." %
                                   (patchRef.dataId, self.config.coaddName))
                    continue
                detectionList.append(patchRef)
            pool.map(self.runDetection, detectionList)
        patchRefList = [patchRef for patchRef in patchRefList
                        if patchRef.datasetExists(self.config.coaddName + "Coadd_calexp") and
                        patchRef.datasetExists(self.config.coaddName + "Coadd_det",
                                               write=self.config.doDetection)]
        dataIdList = [patchRef.dataId for patchRef in patchRefList]
        # Group the data identifiers by patch
        patches = {}
        tract = None
        for patchRef in patchRefList:
            dataId = patchRef.dataId
            if tract is None:
                tract = dataId["tract"]
            else:
                assert tract == dataId["tract"]
            patch = dataId["patch"]
            if patch not in patches:
                patches[patch] = []
            patches[patch].append(dataId)

        pool.map(self.runMergeDetections, patches.values())

        # Deblend the merged detections; each result records whether the
        # patch requires reprocessing (large footprints skipped on the cluster).
        reprocessed = pool.map(self.runDeblendMerged, patches.values())
        if self.config.reprocessing:
            patchReprocessing = {}
            for dataId, reprocess in zip(dataIdList, reprocessed):
                patchId = dataId["patch"]
                patchReprocessing[patchId] = patchReprocessing.get(patchId, False) or reprocess
            # Persist the determination, to make error recovery easier
            reprocessDataset = self.config.coaddName + "Coadd_multibandReprocessing"
            for patchId in patchReprocessing:
                dataId = dict(tract=tract, patch=patchId)
                if patchReprocessing[patchId]:
                    filename = butler.get(reprocessDataset + "_filename", dataId)[0]
                    open(filename, 'a').close()  # Touch file to mark the patch for reprocessing
                elif butler.datasetExists(reprocessDataset, dataId):
                    # The marker file survived from an earlier run: we must
                    # have failed mid-reprocessing, so start this patch over.
                    patchReprocessing[patchId] = True

        # Only process patches that need it (all of them, unless reprocessing)
        pool.map(self.runMeasurements, [dataId1 for dataId1 in dataIdList
                                        if not self.config.reprocessing or
                                        patchReprocessing[dataId1["patch"]]])
        pool.map(self.runMergeMeasurements, [idList for patchId, idList in patches.items()
                                             if not self.config.reprocessing or
                                             patchReprocessing[patchId]])
        pool.map(self.runForcedPhot, [dataId1 for dataId1 in dataIdList
                                      if not self.config.reprocessing or
                                      patchReprocessing[dataId1["patch"]]])
        # Remove the persisted reprocessing determination
        if self.config.reprocessing:
            for patchId in patchReprocessing:
                if not patchReprocessing[patchId]:
                    continue
                dataId = dict(tract=tract, patch=patchId)
                filename = butler.get(reprocessDataset + "_filename", dataId)[0]
                os.unlink(filename)
311 """! Run detection on a patch 313 Only slave nodes execute this method. 315 @param cache: Pool cache, containing butler 316 @param patchRef: Patch on which to do detection 318 with self.
logOperation(
"do detections on {}".format(patchRef.dataId)):
319 idFactory = self.detectCoaddSources.makeIdFactory(patchRef)
320 coadd = patchRef.get(self.config.coaddName +
"Coadd",
322 expId = int(patchRef.get(self.config.coaddName +
"CoaddId"))
323 self.detectCoaddSources.emptyMetadata()
324 detResults = self.detectCoaddSources.run(coadd, idFactory, expId=expId)
325 self.detectCoaddSources.write(detResults, patchRef)
329 """!Run detection merging on a patch 331 Only slave nodes execute this method. 333 @param cache: Pool cache, containing butler 334 @param dataIdList: List of data identifiers for the patch in different filters 336 with self.
logOperation(
"merge detections from %s" % (dataIdList,)):
337 dataRefList = [
getDataRef(cache.butler, dataId, self.config.coaddName +
"Coadd_calexp")
for 338 dataId
in dataIdList]
339 if (
"mergeCoaddDetections" in self.
reuse and 340 dataRefList[0].datasetExists(self.config.coaddName +
"Coadd_mergeDet", write=
True)):
341 self.log.info(
"Skipping mergeCoaddDetections for %s; output already exists." %
342 dataRefList[0].dataId)
344 self.mergeCoaddDetections.
runDataRef(dataRefList)
347 """Run the deblender on a list of dataId's 349 Only slave nodes execute this method. 354 Pool cache with butler. 356 Data identifier for patch in each band. 361 whether the patch requires reprocessing. 363 with self.
logOperation(
"deblending %s" % (dataIdList,)):
364 dataRefList = [
getDataRef(cache.butler, dataId, self.config.coaddName +
"Coadd_calexp")
for 365 dataId
in dataIdList]
367 if (
"deblendCoaddSources" in self.
reuse and 368 all([dataRef.datasetExists(self.config.coaddName +
"Coadd_" + self.
measurementInput,
369 write=
True)
for dataRef
in dataRefList])):
370 if not self.config.reprocessing:
371 self.log.info(
"Skipping deblendCoaddSources for %s; output already exists" % dataIdList)
375 catalog = dataRefList[0].get(self.config.coaddName +
"Coadd_" + self.
measurementInput)
376 bigFlag = catalog[
"deblend_parentTooBig"]
378 numOldBig = bigFlag.sum()
380 self.log.info(
"No large footprints in %s" % (dataRefList[0].dataId))
384 if self.config.deblendCoaddSources.simultaneous:
385 deblender = self.deblendCoaddSources.multiBandDeblend
387 deblender = self.deblendCoaddSources.singleBandDeblend
392 numNewBig = sum((deblender.isLargeFootprint(src.getFootprint())
for 393 src
in catalog[bigFlag]))
394 if numNewBig == numOldBig:
395 self.log.info(
"All %d formerly large footprints continue to be large in %s" %
396 (numOldBig, dataRefList[0].dataId,))
398 self.log.info(
"Found %d large footprints to be reprocessed in %s" %
399 (numOldBig - numNewBig, [dataRef.dataId
for dataRef
in dataRefList]))
402 self.deblendCoaddSources.
runDataRef(dataRefList)
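
    # Decision sketch for the reuse path above: existing deblend outputs are
    # reused unless reprocessing is enabled AND some formerly-too-big
    # footprints would now fit (numNewBig < numOldBig); in that case the
    # patch is deblended again and flagged for downstream re-measurement.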
406 """Run measurement on a patch for a single filter 408 Only slave nodes execute this method. 413 Pool cache, with butler 415 Data identifier for patch 417 with self.
logOperation(
"measurements on %s" % (dataId,)):
419 self.config.coaddName +
"Coadd_calexp")
420 if (
"measureCoaddSources" in self.
reuse and 421 not self.config.reprocessing
and 422 dataRef.datasetExists(self.config.coaddName +
"Coadd_meas", write=
True)):
423 self.log.info(
"Skipping measuretCoaddSources for %s; output already exists" % dataId)
428 """!Run measurement merging on a patch 430 Only slave nodes execute this method. 432 @param cache: Pool cache, containing butler 433 @param dataIdList: List of data identifiers for the patch in different filters 435 with self.
logOperation(
"merge measurements from %s" % (dataIdList,)):
436 dataRefList = [
getDataRef(cache.butler, dataId, self.config.coaddName +
"Coadd_calexp")
for 437 dataId
in dataIdList]
438 if (
"mergeCoaddMeasurements" in self.
reuse and 439 not self.config.reprocessing
and 440 dataRefList[0].datasetExists(self.config.coaddName +
"Coadd_ref", write=
True)):
441 self.log.info(
"Skipping mergeCoaddMeasurements for %s; output already exists" %
442 dataRefList[0].dataId)
444 self.mergeCoaddMeasurements.
runDataRef(dataRefList)
447 """!Run forced photometry on a patch for a single filter 449 Only slave nodes execute this method. 451 @param cache: Pool cache, with butler 452 @param dataId: Data identifier for patch 454 with self.
logOperation(
"forced photometry on %s" % (dataId,)):
456 self.config.coaddName +
"Coadd_calexp")
457 if (
"forcedPhotCoadd" in self.
reuse and 458 not self.config.reprocessing
and 459 dataRef.datasetExists(self.config.coaddName +
"Coadd_forced_src", write=
True)):
460 self.log.info(
"Skipping forcedPhotCoadd for %s; output already exists" % dataId)
465 """We don't collect any metadata, so skip"""