from __future__ import absolute_import, division, print_function

import os

from builtins import zip

from lsst.pex.config import Config, Field, ConfigurableField
from lsst.pipe.base import ArgumentParser, TaskRunner
from lsst.pipe.tasks.multiBand import (DetectCoaddSourcesTask,
                                       MergeDetectionsTask,
                                       DeblendCoaddSourcesTask,
                                       MeasureMergedCoaddSourcesTask,
                                       MergeMeasurementsTask,)
from lsst.ctrl.pool.parallel import BatchPoolTask
from lsst.ctrl.pool.pool import Pool, abortOnError
from lsst.meas.base.references import MultiBandReferencesTask
from lsst.meas.base.forcedPhotCoadd import ForcedPhotCoaddTask
from lsst.pipe.drivers.utils import getDataRef, TractDataIdContainer
import lsst.afw.table as afwTable

class MultiBandDriverConfig(Config):
    coaddName = Field(dtype=str, default="deep", doc="Name of coadd")
    doDetection = Field(dtype=bool, default=False,
                        doc="Re-run detection? (requires *Coadd dataset to have been written)")
    detectCoaddSources = ConfigurableField(target=DetectCoaddSourcesTask,
                                           doc="Detect sources on coadd")
    mergeCoaddDetections = ConfigurableField(
        target=MergeDetectionsTask, doc="Merge detections")
    deblendCoaddSources = ConfigurableField(target=DeblendCoaddSourcesTask,
                                            doc="Deblend merged detections")
    measureCoaddSources = ConfigurableField(target=MeasureMergedCoaddSourcesTask,
                                            doc="Measure merged and (optionally) deblended detections")
    mergeCoaddMeasurements = ConfigurableField(
        target=MergeMeasurementsTask, doc="Merge measurements")
    forcedPhotCoadd = ConfigurableField(target=ForcedPhotCoaddTask,
                                        doc="Forced measurement on coadded images")
    reprocessing = Field(
        dtype=bool, default=False,
        doc=("Are we reprocessing?\n\n"
             "This exists as a workaround for large deblender footprints causing large memory use "
             "and/or very slow processing. We refuse to deblend those footprints when running on a cluster "
             "and return to reprocess on a machine with larger memory or more time "
             "if we consider those footprints important to recover."),
    )
    def setDefaults(self):
        Config.setDefaults(self)
        self.forcedPhotCoadd.references.retarget(MultiBandReferencesTask)
    def validate(self):
        for subtask in ("mergeCoaddDetections", "deblendCoaddSources",
                        "measureCoaddSources", "mergeCoaddMeasurements",
                        "forcedPhotCoadd"):
            coaddName = getattr(self, subtask).coaddName
            if coaddName != self.coaddName:
                raise RuntimeError("%s.coaddName (%s) doesn't match root coaddName (%s)" %
                                   (subtask, coaddName, self.coaddName))
60 """TaskRunner for running MultiBandTask 62 This is similar to the lsst.pipe.base.ButlerInitializedTaskRunner, 63 except that we have a list of data references instead of a single 64 data reference being passed to the Task.run, and we pass the results 65 of the '--reuse-outputs-from' command option to the Task constructor. 68 def __init__(self, TaskClass, parsedCmd, doReturnResults=False):
69 TaskRunner.__init__(self, TaskClass, parsedCmd, doReturnResults)
73 """A variant of the base version that passes a butler argument to the task's constructor 74 parsedCmd or args must be specified. 76 if parsedCmd
is not None:
77 butler = parsedCmd.butler
78 elif args
is not None:
79 dataRefList, kwargs = args
80 butler = dataRefList[0].butlerSubset.butler
82 raise RuntimeError(
"parsedCmd or args must be specified")
83 return self.TaskClass(config=self.config, log=self.log, butler=butler, reuse=self.
reuse)
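
# Illustrative note on the two construction paths above: when the full
# command line has been parsed, makeTask(parsedCmd=...) takes the butler
# directly from parsedCmd; when the runner fans out over targets,
# makeTask(args=(dataRefList, kwargs)) recovers the butler from the first
# data reference. In both cases the '--reuse-outputs-from' selection is
# forwarded to the task as its 'reuse' constructor argument.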
87 """Unpickle something by calling a factory""" 88 return factory(*args, **kwargs)
92 """Multi-node driver for multiband processing""" 93 ConfigClass = MultiBandDriverConfig
94 _DefaultName =
"multiBandDriver" 95 RunnerClass = MultiBandDriverTaskRunner
97 def __init__(self, butler=None, schema=None, refObjLoader=None, reuse=tuple(), **kwargs):
99 @param[in] butler: the butler can be used to retrieve schema or passed to the refObjLoader constructor 100 in case it is needed. 101 @param[in] schema: the schema of the source detection catalog used as input. 102 @param[in] refObjLoader: an instance of LoadReferenceObjectsTasks that supplies an external reference 103 catalog. May be None if the butler argument is provided or all steps requiring a reference 104 catalog are disabled. 106 BatchPoolTask.__init__(self, **kwargs)
108 assert butler
is not None,
"Butler not provided" 109 schema = butler.get(self.config.coaddName +
110 "Coadd_det_schema", immediate=
True).schema
113 self.makeSubtask(
"detectCoaddSources")
114 self.makeSubtask(
"mergeCoaddDetections", schema=schema)
115 if self.config.measureCoaddSources.inputCatalog.startswith(
"deblended"):
119 if self.config.deblendCoaddSources.simultaneous:
120 if self.config.deblendCoaddSources.multiBandDeblend.conserveFlux:
122 if self.config.deblendCoaddSources.multiBandDeblend.saveTemplates:
127 err =
"Measurement input '{0}' is not in the list of deblender output catalogs '{1}'" 130 self.makeSubtask(
"deblendCoaddSources",
131 schema=afwTable.Schema(self.mergeCoaddDetections.schema),
132 peakSchema=afwTable.Schema(self.mergeCoaddDetections.merged.getPeakSchema()),
134 measureInputSchema = afwTable.Schema(self.deblendCoaddSources.schema)
136 measureInputSchema = afwTable.Schema(self.mergeCoaddDetections.schema)
137 self.makeSubtask(
"measureCoaddSources", schema=measureInputSchema,
138 peakSchema=afwTable.Schema(
139 self.mergeCoaddDetections.merged.getPeakSchema()),
140 refObjLoader=refObjLoader, butler=butler)
141 self.makeSubtask(
"mergeCoaddMeasurements", schema=afwTable.Schema(
142 self.measureCoaddSources.schema))
143 self.makeSubtask(
"forcedPhotCoadd", refSchema=afwTable.Schema(
144 self.mergeCoaddMeasurements.schema))
    def __reduce__(self):
        """Pickler"""
        return unpickle, (self.__class__, [], dict(config=self.config, name=self._name,
                                                   parentTask=self._parentTask, log=self.log,
                                                   butler=self.butler, reuse=self.reuse))
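
    # Rough sketch of the round-trip this enables: pickling the task stores
    # the unpickle() factory plus the constructor keywords above, and
    # unpickling calls the constructor again, rebuilding subtasks and
    # schemas on the receiving side. This is what allows the pool to ship
    # the task to slave processes.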
    @classmethod
    def _makeArgumentParser(cls, *args, **kwargs):
        kwargs.pop("doBatch", False)
        parser = ArgumentParser(name=cls._DefaultName, *args, **kwargs)
        parser.add_id_argument("--id", "deepCoadd",
                               help="data ID, e.g. --id tract=12345 patch=1,2",
                               ContainerClass=TractDataIdContainer)
        parser.addReuseOption(["detectCoaddSources", "mergeCoaddDetections",
                               "measureCoaddSources", "mergeCoaddMeasurements",
                               "forcedPhotCoadd", "deblendCoaddSources"])
        return parser
164 """!Return walltime request for batch job 166 @param time: Requested time per iteration 167 @param parsedCmd: Results of argument parsing 168 @param numCores: Number of cores 171 for refList
in parsedCmd.id.refList:
172 numTargets += len(refList)
173 return time*numTargets/float(numCpus)
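
    # Worked example with made-up numbers: 10 patches observed in 5 bands
    # yield 50 data references, so with time=3600 (seconds per iteration)
    # and numCpus=25 the requested walltime is 3600*50/25 = 7200 seconds.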
177 """!Run multiband processing on coadds 179 Only the master node runs this method. 181 No real MPI communication (scatter/gather) takes place: all I/O goes 182 through the disk. We want the intermediate stages on disk, and the 183 component Tasks are implemented around this, so we just follow suit. 185 @param patchRefList: Data references to run measurement 187 for patchRef
in patchRefList:
189 butler = patchRef.getButler()
192 raise RuntimeError(
"No valid patches")
195 pool.storeSet(butler=butler)
210 if self.config.doDetection:
212 for patchRef
in patchRefList:
213 if (
"detectCoaddSources" in self.
reuse and 214 patchRef.datasetExists(self.config.coaddName +
"Coadd_calexp", write=
True)):
215 self.log.info(
"Skipping detectCoaddSources for %s; output already exists." %
218 if not patchRef.datasetExists(self.config.coaddName +
"Coadd"):
219 self.log.debug(
"Not processing %s; required input %sCoadd missing." %
220 (patchRef.dataId, self.config.coaddName))
222 detectionList.append(patchRef)
        patchRefList = [patchRef for patchRef in patchRefList if
                        patchRef.datasetExists(self.config.coaddName + "Coadd_calexp") and
                        patchRef.datasetExists(self.config.coaddName + "Coadd_det",
                                               write=self.config.doDetection)]
        dataIdList = [patchRef.dataId for patchRef in patchRefList]
        # Group the data references by patch
        patches = {}
        tract = None
        for patchRef in patchRefList:
            dataId = patchRef.dataId
            if tract is None:
                tract = dataId["tract"]
            else:
                assert tract == dataId["tract"]
            patch = dataId["patch"]
            if patch not in patches:
                patches[patch] = []
            patches[patch].append(dataId)
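
        # For illustration, with two bands the grouping above produces
        # something like (data ID values are made up):
        #   patches = {"1,1": [{"tract": 8766, "patch": "1,1", "filter": "HSC-G"},
        #                      {"tract": 8766, "patch": "1,1", "filter": "HSC-R"}]}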
        pool.map(self.runMergeDetections, patches.values())

        # Deblend merged detections, and test for reprocessing
        reprocessed = pool.map(self.runDeblendMerged, patches.values())

        if self.config.reprocessing:
            patchReprocessing = {}
            for dataId, reprocess in zip(dataIdList, reprocessed):
                patchId = dataId["patch"]
                patchReprocessing[patchId] = patchReprocessing.get(
                    patchId, False) or reprocess
            # Persist the determination, to make error recovery easier
            reprocessDataset = self.config.coaddName + "Coadd_multibandReprocessing"
            for patchId in patchReprocessing:
                if not patchReprocessing[patchId]:
                    continue
                dataId = dict(tract=tract, patch=patchId)
                if patchReprocessing[patchId]:
                    filename = butler.get(
                        reprocessDataset + "_filename", dataId)[0]
                    open(filename, 'a').close()  # Touch file
                elif butler.datasetExists(reprocessDataset, dataId):
                    # User has restarted the process after it died mid-way
                    patchReprocessing[patchId] = True
        # When reprocessing, only run the remaining stages on patches that need it
        pool.map(self.runMeasurements, [dataId1 for dataId1 in dataIdList if
                                        not self.config.reprocessing or
                                        patchReprocessing[dataId1["patch"]]])
        pool.map(self.runMergeMeasurements, [idList for patchId, idList in patches.items() if
                                             not self.config.reprocessing or
                                             patchReprocessing[patchId]])
        pool.map(self.runForcedPhot, [dataId1 for dataId1 in dataIdList if
                                      not self.config.reprocessing or
                                      patchReprocessing[dataId1["patch"]]])
        # Remove the persisted reprocessing determination
        if self.config.reprocessing:
            for patchId in patchReprocessing:
                if not patchReprocessing[patchId]:
                    continue
                dataId = dict(tract=tract, patch=patchId)
                filename = butler.get(
                    reprocessDataset + "_filename", dataId)[0]
                os.unlink(filename)
314 """! Run detection on a patch 316 Only slave nodes execute this method. 318 @param cache: Pool cache, containing butler 319 @param patchRef: Patch on which to do detection 321 with self.
logOperation(
"do detections on {}".format(patchRef.dataId)):
322 idFactory = self.detectCoaddSources.makeIdFactory(patchRef)
323 coadd = patchRef.get(self.config.coaddName +
"Coadd",
325 expId = int(patchRef.get(self.config.coaddName +
"CoaddId"))
326 self.detectCoaddSources.emptyMetadata()
327 detResults = self.detectCoaddSources.run(coadd, idFactory, expId=expId)
328 self.detectCoaddSources.write(detResults, patchRef)
332 """!Run detection merging on a patch 334 Only slave nodes execute this method. 336 @param cache: Pool cache, containing butler 337 @param dataIdList: List of data identifiers for the patch in different filters 339 with self.
logOperation(
"merge detections from %s" % (dataIdList,)):
340 dataRefList = [
getDataRef(cache.butler, dataId, self.config.coaddName +
"Coadd_calexp")
for 341 dataId
in dataIdList]
342 if (
"mergeCoaddDetections" in self.
reuse and 343 dataRefList[0].datasetExists(self.config.coaddName +
"Coadd_mergeDet", write=
True)):
344 self.log.info(
"Skipping mergeCoaddDetections for %s; output already exists." %
345 dataRefList[0].dataId)
347 self.mergeCoaddDetections.
runDataRef(dataRefList)
350 """Run the deblender on a list of dataId's 352 Only slave nodes execute this method. 357 Pool cache with butler. 359 Data identifier for patch in each band. 364 whether the patch requires reprocessing. 366 with self.
logOperation(
"deblending %s" % (dataIdList,)):
367 dataRefList = [
getDataRef(cache.butler, dataId, self.config.coaddName +
"Coadd_calexp")
for 368 dataId
in dataIdList]
370 if (
"deblendCoaddSources" in self.
reuse and 371 all([dataRef.datasetExists(self.config.coaddName +
"Coadd_" + self.
measurementInput,
372 write=
True)
for dataRef
in dataRefList])):
373 if not self.config.reprocessing:
374 self.log.info(
"Skipping deblendCoaddSources for %s; output already exists" % dataIdList)
378 catalog = dataRefList[0].get(self.config.coaddName +
"Coadd_" + self.
measurementInput)
379 bigFlag = catalog[
"deblend_parentTooBig"]
381 numOldBig = bigFlag.sum()
383 self.log.info(
"No large footprints in %s" % (dataRefList[0].dataId))
387 if self.config.deblendCoaddSources.simultaneous:
388 deblender = self.deblendCoaddSources.multiBandDeblend
390 deblender = self.deblendCoaddSources.singleBandDeblend
395 numNewBig = sum((deblender.isLargeFootprint(src.getFootprint())
for 396 src
in catalog[bigFlag]))
397 if numNewBig == numOldBig:
398 self.log.info(
"All %d formerly large footprints continue to be large in %s" %
399 (numOldBig, dataRefList[0].dataId,))
401 self.log.info(
"Found %d large footprints to be reprocessed in %s" %
402 (numOldBig - numNewBig, [dataRef.dataId
for dataRef
in dataRefList]))
405 self.deblendCoaddSources.
runDataRef(dataRefList)
409 """Run measurement on a patch for a single filter 411 Only slave nodes execute this method. 416 Pool cache, with butler 418 Data identifier for patch 420 with self.
logOperation(
"measurements on %s" % (dataId,)):
422 self.config.coaddName +
"Coadd_calexp")
423 if (
"measureCoaddSources" in self.
reuse and 424 not self.config.reprocessing
and 425 dataRef.datasetExists(self.config.coaddName +
"Coadd_meas", write=
True)):
426 self.log.info(
"Skipping measuretCoaddSources for %s; output already exists" % dataId)
431 """!Run measurement merging on a patch 433 Only slave nodes execute this method. 435 @param cache: Pool cache, containing butler 436 @param dataIdList: List of data identifiers for the patch in different filters 438 with self.
logOperation(
"merge measurements from %s" % (dataIdList,)):
439 dataRefList = [
getDataRef(cache.butler, dataId, self.config.coaddName +
"Coadd_calexp")
for 440 dataId
in dataIdList]
441 if (
"mergeCoaddMeasurements" in self.
reuse and 442 not self.config.reprocessing
and 443 dataRefList[0].datasetExists(self.config.coaddName +
"Coadd_ref", write=
True)):
444 self.log.info(
"Skipping mergeCoaddMeasurements for %s; output already exists" %
445 dataRefList[0].dataId)
447 self.mergeCoaddMeasurements.
runDataRef(dataRefList)
450 """!Run forced photometry on a patch for a single filter 452 Only slave nodes execute this method. 454 @param cache: Pool cache, with butler 455 @param dataId: Data identifier for patch 457 with self.
logOperation(
"forced photometry on %s" % (dataId,)):
459 self.config.coaddName +
"Coadd_calexp")
460 if (
"forcedPhotCoadd" in self.
reuse and 461 not self.config.reprocessing
and 462 dataRef.datasetExists(self.config.coaddName +
"Coadd_forced_src", write=
True)):
463 self.log.info(
"Skipping forcedPhotCoadd for %s; output already exists" % dataId)
468 """We don't collect any metadata, so skip"""