Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

# Public API of this module.
__all__ = ['SingleQuantumExecutor']

23 

24# ------------------------------- 

25# Imports of standard modules -- 

26# ------------------------------- 

27from collections import defaultdict 

28import logging 

29from itertools import chain 

30import time 

31 

32# ----------------------------- 

33# Imports for other modules -- 

34# ----------------------------- 

35from .quantumGraphExecutor import QuantumExecutor 

36from lsst.log import Log 

37from lsst.obs.base import Instrument 

38from lsst.pipe.base import ButlerQuantumContext 

39from lsst.daf.butler import Quantum 

40 

41# ---------------------------------- 

42# Local non-exported definitions -- 

43# ---------------------------------- 

44 

# Module logger; the name drops the leading package component (everything up
# to and including the first ".") from ``__name__`` — e.g. "lsst.ctrl.mpexec.x"
# becomes "ctrl.mpexec.x".
_LOG = logging.getLogger(__name__.partition(".")[2])

46 

47 

class SingleQuantumExecutor(QuantumExecutor):
    """Executor class which runs one Quantum at a time.

    Parameters
    ----------
    taskFactory : `~lsst.pipe.base.TaskFactory`
        Instance of a task factory.
    skipExisting : `bool`, optional
        If True then quanta with all existing outputs are not executed.
    clobberPartialOutputs : `bool`, optional
        If True then delete any partial outputs from quantum execution. If
        complete outputs exist then an exception is raised if
        ``skipExisting`` is False.
    enableLsstDebug : `bool`, optional
        Enable debugging with ``lsstDebug`` facility for a task.
    """
    def __init__(self, taskFactory, skipExisting=False, clobberPartialOutputs=False, enableLsstDebug=False):
        self.taskFactory = taskFactory
        self.skipExisting = skipExisting
        self.enableLsstDebug = enableLsstDebug
        self.clobberPartialOutputs = clobberPartialOutputs

    def execute(self, taskDef, quantum, butler):
        # Docstring inherited from QuantumExecutor.execute
        self.setupLogging(taskDef, quantum)
        taskClass, config = taskDef.taskClass, taskDef.config

        # check whether to skip or delete old outputs
        if self.checkExistingOutputs(quantum, butler, taskDef):
            # Lazy %-style arguments so the message is only formatted when
            # this level is actually emitted (consistent with other calls).
            _LOG.info("Quantum execution skipped due to existing outputs, "
                      "task=%s dataId=%s.", taskClass.__name__, quantum.dataId)
            return

        quantum = self.updatedQuantumInputs(quantum, butler)

        # enable lsstDebug debugging
        if self.enableLsstDebug:
            try:
                _LOG.debug("Will try to import debug.py")
                import debug  # noqa:F401
            except ImportError:
                # Logger.warn is a deprecated alias of Logger.warning.
                _LOG.warning("No 'debug' module found.")

        # initialize global state
        self.initGlobals(quantum, butler)

        # Ensure that we are executing a frozen config
        config.freeze()

        task = self.makeTask(taskClass, config, butler)
        self.runQuantum(task, quantum, taskDef, butler)

    def setupLogging(self, taskDef, quantum):
        """Configure logging system for execution of this task.

        This method can setup logging to attach task- or
        quantum-specific information to log messages. Potentially this can
        take into account some info from task configuration as well.

        Parameters
        ----------
        taskDef : `lsst.pipe.base.TaskDef`
            The task definition.
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        """
        # include quantum dataId and task label into MDC
        label = taskDef.label
        if quantum.dataId:
            label += f":{quantum.dataId}"
        Log.MDC("LABEL", label)

    def checkExistingOutputs(self, quantum, butler, taskDef):
        """Decide whether this quantum needs to be executed.

        If only partial outputs exist then they are removed if
        ``clobberPartialOutputs`` is True, otherwise an exception is raised.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Quantum to check for existing outputs.
        butler : `~lsst.daf.butler.Butler`
            Data butler.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.

        Returns
        -------
        exist : `bool`
            True if all quantum's outputs exist in a collection and
            ``skipExisting`` is True, False otherwise.

        Raises
        ------
        RuntimeError
            Raised if some outputs exist and some not.
        """
        collection = butler.run
        registry = butler.registry

        # Partition this quantum's predicted outputs into those already
        # present in the output run and those still missing.
        existingRefs = []
        missingRefs = []
        for datasetRefs in quantum.outputs.values():
            for datasetRef in datasetRefs:
                ref = registry.findDataset(datasetRef.datasetType, datasetRef.dataId,
                                           collections=butler.run)
                if ref is None:
                    missingRefs.append(datasetRef)
                else:
                    existingRefs.append(ref)
        if existingRefs and missingRefs:
            # some outputs exist and some don't, either delete existing ones or complain
            _LOG.debug("Partial outputs exist for task %s dataId=%s collection=%s "
                       "existingRefs=%s missingRefs=%s",
                       taskDef, quantum.dataId, collection, existingRefs, missingRefs)
            if self.clobberPartialOutputs:
                _LOG.info("Removing partial outputs for task %s: %s", taskDef, existingRefs)
                butler.pruneDatasets(existingRefs, disassociate=True, unstore=True, purge=True)
                return False
            else:
                raise RuntimeError(f"Registry inconsistency while checking for existing outputs:"
                                   f" collection={collection} existingRefs={existingRefs}"
                                   f" missingRefs={missingRefs}")
        elif existingRefs:
            # complete outputs exist, this is fine only if skipExisting is set
            return self.skipExisting
        else:
            # no outputs exist
            return False

    def makeTask(self, taskClass, config, butler):
        """Make new task instance.

        Parameters
        ----------
        taskClass : `type`
            Sub-class of `~lsst.pipe.base.PipelineTask`.
        config : `~lsst.pipe.base.PipelineTaskConfig`
            Configuration object for this task.
        butler : `~lsst.daf.butler.Butler`
            Data butler.

        Returns
        -------
        task : `~lsst.pipe.base.PipelineTask`
            Instance of ``taskClass`` type.
        """
        # call task factory for that
        return self.taskFactory.makeTask(taskClass, config, None, butler)

    def updatedQuantumInputs(self, quantum, butler):
        """Update quantum with extra information, returns a new updated Quantum.

        Some methods may require input DatasetRefs to have non-None
        ``dataset_id``, but in case of intermediate dataset it may not be
        filled during QuantumGraph construction. This method will retrieve
        missing info from registry.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        butler : `~lsst.daf.butler.Butler`
            Data butler.

        Returns
        -------
        update : `~lsst.daf.butler.Quantum`
            Updated Quantum instance.
        """
        updatedInputs = defaultdict(list)
        for key, refsForDatasetType in quantum.inputs.items():
            newRefsForDatasetType = updatedInputs[key]
            for ref in refsForDatasetType:
                if ref.id is None:
                    # Unresolved ref (typically an intermediate produced by an
                    # upstream quantum); look it up in the registry now.
                    resolvedRef = butler.registry.findDataset(ref.datasetType, ref.dataId,
                                                              collections=butler.collections)
                    if resolvedRef is None:
                        raise ValueError(
                            f"Cannot find {ref.datasetType.name} with id {ref.dataId} "
                            f"in collections {butler.collections}."
                        )
                    newRefsForDatasetType.append(resolvedRef)
                    _LOG.debug("Updating dataset ID for %s", ref)
                else:
                    newRefsForDatasetType.append(ref)
        return Quantum(taskName=quantum.taskName,
                       taskClass=quantum.taskClass,
                       dataId=quantum.dataId,
                       initInputs=quantum.initInputs,
                       inputs=updatedInputs,
                       outputs=quantum.outputs
                       )

    def runQuantum(self, task, quantum, taskDef, butler):
        """Execute task on a single quantum.

        Parameters
        ----------
        task : `~lsst.pipe.base.PipelineTask`
            Task object.
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.
        butler : `~lsst.daf.butler.Butler`
            Data butler.
        """
        # Create a butler that operates in the context of a quantum
        butlerQC = ButlerQuantumContext(butler, quantum)

        # Get the input and output references for the task
        inputRefs, outputRefs = taskDef.connections.buildDatasetRefs(quantum)

        startTime = time.time()

        # Call task runQuantum() method. Any exception thrown by the task
        # propagates to caller.
        task.runQuantum(butlerQC, inputRefs, outputRefs)

        stopTime = time.time()
        _LOG.info("Execution of task '%s' on quantum %s took %.3f seconds",
                  taskDef.label, quantum.dataId, stopTime - startTime)

        if taskDef.metadataDatasetName is not None:
            # DatasetRef has to be in the Quantum outputs, can lookup by name
            try:
                ref = quantum.outputs[taskDef.metadataDatasetName]
            except LookupError as exc:
                raise LookupError(
                    f"Quantum outputs is missing metadata dataset type {taskDef.metadataDatasetName},"
                    f" it could happen due to inconsistent options between Quantum generation"
                    f" and execution") from exc
            butlerQC.put(task.getFullMetadata(), ref[0])

    def initGlobals(self, quantum, butler):
        """Initialize global state needed for task execution.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        butler : `~lsst.daf.butler.Butler`
            Data butler.

        Notes
        -----
        There is an issue with initializing filters singleton which is done
        by instrument, to avoid requiring tasks to do it in runQuantum()
        we do it here when any dataId has an instrument dimension. Also for
        now we only allow single instrument, verify that all instrument
        names in all dataIds are identical.

        This will need revision when filter singleton disappears.
        """
        oneInstrument = None
        for datasetRefs in chain(quantum.inputs.values(), quantum.outputs.values()):
            for datasetRef in datasetRefs:
                dataId = datasetRef.dataId
                instrument = dataId.get("instrument")
                if instrument is not None:
                    if oneInstrument is not None:
                        assert instrument == oneInstrument, \
                            "Currently require that only one instrument is used per graph"
                    else:
                        oneInstrument = instrument
                        Instrument.fromName(instrument, butler.registry)