Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22__all__ = ['SingleQuantumExecutor'] 

23 

24# ------------------------------- 

25# Imports of standard modules -- 

26# ------------------------------- 

27from collections import defaultdict 

28import logging 

29from itertools import chain 

30import sys 

31import time 

32 

33# ----------------------------- 

34# Imports for other modules -- 

35# ----------------------------- 

36from .quantumGraphExecutor import QuantumExecutor 

37from lsst.log import Log 

38from lsst.daf.base import PropertyList, PropertySet 

39from lsst.obs.base import Instrument 

40from lsst.pipe.base import ( 

41 AdjustQuantumHelper, 

42 ButlerQuantumContext, 

43 InvalidQuantumError, 

44 NoWorkFound, 

45 RepeatableQuantumError, 

46 logInfo, 

47) 

48from lsst.daf.butler import Quantum 

49 

50# ---------------------------------- 

51# Local non-exported definitions -- 

52# ---------------------------------- 

53 

# Module logger, named after this module with the top-level package prefix
# stripped: ``partition(".")[2]`` keeps everything after the first dot (and
# is the empty string — i.e. the root logger — if ``__name__`` has no dot).
_LOG = logging.getLogger(__name__.partition(".")[2])

55 

56 

class SingleQuantumExecutor(QuantumExecutor):
    """Executor class which runs one Quantum at a time.

    Parameters
    ----------
    taskFactory : `~lsst.pipe.base.TaskFactory`
        Instance of a task factory.
    skipExisting : `bool`, optional
        If `True`, then quanta that succeeded will not be rerun.
    clobberOutputs : `bool`, optional
        If `True`, then existing outputs will be overwritten.  If
        `skipExisting` is also `True`, only outputs from failed quanta will
        be overwritten.
    enableLsstDebug : `bool`, optional
        Enable debugging with ``lsstDebug`` facility for a task.
    exitOnKnownError : `bool`, optional
        If `True`, call `sys.exit` with the appropriate exit code for special
        known exceptions, after printing a traceback, instead of letting the
        exception propagate up to calling.  This is always the behavior for
        InvalidQuantumError.

    Notes
    -----
    The data butler is not a constructor argument; it is supplied per-quantum
    to `execute`.  (An earlier version of this docstring listed ``butler``
    as a parameter, but ``__init__`` does not accept one.)
    """
    def __init__(self, taskFactory, skipExisting=False, clobberOutputs=False, enableLsstDebug=False,
                 exitOnKnownError=False):
        # Plain attribute storage; no validation or I/O happens here.
        self.taskFactory = taskFactory
        self.skipExisting = skipExisting
        self.enableLsstDebug = enableLsstDebug
        self.clobberOutputs = clobberOutputs
        self.exitOnKnownError = exitOnKnownError

87 

    def execute(self, taskDef, quantum, butler):
        # Docstring inherited from QuantumExecutor.execute

        startTime = time.time()

        # Save detailed resource usage before task start to metadata.
        quantumMetadata = PropertyList()
        logInfo(None, "prep", metadata=quantumMetadata)

        # Attach task/quantum identification to all log messages below.
        self.setupLogging(taskDef, quantum)
        taskClass, label, config = taskDef.taskClass, taskDef.label, taskDef.config

        # check whether to skip or delete old outputs
        if self.checkExistingOutputs(quantum, butler, taskDef):
            _LOG.info("Skipping already-successful quantum for label=%s dataId=%s.", label, quantum.dataId)
            return
        try:
            # Resolve any unresolved input refs; may raise NoWorkFound when
            # the task decides it cannot run with the inputs that exist.
            quantum = self.updatedQuantumInputs(quantum, butler, taskDef)
        except NoWorkFound as exc:
            _LOG.info("Nothing to do for task '%s' on quantum %s; saving metadata and skipping: %s",
                      taskDef.label, quantum.dataId, str(exc))
            # Make empty metadata that looks something like what a do-nothing
            # task would write (but we don't bother with empty nested
            # PropertySets for subtasks). This is slightly duplicative with
            # logic in pipe_base that we can't easily call from here; we'll fix
            # this on DM-29761.
            logInfo(None, "end", metadata=quantumMetadata)
            fullMetadata = PropertySet()
            fullMetadata[taskDef.label] = PropertyList()
            fullMetadata["quantum"] = quantumMetadata
            self.writeMetadata(quantum, fullMetadata, taskDef, butler)
            return

        # enable lsstDebug debugging
        if self.enableLsstDebug:
            try:
                _LOG.debug("Will try to import debug.py")
                import debug  # noqa:F401
            except ImportError:
                _LOG.warn("No 'debug' module found.")

        # initialize global state
        self.initGlobals(quantum, butler)

        # Ensure that we are executing a frozen config
        config.freeze()
        # Record timing/resource checkpoints around task construction and run.
        logInfo(None, "init", metadata=quantumMetadata)
        task = self.makeTask(taskClass, label, config, butler)
        logInfo(None, "start", metadata=quantumMetadata)
        self.runQuantum(task, quantum, taskDef, butler)
        logInfo(None, "end", metadata=quantumMetadata)
        # Merge the per-quantum metadata into the task's own metadata tree
        # and persist it.
        fullMetadata = task.getFullMetadata()
        fullMetadata["quantum"] = quantumMetadata
        self.writeMetadata(quantum, fullMetadata, taskDef, butler)
        stopTime = time.time()
        _LOG.info("Execution of task '%s' on quantum %s took %.3f seconds",
                  taskDef.label, quantum.dataId, stopTime - startTime)

145 

146 def setupLogging(self, taskDef, quantum): 

147 """Configure logging system for execution of this task. 

148 

149 Ths method can setup logging to attach task- or 

150 quantum-specific information to log messages. Potentially this can 

151 take into account some info from task configuration as well. 

152 

153 Parameters 

154 ---------- 

155 taskDef : `lsst.pipe.base.TaskDef` 

156 The task definition. 

157 quantum : `~lsst.daf.butler.Quantum` 

158 Single Quantum instance. 

159 """ 

160 # include quantum dataId and task label into MDC 

161 label = taskDef.label 

162 if quantum.dataId: 

163 label += f":{quantum.dataId}" 

164 Log.MDC("LABEL", label) 

165 

    def checkExistingOutputs(self, quantum, butler, taskDef):
        """Decide whether this quantum needs to be executed.

        If only partial outputs exist then they are removed if
        ``clobberOutputs`` is True, otherwise an exception is raised.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Quantum to check for existing outputs
        butler : `~lsst.daf.butler.Butler`
            Data butler.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.

        Returns
        -------
        exist : `bool`
            `True` if ``self.skipExisting`` is `True`, and a previous
            execution of this quanta appears to have completed successfully
            (either because metadata was written or all datasets were
            written).  `False` otherwise.

        Raises
        ------
        RuntimeError
            Raised if some outputs exist and some not.
        """
        collection = butler.run
        registry = butler.registry

        if self.skipExisting and taskDef.metadataDatasetName is not None:
            # Metadata output exists; this is sufficient to assume the previous
            # run was successful and should be skipped.
            # NOTE(review): this findDataset call passes no ``collections``,
            # so it presumably searches the registry's default collections,
            # while the per-output search below restricts to ``butler.run`` —
            # confirm that asymmetry is intended.
            if (ref := butler.registry.findDataset(taskDef.metadataDatasetName, quantum.dataId)) is not None:
                if butler.datastore.exists(ref):
                    return True

        # Classify every predicted output: present in both registry and
        # datastore ("existing"), or absent from either ("missing").
        existingRefs = []
        missingRefs = []
        for datasetRefs in quantum.outputs.values():
            for datasetRef in datasetRefs:
                ref = registry.findDataset(datasetRef.datasetType, datasetRef.dataId,
                                           collections=butler.run)
                if ref is None:
                    missingRefs.append(datasetRef)
                else:
                    # Registry knows the dataset, but the datastore may still
                    # lack the actual file/artifact.
                    if butler.datastore.exists(ref):
                        existingRefs.append(ref)
                    else:
                        missingRefs.append(datasetRef)
        if existingRefs and missingRefs:
            # some outputs exist and some don't, either delete existing ones or complain
            _LOG.debug("Partial outputs exist for task %s dataId=%s collection=%s "
                       "existingRefs=%s missingRefs=%s",
                       taskDef, quantum.dataId, collection, existingRefs, missingRefs)
            if self.clobberOutputs:
                _LOG.info("Removing partial outputs for task %s: %s", taskDef, existingRefs)
                butler.pruneDatasets(existingRefs, disassociate=True, unstore=True, purge=True)
                return False
            else:
                raise RuntimeError(f"Registry inconsistency while checking for existing outputs:"
                                   f" collection={collection} existingRefs={existingRefs}"
                                   f" missingRefs={missingRefs}")
        elif existingRefs:
            # complete outputs exist, this is fine only if skipExisting is set
            return self.skipExisting
        else:
            # no outputs exist
            return False

236 

237 def makeTask(self, taskClass, name, config, butler): 

238 """Make new task instance. 

239 

240 Parameters 

241 ---------- 

242 taskClass : `type` 

243 Sub-class of `~lsst.pipe.base.PipelineTask`. 

244 name : `str` 

245 Name for this task. 

246 config : `~lsst.pipe.base.PipelineTaskConfig` 

247 Configuration object for this task 

248 

249 Returns 

250 ------- 

251 task : `~lsst.pipe.base.PipelineTask` 

252 Instance of ``taskClass`` type. 

253 butler : `~lsst.daf.butler.Butler` 

254 Data butler. 

255 """ 

256 # call task factory for that 

257 return self.taskFactory.makeTask(taskClass, name, config, None, butler) 

258 

    def updatedQuantumInputs(self, quantum, butler, taskDef):
        """Update quantum with extra information, returns a new updated
        Quantum.

        Some methods may require input DatasetRefs to have non-None
        ``dataset_id``, but in case of intermediate dataset it may not be
        filled during QuantumGraph construction. This method will retrieve
        missing info from registry.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        butler : `~lsst.daf.butler.Butler`
            Data butler.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.

        Returns
        -------
        update : `~lsst.daf.butler.Quantum`
            Updated Quantum instance
        """
        anyChanges = False
        updatedInputs = defaultdict(list)
        for key, refsForDatasetType in quantum.inputs.items():
            newRefsForDatasetType = updatedInputs[key]
            for ref in refsForDatasetType:
                if ref.id is None:
                    # Unresolved ref (no dataset_id); try to resolve it in
                    # the butler's search collections.
                    resolvedRef = butler.registry.findDataset(ref.datasetType, ref.dataId,
                                                              collections=butler.collections)
                    if resolvedRef is None:
                        # Input is genuinely absent; drop it from the quantum.
                        _LOG.debug("No dataset found for %s", ref)
                        continue
                    else:
                        _LOG.debug("Updated dataset ID for %s", ref)
                else:
                    resolvedRef = ref
                # We need to ask datastore if the dataset actually exists
                # because the Registry of a local "execution butler" cannot
                # know this (because we prepopulate it with all of the datasets
                # that might be created).
                if butler.datastore.exists(resolvedRef):
                    newRefsForDatasetType.append(resolvedRef)
            if len(newRefsForDatasetType) != len(refsForDatasetType):
                anyChanges = True
        # If we removed any input datasets, let the task check if it has enough
        # to proceed and/or prune related datasets that it also doesn't
        # need/produce anymore. It will raise NoWorkFound if it can't run,
        # which we'll let propagate up. This is exactly what we run during QG
        # generation, because a task shouldn't care whether an input is missing
        # because some previous task didn't produce it, or because it just
        # wasn't there during QG generation.
        helper = AdjustQuantumHelper(updatedInputs, quantum.outputs)
        if anyChanges:
            helper.adjust_in_place(taskDef.connections, label=taskDef.label, data_id=quantum.dataId)
        return Quantum(taskName=quantum.taskName,
                       taskClass=quantum.taskClass,
                       dataId=quantum.dataId,
                       initInputs=quantum.initInputs,
                       inputs=helper.inputs,
                       outputs=helper.outputs
                       )

321 

322 def runQuantum(self, task, quantum, taskDef, butler): 

323 """Execute task on a single quantum. 

324 

325 Parameters 

326 ---------- 

327 task : `~lsst.pipe.base.PipelineTask` 

328 Task object. 

329 quantum : `~lsst.daf.butler.Quantum` 

330 Single Quantum instance. 

331 taskDef : `~lsst.pipe.base.TaskDef` 

332 Task definition structure. 

333 butler : `~lsst.daf.butler.Butler` 

334 Data butler. 

335 """ 

336 # Create a butler that operates in the context of a quantum 

337 butlerQC = ButlerQuantumContext(butler, quantum) 

338 

339 # Get the input and output references for the task 

340 inputRefs, outputRefs = taskDef.connections.buildDatasetRefs(quantum) 

341 

342 # Call task runQuantum() method. Catch a few known failure modes and 

343 # translate them into specific 

344 try: 

345 task.runQuantum(butlerQC, inputRefs, outputRefs) 

346 except NoWorkFound as err: 

347 # Not an error, just an early exit. 

348 _LOG.info("Task '%s' on quantum %s exited early: %s", 

349 taskDef.label, quantum.dataId, str(err)) 

350 pass 

351 except RepeatableQuantumError as err: 

352 if self.exitOnKnownError: 

353 _LOG.warning("Caught repeatable quantum error for %s (%s):", taskDef, quantum.dataId) 

354 _LOG.warning(err, exc_info=True) 

355 sys.exit(err.EXIT_CODE) 

356 else: 

357 raise 

358 except InvalidQuantumError as err: 

359 _LOG.fatal("Invalid quantum error for %s (%s): %s", taskDef, quantum.dataId) 

360 _LOG.fatal(err, exc_info=True) 

361 sys.exit(err.EXIT_CODE) 

362 

363 def writeMetadata(self, quantum, metadata, taskDef, butler): 

364 if taskDef.metadataDatasetName is not None: 

365 # DatasetRef has to be in the Quantum outputs, can lookup by name 

366 try: 

367 ref = quantum.outputs[taskDef.metadataDatasetName] 

368 except LookupError as exc: 

369 raise InvalidQuantumError( 

370 f"Quantum outputs is missing metadata dataset type {taskDef.metadataDatasetName};" 

371 f" this could happen due to inconsistent options between QuantumGraph generation" 

372 f" and execution") from exc 

373 butler.put(metadata, ref[0]) 

374 

    def initGlobals(self, quantum, butler):
        """Initialize global state needed for task execution.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        butler : `~lsst.daf.butler.Butler`
            Data butler.

        Notes
        -----
        There is an issue with initializing filters singleton which is done
        by instrument, to avoid requiring tasks to do it in runQuantum()
        we do it here when any dataId has an instrument dimension. Also for
        now we only allow single instrument, verify that all instrument
        names in all dataIds are identical.

        This will need revision when filter singleton disappears.
        """
        oneInstrument = None
        # Scan every input and output dataId for an "instrument" dimension.
        for datasetRefs in chain(quantum.inputs.values(), quantum.outputs.values()):
            for datasetRef in datasetRefs:
                dataId = datasetRef.dataId
                instrument = dataId.get("instrument")
                if instrument is not None:
                    if oneInstrument is not None:
                        # NOTE(review): ``assert`` is stripped under
                        # ``python -O``, so this single-instrument check is
                        # best-effort only.
                        assert instrument == oneInstrument, \
                            "Currently require that only one instrument is used per graph"
                    else:
                        oneInstrument = instrument
                        # Instantiate the instrument once, on first sight,
                        # for its registration side effects (per the Notes
                        # above, the filters singleton).
                        Instrument.fromName(instrument, butler.registry)