Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

# Public API of this module.
__all__ = ['SingleQuantumExecutor']

23 

24# ------------------------------- 

25# Imports of standard modules -- 

26# ------------------------------- 

27from collections import defaultdict 

28import logging 

29from itertools import chain 

30import sys 

31import time 

32 

33# ----------------------------- 

34# Imports for other modules -- 

35# ----------------------------- 

36from .quantumGraphExecutor import QuantumExecutor 

37from lsst.daf.base import PropertyList, PropertySet 

38from lsst.obs.base import Instrument 

39from lsst.pipe.base import ( 

40 AdjustQuantumHelper, 

41 ButlerQuantumContext, 

42 InvalidQuantumError, 

43 NoWorkFound, 

44 RepeatableQuantumError, 

45 logInfo, 

46) 

47from lsst.daf.butler import Quantum, ButlerMDC 

48 

49# ---------------------------------- 

50# Local non-exported definitions -- 

51# ---------------------------------- 

52 

# Module-level logger.  The logger name drops everything up to and including
# the first "." of the module path (e.g. "lsst.ctrl.mpexec.foo" ->
# "ctrl.mpexec.foo"), i.e. the leading package component is stripped.
_LOG = logging.getLogger(__name__.partition(".")[2])

54 

55 

class SingleQuantumExecutor(QuantumExecutor):
    """Executor class which runs one Quantum at a time.

    Parameters
    ----------
    taskFactory : `~lsst.pipe.base.TaskFactory`
        Instance of a task factory.
    skipExisting : `bool`, optional
        If `True`, then quanta that succeeded will not be rerun.
    clobberOutputs : `bool`, optional
        If `True`, then existing outputs will be overwritten.  If
        `skipExisting` is also `True`, only outputs from failed quanta will
        be overwritten.
    enableLsstDebug : `bool`, optional
        Enable debugging with ``lsstDebug`` facility for a task.
    exitOnKnownError : `bool`, optional
        If `True`, call `sys.exit` with the appropriate exit code for special
        known exceptions, after printing a traceback, instead of letting the
        exception propagate up to calling.  This is always the behavior for
        InvalidQuantumError.

    Notes
    -----
    The data butler is not stored on the instance; it is supplied per-quantum
    as an argument to `execute`.
    """
    def __init__(self, taskFactory, skipExisting=False, clobberOutputs=False, enableLsstDebug=False,
                 exitOnKnownError=False):
        self.taskFactory = taskFactory
        self.skipExisting = skipExisting
        self.enableLsstDebug = enableLsstDebug
        self.clobberOutputs = clobberOutputs
        self.exitOnKnownError = exitOnKnownError

    def execute(self, taskDef, quantum, butler):
        # Docstring inherited from QuantumExecutor.execute
        startTime = time.time()

        # Save detailed resource usage before task start to metadata.
        quantumMetadata = PropertyList()
        logInfo(None, "prep", metadata=quantumMetadata)

        self.setupLogging(taskDef, quantum)
        taskClass, label, config = taskDef.taskClass, taskDef.label, taskDef.config

        # check whether to skip or delete old outputs
        if self.checkExistingOutputs(quantum, butler, taskDef):
            _LOG.info("Skipping already-successful quantum for label=%s dataId=%s.", label, quantum.dataId)
            return
        try:
            quantum = self.updatedQuantumInputs(quantum, butler, taskDef)
        except NoWorkFound as exc:
            _LOG.info("Nothing to do for task '%s' on quantum %s; saving metadata and skipping: %s",
                      taskDef.label, quantum.dataId, str(exc))
            # Make empty metadata that looks something like what a do-nothing
            # task would write (but we don't bother with empty nested
            # PropertySets for subtasks).  This is slightly duplicative with
            # logic in pipe_base that we can't easily call from here; we'll fix
            # this on DM-29761.
            logInfo(None, "end", metadata=quantumMetadata)
            fullMetadata = PropertySet()
            fullMetadata[taskDef.label] = PropertyList()
            fullMetadata["quantum"] = quantumMetadata
            self.writeMetadata(quantum, fullMetadata, taskDef, butler)
            return

        # enable lsstDebug debugging
        if self.enableLsstDebug:
            try:
                _LOG.debug("Will try to import debug.py")
                import debug  # noqa:F401
            except ImportError:
                # Deliberately best-effort: lsstDebug support is optional.
                _LOG.warning("No 'debug' module found.")

        # initialize global state
        self.initGlobals(quantum, butler)

        # Ensure that we are executing a frozen config
        config.freeze()
        logInfo(None, "init", metadata=quantumMetadata)
        task = self.makeTask(taskClass, label, config, butler)
        logInfo(None, "start", metadata=quantumMetadata)
        self.runQuantum(task, quantum, taskDef, butler)
        logInfo(None, "end", metadata=quantumMetadata)
        fullMetadata = task.getFullMetadata()
        fullMetadata["quantum"] = quantumMetadata
        self.writeMetadata(quantum, fullMetadata, taskDef, butler)
        stopTime = time.time()
        _LOG.info("Execution of task '%s' on quantum %s took %.3f seconds",
                  taskDef.label, quantum.dataId, stopTime - startTime)

    def setupLogging(self, taskDef, quantum):
        """Configure logging system for execution of this task.

        Ths method can setup logging to attach task- or
        quantum-specific information to log messages.  Potentially this can
        take into account some info from task configuration as well.

        Parameters
        ----------
        taskDef : `lsst.pipe.base.TaskDef`
            The task definition.
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        """
        # include quantum dataId and task label into MDC
        label = taskDef.label
        if quantum.dataId:
            label += f":{quantum.dataId}"

        ButlerMDC.MDC("LABEL", label)

    def checkExistingOutputs(self, quantum, butler, taskDef):
        """Decide whether this quantum needs to be executed.

        If only partial outputs exist then they are removed if
        ``clobberOutputs`` is True, otherwise an exception is raised.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Quantum to check for existing outputs
        butler : `~lsst.daf.butler.Butler`
            Data butler.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.

        Returns
        -------
        exist : `bool`
            `True` if ``self.skipExisting`` is `True`, and a previous execution
            of this quanta appears to have completed successfully (either
            because metadata was written or all datasets were written).
            `False` otherwise.

        Raises
        ------
        RuntimeError
            Raised if some outputs exist and some not.
        """
        collection = butler.run
        registry = butler.registry

        if self.skipExisting and taskDef.metadataDatasetName is not None:
            # Metadata output exists; this is sufficient to assume the previous
            # run was successful and should be skipped.
            if (ref := butler.registry.findDataset(taskDef.metadataDatasetName, quantum.dataId)) is not None:
                if butler.datastore.exists(ref):
                    return True

        existingRefs = []
        missingRefs = []
        for datasetRefs in quantum.outputs.values():
            for datasetRef in datasetRefs:
                ref = registry.findDataset(datasetRef.datasetType, datasetRef.dataId,
                                           collections=butler.run)
                if ref is None:
                    missingRefs.append(datasetRef)
                else:
                    # Registry knows the dataset; confirm the datastore really
                    # has it before counting it as an existing output.
                    if butler.datastore.exists(ref):
                        existingRefs.append(ref)
                    else:
                        missingRefs.append(datasetRef)
        if existingRefs and missingRefs:
            # Some outputs exist and some don't, either delete existing ones
            # or complain.
            _LOG.debug("Partial outputs exist for task %s dataId=%s collection=%s "
                       "existingRefs=%s missingRefs=%s",
                       taskDef, quantum.dataId, collection, existingRefs, missingRefs)
            if self.clobberOutputs:
                _LOG.info("Removing partial outputs for task %s: %s", taskDef, existingRefs)
                butler.pruneDatasets(existingRefs, disassociate=True, unstore=True, purge=True)
                return False
            else:
                raise RuntimeError(f"Registry inconsistency while checking for existing outputs:"
                                   f" collection={collection} existingRefs={existingRefs}"
                                   f" missingRefs={missingRefs}")
        elif existingRefs:
            # complete outputs exist, this is fine only if skipExisting is set
            return self.skipExisting
        else:
            # no outputs exist
            return False

    def makeTask(self, taskClass, name, config, butler):
        """Make new task instance.

        Parameters
        ----------
        taskClass : `type`
            Sub-class of `~lsst.pipe.base.PipelineTask`.
        name : `str`
            Name for this task.
        config : `~lsst.pipe.base.PipelineTaskConfig`
            Configuration object for this task.
        butler : `~lsst.daf.butler.Butler`
            Data butler.

        Returns
        -------
        task : `~lsst.pipe.base.PipelineTask`
            Instance of ``taskClass`` type.
        """
        # call task factory for that
        return self.taskFactory.makeTask(taskClass, name, config, None, butler)

    def updatedQuantumInputs(self, quantum, butler, taskDef):
        """Update quantum with extra information, returns a new updated
        Quantum.

        Some methods may require input DatasetRefs to have non-None
        ``dataset_id``, but in case of intermediate dataset it may not be
        filled during QuantumGraph construction.  This method will retrieve
        missing info from registry.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        butler : `~lsst.daf.butler.Butler`
            Data butler.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.

        Returns
        -------
        update : `~lsst.daf.butler.Quantum`
            Updated Quantum instance

        Raises
        ------
        lsst.pipe.base.NoWorkFound
            Raised (propagated from ``adjust_in_place``) if, after pruning
            missing inputs, the task cannot run on this quantum.
        """
        anyChanges = False
        updatedInputs = defaultdict(list)
        for key, refsForDatasetType in quantum.inputs.items():
            newRefsForDatasetType = updatedInputs[key]
            for ref in refsForDatasetType:
                if ref.id is None:
                    resolvedRef = butler.registry.findDataset(ref.datasetType, ref.dataId,
                                                              collections=butler.collections)
                    if resolvedRef is None:
                        _LOG.debug("No dataset found for %s", ref)
                        continue
                    else:
                        _LOG.debug("Updated dataset ID for %s", ref)
                else:
                    resolvedRef = ref
                # We need to ask datastore if the dataset actually exists
                # because the Registry of a local "execution butler" cannot
                # know this (because we prepopulate it with all of the datasets
                # that might be created).
                if butler.datastore.exists(resolvedRef):
                    newRefsForDatasetType.append(resolvedRef)
            if len(newRefsForDatasetType) != len(refsForDatasetType):
                anyChanges = True
        # If we removed any input datasets, let the task check if it has enough
        # to proceed and/or prune related datasets that it also doesn't
        # need/produce anymore.  It will raise NoWorkFound if it can't run,
        # which we'll let propagate up.  This is exactly what we run during QG
        # generation, because a task shouldn't care whether an input is missing
        # because some previous task didn't produce it, or because it just
        # wasn't there during QG generation.
        helper = AdjustQuantumHelper(updatedInputs, quantum.outputs)
        if anyChanges:
            helper.adjust_in_place(taskDef.connections, label=taskDef.label, data_id=quantum.dataId)
        return Quantum(taskName=quantum.taskName,
                       taskClass=quantum.taskClass,
                       dataId=quantum.dataId,
                       initInputs=quantum.initInputs,
                       inputs=helper.inputs,
                       outputs=helper.outputs
                       )

    def runQuantum(self, task, quantum, taskDef, butler):
        """Execute task on a single quantum.

        Parameters
        ----------
        task : `~lsst.pipe.base.PipelineTask`
            Task object.
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.
        butler : `~lsst.daf.butler.Butler`
            Data butler.
        """
        # Create a butler that operates in the context of a quantum
        butlerQC = ButlerQuantumContext(butler, quantum)

        # Get the input and output references for the task
        inputRefs, outputRefs = taskDef.connections.buildDatasetRefs(quantum)

        # Call task runQuantum() method.  Catch a few known failure modes and
        # translate them into specific
        try:
            task.runQuantum(butlerQC, inputRefs, outputRefs)
        except NoWorkFound as err:
            # Not an error, just an early exit.
            _LOG.info("Task '%s' on quantum %s exited early: %s",
                      taskDef.label, quantum.dataId, str(err))
        except RepeatableQuantumError as err:
            if self.exitOnKnownError:
                _LOG.warning("Caught repeatable quantum error for %s (%s):", taskDef, quantum.dataId)
                _LOG.warning(err, exc_info=True)
                sys.exit(err.EXIT_CODE)
            else:
                raise
        except InvalidQuantumError as err:
            # This failure always terminates the process; pass str(err) so the
            # third %s placeholder in the message is actually filled in.
            _LOG.fatal("Invalid quantum error for %s (%s): %s", taskDef, quantum.dataId, str(err))
            _LOG.fatal(err, exc_info=True)
            sys.exit(err.EXIT_CODE)

    def writeMetadata(self, quantum, metadata, taskDef, butler):
        """Write task metadata dataset for this quantum, if the task has one.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        metadata : `~lsst.daf.base.PropertySet`
            Full task metadata to store.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.
        butler : `~lsst.daf.butler.Butler`
            Data butler.

        Raises
        ------
        lsst.pipe.base.InvalidQuantumError
            Raised if the quantum outputs do not include the metadata dataset
            type.
        """
        if taskDef.metadataDatasetName is not None:
            # DatasetRef has to be in the Quantum outputs, can lookup by name
            try:
                ref = quantum.outputs[taskDef.metadataDatasetName]
            except LookupError as exc:
                raise InvalidQuantumError(
                    f"Quantum outputs is missing metadata dataset type {taskDef.metadataDatasetName};"
                    f" this could happen due to inconsistent options between QuantumGraph generation"
                    f" and execution") from exc
            butler.put(metadata, ref[0])

    def initGlobals(self, quantum, butler):
        """Initialize global state needed for task execution.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        butler : `~lsst.daf.butler.Butler`
            Data butler.

        Notes
        -----
        There is an issue with initializing filters singleton which is done
        by instrument, to avoid requiring tasks to do it in runQuantum()
        we do it here when any dataId has an instrument dimension.  Also for
        now we only allow single instrument, verify that all instrument
        names in all dataIds are identical.

        This will need revision when filter singleton disappears.
        """
        oneInstrument = None
        for datasetRefs in chain(quantum.inputs.values(), quantum.outputs.values()):
            for datasetRef in datasetRefs:
                dataId = datasetRef.dataId
                instrument = dataId.get("instrument")
                if instrument is not None:
                    if oneInstrument is not None:
                        assert instrument == oneInstrument, \
                            "Currently require that only one instrument is used per graph"
                    else:
                        oneInstrument = instrument
                        Instrument.fromName(instrument, butler.registry)