# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

__all__ = ['SingleQuantumExecutor']

# -------------------------------
# Imports of standard modules --
# -------------------------------
from collections import defaultdict
import logging
from itertools import chain
import sys
import time
from typing import List

# -----------------------------
# Imports for other modules --
# -----------------------------
from .quantumGraphExecutor import QuantumExecutor
from lsst.daf.base import PropertyList, PropertySet
from lsst.obs.base import Instrument
from lsst.pipe.base import (
    AdjustQuantumHelper,
    ButlerQuantumContext,
    InvalidQuantumError,
    NoWorkFound,
    RepeatableQuantumError,
    logInfo,
)
from lsst.daf.butler import Quantum, ButlerMDC, NamedKeyDict, DatasetRef, DatasetType

# ----------------------------------
# Local non-exported definitions --
# ----------------------------------

# Log under this module's dotted path with the leading package component
# stripped; str.partition(".") splits on the first dot only.
_LOG = logging.getLogger(__name__.partition(".")[2])


class SingleQuantumExecutor(QuantumExecutor):
    """Executor class which runs one Quantum at a time.

    Parameters
    ----------
    taskFactory : `~lsst.pipe.base.TaskFactory`
        Instance of a task factory.
    skipExisting : `bool`, optional
        If `True`, quanta that previously succeeded will not be rerun.
    clobberOutputs : `bool`, optional
        If `True`, existing outputs will be overwritten.  If ``skipExisting``
        is also `True`, only outputs from failed quanta will be overwritten.
    enableLsstDebug : `bool`, optional
        Enable debugging with the ``lsstDebug`` facility for a task.
    exitOnKnownError : `bool`, optional
        If `True`, call `sys.exit` with the appropriate exit code for special
        known exceptions, after printing a traceback, instead of letting the
        exception propagate up to the caller.  This is always the behavior
        for `InvalidQuantumError`.
    """

    def __init__(self, taskFactory, skipExisting=False, clobberOutputs=False, enableLsstDebug=False,
                 exitOnKnownError=False):
        self.taskFactory = taskFactory
        self.skipExisting = skipExisting
        self.enableLsstDebug = enableLsstDebug
        self.clobberOutputs = clobberOutputs
        self.exitOnKnownError = exitOnKnownError

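    # A minimal usage sketch, assuming ``taskFactory``, ``taskDef``,
    # ``quantum`` and ``butler`` are supplied by the surrounding middleware
    # (e.g. while iterating over a QuantumGraph):
    #
    #     executor = SingleQuantumExecutor(taskFactory, skipExisting=True)
    #     executor.execute(taskDef, quantum, butler)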

    def execute(self, taskDef, quantum, butler):
        # Docstring is inherited from QuantumExecutor.execute.
        startTime = time.time()

        # Save detailed resource usage before task start to metadata.
        quantumMetadata = PropertyList()
        logInfo(None, "prep", metadata=quantumMetadata)

        self.setupLogging(taskDef, quantum)
        taskClass, label, config = taskDef.taskClass, taskDef.label, taskDef.config

        # Check whether to skip this quantum or delete its old outputs.
        if self.checkExistingOutputs(quantum, butler, taskDef):
            _LOG.info("Skipping already-successful quantum for label=%s dataId=%s.", label, quantum.dataId)
            return
        try:
            quantum = self.updatedQuantumInputs(quantum, butler, taskDef)
        except NoWorkFound as exc:
            _LOG.info("Nothing to do for task '%s' on quantum %s; saving metadata and skipping: %s",
                      taskDef.label, quantum.dataId, str(exc))
            # Make empty metadata that looks something like what a do-nothing
            # task would write (but we don't bother with empty nested
            # PropertySets for subtasks).  This is slightly duplicative of
            # logic in pipe_base that we can't easily call from here; we'll
            # fix this on DM-29761.
            logInfo(None, "end", metadata=quantumMetadata)
            fullMetadata = PropertySet()
            fullMetadata[taskDef.label] = PropertyList()
            fullMetadata["quantum"] = quantumMetadata
            self.writeMetadata(quantum, fullMetadata, taskDef, butler)
            return

        # Enable lsstDebug debugging.
        if self.enableLsstDebug:
            try:
                _LOG.debug("Will try to import debug.py")
                import debug  # noqa:F401
            except ImportError:
                _LOG.warning("No 'debug' module found.")

        # Initialize global state.
        self.initGlobals(quantum, butler)

        # Ensure that we are executing a frozen config.
        config.freeze()
        logInfo(None, "init", metadata=quantumMetadata)
        task = self.makeTask(taskClass, label, config, butler)
        logInfo(None, "start", metadata=quantumMetadata)
        self.runQuantum(task, quantum, taskDef, butler)
        logInfo(None, "end", metadata=quantumMetadata)
        fullMetadata = task.getFullMetadata()
        fullMetadata["quantum"] = quantumMetadata
        self.writeMetadata(quantum, fullMetadata, taskDef, butler)
        stopTime = time.time()
        _LOG.info("Execution of task '%s' on quantum %s took %.3f seconds",
                  taskDef.label, quantum.dataId, stopTime - startTime)

    def setupLogging(self, taskDef, quantum):
        """Configure the logging system for execution of this task.

        This method can set up logging to attach task- or quantum-specific
        information to log messages.  Potentially this can take into account
        some information from the task configuration as well.

        Parameters
        ----------
        taskDef : `lsst.pipe.base.TaskDef`
            The task definition.
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        """
        # Include the quantum dataId and task label in the MDC.
        label = taskDef.label
        if quantum.dataId:
            label += f":{quantum.dataId}"

        ButlerMDC.MDC("LABEL", label)
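        # The label is stored in the logging Mapped Diagnostic Context (MDC);
        # whether and how it appears in output depends on the log handler and
        # formatter configuration, which is outside the scope of this class.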

    def checkExistingOutputs(self, quantum, butler, taskDef):
        """Decide whether this quantum needs to be executed.

        If only partial outputs exist then they are removed if
        ``clobberOutputs`` is `True`, otherwise an exception is raised.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Quantum to check for existing outputs.
        butler : `~lsst.daf.butler.Butler`
            Data butler.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.

        Returns
        -------
        exist : `bool`
            `True` if ``self.skipExisting`` is `True` and a previous
            execution of this quantum appears to have completed successfully
            (either because metadata was written or because all datasets were
            written).  `False` otherwise.

        Raises
        ------
        RuntimeError
            Raised if some outputs exist and some do not.
        """

        collection = butler.run
        registry = butler.registry

        if self.skipExisting and taskDef.metadataDatasetName is not None:
            # The existence of the metadata output is sufficient evidence
            # that the previous run succeeded and this quantum can be
            # skipped.
            if (ref := registry.findDataset(taskDef.metadataDatasetName, quantum.dataId)) is not None:
                if butler.datastore.exists(ref):
                    return True

        existingRefs = []
        missingRefs = []
        for datasetRefs in quantum.outputs.values():
            for datasetRef in datasetRefs:
                ref = registry.findDataset(datasetRef.datasetType, datasetRef.dataId,
                                           collections=butler.run)
                if ref is None:
                    missingRefs.append(datasetRef)
                else:
                    if butler.datastore.exists(ref):
                        existingRefs.append(ref)
                    else:
                        missingRefs.append(datasetRef)
        if existingRefs and missingRefs:
            # Some outputs exist and some do not; either delete the existing
            # ones or complain.
            _LOG.debug("Partial outputs exist for task %s dataId=%s collection=%s "
                       "existingRefs=%s missingRefs=%s",
                       taskDef, quantum.dataId, collection, existingRefs, missingRefs)
            if self.clobberOutputs:
                _LOG.info("Removing partial outputs for task %s: %s", taskDef, existingRefs)
                butler.pruneDatasets(existingRefs, disassociate=True, unstore=True, purge=True)
                return False
            else:
                raise RuntimeError(f"Registry inconsistency while checking for existing outputs:"
                                   f" collection={collection} existingRefs={existingRefs}"
                                   f" missingRefs={missingRefs}")
        elif existingRefs:
            # Complete outputs exist; this is fine only if skipExisting is
            # set.
            return self.skipExisting
        else:
            # No outputs exist.
            return False
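
    # Informal summary of the checkExistingOutputs() decision above
    # (True means "skip this quantum", False means "execute it"):
    #
    #     outputs found      skipExisting   clobberOutputs   result
    #     ---------------    ------------   --------------   -----------------
    #     metadata dataset   True           any              True
    #     all                True           any              True
    #     all                False          any              False
    #     some               any            True             prune, then False
    #     some               any            False            RuntimeError
    #     none               any            any              False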

    def makeTask(self, taskClass, name, config, butler):
        """Make a new task instance.

        Parameters
        ----------
        taskClass : `type`
            Sub-class of `~lsst.pipe.base.PipelineTask`.
        name : `str`
            Name for this task.
        config : `~lsst.pipe.base.PipelineTaskConfig`
            Configuration object for this task.
        butler : `~lsst.daf.butler.Butler`
            Data butler.

        Returns
        -------
        task : `~lsst.pipe.base.PipelineTask`
            Instance of ``taskClass`` type.
        """
        # Delegate to the task factory.
        return self.taskFactory.makeTask(taskClass, name, config, None, butler)

    def updatedQuantumInputs(self, quantum, butler, taskDef):
        """Update a quantum with extra information and return the updated
        Quantum.

        Some methods may require input DatasetRefs to have a non-None
        ``dataset_id``, but in the case of intermediate datasets it may not
        be filled in during QuantumGraph construction.  This method retrieves
        the missing information from the registry.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        butler : `~lsst.daf.butler.Butler`
            Data butler.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.

        Returns
        -------
        update : `~lsst.daf.butler.Quantum`
            Updated Quantum instance.
        """

        anyChanges = False
        updatedInputs = defaultdict(list)
        for key, refsForDatasetType in quantum.inputs.items():
            newRefsForDatasetType = updatedInputs[key]
            for ref in refsForDatasetType:
                if ref.id is None:
                    resolvedRef = butler.registry.findDataset(ref.datasetType, ref.dataId,
                                                              collections=butler.collections)
                    if resolvedRef is None:
                        _LOG.info("No dataset found for %s", ref)
                        continue
                    else:
                        _LOG.debug("Updated dataset ID for %s", ref)
                else:
                    resolvedRef = ref
                # We need to ask the datastore whether the dataset actually
                # exists because the Registry of a local "execution butler"
                # cannot know this (we prepopulate it with all of the
                # datasets that might be created).
                if butler.datastore.exists(resolvedRef):
                    newRefsForDatasetType.append(resolvedRef)
            if len(newRefsForDatasetType) != len(refsForDatasetType):
                anyChanges = True
        # If we removed any input datasets, let the task check whether it has
        # enough to proceed and/or prune related datasets that it no longer
        # needs or produces.  It will raise NoWorkFound if it can't run,
        # which we let propagate up.  This is exactly what we run during QG
        # generation, because a task shouldn't care whether an input is
        # missing because some previous task didn't produce it, or because it
        # just wasn't there during QG generation.
        updatedInputs = NamedKeyDict[DatasetType, List[DatasetRef]](updatedInputs.items())
        helper = AdjustQuantumHelper(updatedInputs, quantum.outputs)
        if anyChanges:
            helper.adjust_in_place(taskDef.connections, label=taskDef.label, data_id=quantum.dataId)
        return Quantum(taskName=quantum.taskName,
                       taskClass=quantum.taskClass,
                       dataId=quantum.dataId,
                       initInputs=quantum.initInputs,
                       inputs=helper.inputs,
                       outputs=helper.outputs)

    def runQuantum(self, task, quantum, taskDef, butler):
        """Execute task on a single quantum.

        Parameters
        ----------
        task : `~lsst.pipe.base.PipelineTask`
            Task object.
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.
        butler : `~lsst.daf.butler.Butler`
            Data butler.
        """
        # Create a butler that operates in the context of a quantum.
        butlerQC = ButlerQuantumContext(butler, quantum)

        # Get the input and output references for the task.
        inputRefs, outputRefs = taskDef.connections.buildDatasetRefs(quantum)

        # Call the task's runQuantum() method.  Catch a few known failure
        # modes and translate them into specific exit codes.
        try:
            task.runQuantum(butlerQC, inputRefs, outputRefs)
        except NoWorkFound as err:
            # Not an error, just an early exit.
            _LOG.info("Task '%s' on quantum %s exited early: %s",
                      taskDef.label, quantum.dataId, str(err))

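            # For reference, a task signals this benign early exit by raising
            # NoWorkFound from its own runQuantum() implementation; a
            # hypothetical sketch (the connection name "exposures" is
            # invented for illustration):
            #
            #     def runQuantum(self, butlerQC, inputRefs, outputRefs):
            #         if not inputRefs.exposures:
            #             raise NoWorkFound("No input exposures to process.")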

        except RepeatableQuantumError as err:
            if self.exitOnKnownError:
                _LOG.warning("Caught repeatable quantum error for %s (%s):", taskDef, quantum.dataId)
                _LOG.warning(err, exc_info=True)
                sys.exit(err.EXIT_CODE)
            else:
                raise
        except InvalidQuantumError as err:
            _LOG.fatal("Invalid quantum error for %s (%s):", taskDef, quantum.dataId)
            _LOG.fatal(err, exc_info=True)
            sys.exit(err.EXIT_CODE)

    def writeMetadata(self, quantum, metadata, taskDef, butler):
        """Write task metadata to the butler.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        metadata : `~lsst.daf.base.PropertySet`
            Metadata produced by the task for this quantum.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.
        butler : `~lsst.daf.butler.Butler`
            Data butler.
        """
        if taskDef.metadataDatasetName is not None:
            # The DatasetRef has to be in the Quantum outputs; we can look it
            # up by name.
            try:
                ref = quantum.outputs[taskDef.metadataDatasetName]
            except LookupError as exc:
                raise InvalidQuantumError(
                    f"Quantum outputs are missing the metadata dataset type"
                    f" {taskDef.metadataDatasetName}; this could happen due to inconsistent options"
                    f" between QuantumGraph generation and execution") from exc
            butler.put(metadata, ref[0])

    def initGlobals(self, quantum, butler):
        """Initialize global state needed for task execution.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        butler : `~lsst.daf.butler.Butler`
            Data butler.

        Notes
        -----
        There is an issue with initializing the filters singleton, which is
        done by the instrument.  To avoid requiring tasks to do it in
        runQuantum(), we do it here whenever any dataId has an instrument
        dimension.  Also, for now we only allow a single instrument: we
        verify that the instrument names in all dataIds are identical.

        This will need revision when the filter singleton disappears.
        """

        oneInstrument = None
        for datasetRefs in chain(quantum.inputs.values(), quantum.outputs.values()):
            for datasetRef in datasetRefs:
                dataId = datasetRef.dataId
                instrument = dataId.get("instrument")
                if instrument is not None:
                    if oneInstrument is not None:
                        assert instrument == oneInstrument, \
                            "Currently require that only one instrument is used per graph"
                    else:
                        oneInstrument = instrument
                        Instrument.fromName(instrument, butler.registry)