# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__all__ = ['SingleQuantumExecutor']

# -------------------------------
#  Imports of standard modules --
# -------------------------------
import logging
import sys
import tempfile
import time
from contextlib import contextmanager
from collections import defaultdict
from itertools import chain
from logging import FileHandler
from typing import List

# -----------------------------
#  Imports for other modules --
# -----------------------------
from .quantumGraphExecutor import QuantumExecutor
from lsst.daf.base import PropertyList, PropertySet
from lsst.obs.base import Instrument
from lsst.pipe.base import (
    AdjustQuantumHelper,
    ButlerQuantumContext,
    InvalidQuantumError,
    NoWorkFound,
    RepeatableQuantumError,
    logInfo,
)
from lsst.daf.butler import (
    DatasetRef,
    DatasetType,
    FileDataset,
    NamedKeyDict,
    Quantum,
)
from lsst.daf.butler.core.logging import (
    ButlerLogRecordHandler,
    ButlerMDC,
    JsonLogFormatter,
)

# ----------------------------------
#  Local non-exported definitions --
# ----------------------------------

_LOG = logging.getLogger(__name__.partition(".")[2])


class SingleQuantumExecutor(QuantumExecutor):
    """Executor class which runs one Quantum at a time.

    Parameters
    ----------
    taskFactory : `~lsst.pipe.base.TaskFactory`
        Instance of a task factory.
    skipExisting : `bool`, optional
        If `True`, quanta that succeeded previously will not be rerun.
    clobberOutputs : `bool`, optional
        If `True`, existing outputs will be overwritten.  If
        ``skipExisting`` is also `True`, only outputs from failed quanta
        will be overwritten.
    enableLsstDebug : `bool`, optional
        Enable debugging with the ``lsstDebug`` facility for a task.
    exitOnKnownError : `bool`, optional
        If `True`, call `sys.exit` with the appropriate exit code for
        special known exceptions, after printing a traceback, instead of
        letting the exception propagate up to the caller.  This is always
        the behavior for `~lsst.pipe.base.InvalidQuantumError`.
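
    Notes
    -----
    A minimal usage sketch (``taskFactory``, ``butler``, and the quantum
    graph iteration are assumed to be supplied by the caller; the node
    attribute names are illustrative):

    .. code-block:: py

        executor = SingleQuantumExecutor(taskFactory, skipExisting=True)
        for node in quantumGraph:
            executor.execute(node.taskDef, node.quantum, butler)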

92 """ 

93 

94 stream_json_logs = True 

95 """If True each log record is written to a temporary file and ingested 

96 when quantum completes. If False the records are accumulated in memory 

97 and stored in butler on quantum completion.""" 

98 

    def __init__(self, taskFactory, skipExisting=False, clobberOutputs=False, enableLsstDebug=False,
                 exitOnKnownError=False):
        self.taskFactory = taskFactory
        self.skipExisting = skipExisting
        self.enableLsstDebug = enableLsstDebug
        self.clobberOutputs = clobberOutputs
        self.exitOnKnownError = exitOnKnownError
        self.log_handler = None

    def execute(self, taskDef, quantum, butler):
        # Docstring inherited from QuantumExecutor.execute
        startTime = time.time()

        with self.captureLogging(taskDef, quantum, butler):
            # Save detailed resource usage before task start to metadata.
            quantumMetadata = PropertyList()
            logInfo(None, "prep", metadata=quantumMetadata)

            taskClass, label, config = taskDef.taskClass, taskDef.label, taskDef.config

            # Check whether to skip or delete old outputs.
            if self.checkExistingOutputs(quantum, butler, taskDef):
                _LOG.info("Skipping already-successful quantum for label=%s dataId=%s.",
                          label, quantum.dataId)
                return
            try:
                quantum = self.updatedQuantumInputs(quantum, butler, taskDef)
            except NoWorkFound as exc:
                _LOG.info("Nothing to do for task '%s' on quantum %s; saving metadata and skipping: %s",
                          taskDef.label, quantum.dataId, str(exc))
                # Make empty metadata that looks something like what a
                # do-nothing task would write (but we don't bother with empty
                # nested PropertySets for subtasks).  This is slightly
                # duplicative with logic in pipe_base that we can't easily
                # call from here; we'll fix this on DM-29761.
                logInfo(None, "end", metadata=quantumMetadata)
                fullMetadata = PropertySet()
                fullMetadata[taskDef.label] = PropertyList()
                fullMetadata["quantum"] = quantumMetadata
                self.writeMetadata(quantum, fullMetadata, taskDef, butler)
                return

            # Enable lsstDebug debugging.
            if self.enableLsstDebug:
                try:
                    _LOG.debug("Will try to import debug.py")
                    import debug  # noqa:F401
                except ImportError:
                    _LOG.warning("No 'debug' module found.")

            # Initialize global state.
            self.initGlobals(quantum, butler)

            # Ensure that we are executing a frozen config.
            config.freeze()
            logInfo(None, "init", metadata=quantumMetadata)
            task = self.makeTask(taskClass, label, config, butler)
            logInfo(None, "start", metadata=quantumMetadata)
            try:
                self.runQuantum(task, quantum, taskDef, butler)
            except Exception:
                _LOG.exception("Execution of task '%s' on quantum %s failed",
                               taskDef.label, quantum.dataId)
                raise
            logInfo(None, "end", metadata=quantumMetadata)
            fullMetadata = task.getFullMetadata()
            fullMetadata["quantum"] = quantumMetadata
            self.writeMetadata(quantum, fullMetadata, taskDef, butler)
            stopTime = time.time()
            _LOG.info("Execution of task '%s' on quantum %s took %.3f seconds",
                      taskDef.label, quantum.dataId, stopTime - startTime)

    @contextmanager
    def captureLogging(self, taskDef, quantum, butler):
        """Configure the logging system to capture logs for execution of
        this task.

        Parameters
        ----------
        taskDef : `lsst.pipe.base.TaskDef`
            The task definition.
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        butler : `~lsst.daf.butler.Butler`
            Butler to write logs to.

        Notes
        -----
        Expected to be used as a context manager to ensure that logging
        records are inserted into the butler once the quantum has been
        executed:

        .. code-block:: py

            with self.captureLogging(taskDef, quantum, butler):
                # Run quantum and capture logs.

        This method can also set up logging to attach task- or
        quantum-specific information to log messages.  Potentially this can
        take into account some info from task configuration as well.
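
        For example, the current implementation tags every record with an
        MDC ``LABEL`` built from the task label and, when available, the
        quantum data ID:

        .. code-block:: py

            label = taskDef.label
            if quantum.dataId:
                label += f":{quantum.dataId}"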

198 """ 

199 # Add a handler to the root logger to capture execution log output. 

200 # How does it get removed reliably? 

201 if taskDef.logOutputDatasetName is not None: 

202 # Either accumulate into ButlerLogRecords or stream 

203 # JSON records to file and ingest that. 

204 if self.stream_json_logs: 

205 tmp = tempfile.NamedTemporaryFile(mode="w", 

206 suffix=".json", 

207 prefix=f"butler-log-{taskDef.label}-", 

208 delete=False) 

209 self.log_handler = FileHandler(tmp.name) 

210 tmp.close() 

211 self.log_handler.setFormatter(JsonLogFormatter()) 

212 else: 

213 self.log_handler = ButlerLogRecordHandler() 

214 

215 logging.getLogger().addHandler(self.log_handler) 

216 

217 # include quantum dataId and task label into MDC 

218 label = taskDef.label 

219 if quantum.dataId: 

220 label += f":{quantum.dataId}" 

221 

222 try: 

223 with ButlerMDC.set_mdc({"LABEL": label}): 

224 yield 

225 finally: 

226 # Ensure that the logs are stored in butler. 

227 self.writeLogRecords(quantum, taskDef, butler) 

228 

    def checkExistingOutputs(self, quantum, butler, taskDef):
        """Decide whether this quantum needs to be executed.

        If only partial outputs exist, they are removed if
        ``clobberOutputs`` is `True`; otherwise an exception is raised.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Quantum to check for existing outputs.
        butler : `~lsst.daf.butler.Butler`
            Data butler.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.

        Returns
        -------
        exist : `bool`
            `True` if ``self.skipExisting`` is `True` and a previous
            execution of this quantum appears to have completed successfully
            (either because metadata was written or all datasets were
            written).  `False` otherwise.

        Raises
        ------
        RuntimeError
            Raised if some outputs exist and some do not.
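
        Notes
        -----
        A sketch of the outcome for the two common configurations, assuming
        the quantum's outputs (or its metadata dataset) already exist from a
        successful earlier run:

        .. code-block:: py

            executor = SingleQuantumExecutor(taskFactory, skipExisting=True)
            executor.checkExistingOutputs(quantum, butler, taskDef)  # True

            executor = SingleQuantumExecutor(taskFactory, skipExisting=False)
            executor.checkExistingOutputs(quantum, butler, taskDef)  # False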

256 """ 

257 collection = butler.run 

258 registry = butler.registry 

259 

260 if self.skipExisting and taskDef.metadataDatasetName is not None: 

261 # Metadata output exists; this is sufficient to assume the previous 

262 # run was successful and should be skipped. 

263 if (ref := butler.registry.findDataset(taskDef.metadataDatasetName, quantum.dataId)) is not None: 

264 if butler.datastore.exists(ref): 

265 return True 

266 

267 existingRefs = [] 

268 missingRefs = [] 

269 for datasetRefs in quantum.outputs.values(): 

270 for datasetRef in datasetRefs: 

271 ref = registry.findDataset(datasetRef.datasetType, datasetRef.dataId, 

272 collections=butler.run) 

273 if ref is None: 

274 missingRefs.append(datasetRef) 

275 else: 

276 if butler.datastore.exists(ref): 

277 existingRefs.append(ref) 

278 else: 

279 missingRefs.append(datasetRef) 

280 if existingRefs and missingRefs: 

281 # Some outputs exist and some don't, either delete existing ones 

282 # or complain. 

283 _LOG.debug("Partial outputs exist for task %s dataId=%s collection=%s " 

284 "existingRefs=%s missingRefs=%s", 

285 taskDef, quantum.dataId, collection, existingRefs, missingRefs) 

286 if self.clobberOutputs: 

287 _LOG.info("Removing partial outputs for task %s: %s", taskDef, existingRefs) 

288 butler.pruneDatasets(existingRefs, disassociate=True, unstore=True, purge=True) 

289 return False 

290 else: 

291 raise RuntimeError(f"Registry inconsistency while checking for existing outputs:" 

292 f" collection={collection} existingRefs={existingRefs}" 

293 f" missingRefs={missingRefs}") 

294 elif existingRefs: 

295 # complete outputs exist, this is fine only if skipExisting is set 

296 return self.skipExisting 

297 else: 

298 # no outputs exist 

299 return False 

300 

    def makeTask(self, taskClass, name, config, butler):
        """Make a new task instance.

        Parameters
        ----------
        taskClass : `type`
            Sub-class of `~lsst.pipe.base.PipelineTask`.
        name : `str`
            Name for this task.
        config : `~lsst.pipe.base.PipelineTaskConfig`
            Configuration object for this task.
        butler : `~lsst.daf.butler.Butler`
            Data butler.

        Returns
        -------
        task : `~lsst.pipe.base.PipelineTask`
            Instance of ``taskClass`` type.
        """
        # Call the task factory for that.
        return self.taskFactory.makeTask(taskClass, name, config, None, butler)

    def updatedQuantumInputs(self, quantum, butler, taskDef):
        """Update a quantum with extra information, returning a new updated
        Quantum.

        Some methods may require input DatasetRefs to have a non-None
        ``dataset_id``, but for intermediate datasets it may not be filled
        in during QuantumGraph construction.  This method retrieves the
        missing info from the registry.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        butler : `~lsst.daf.butler.Butler`
            Data butler.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.

        Returns
        -------
        update : `~lsst.daf.butler.Quantum`
            Updated Quantum instance.
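
        Notes
        -----
        For each input reference with a `None` ID, the resolution amounts
        to a registry lookup followed by a datastore existence check,
        roughly:

        .. code-block:: py

            resolvedRef = butler.registry.findDataset(
                ref.datasetType, ref.dataId, collections=butler.collections)
            if resolvedRef is not None and butler.datastore.exists(resolvedRef):
                newRefsForDatasetType.append(resolvedRef)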

345 """ 

346 anyChanges = False 

347 updatedInputs = defaultdict(list) 

348 for key, refsForDatasetType in quantum.inputs.items(): 

349 newRefsForDatasetType = updatedInputs[key] 

350 for ref in refsForDatasetType: 

351 if ref.id is None: 

352 resolvedRef = butler.registry.findDataset(ref.datasetType, ref.dataId, 

353 collections=butler.collections) 

354 if resolvedRef is None: 

355 _LOG.info("No dataset found for %s", ref) 

356 continue 

357 else: 

358 _LOG.debug("Updated dataset ID for %s", ref) 

359 else: 

360 resolvedRef = ref 

361 # We need to ask datastore if the dataset actually exists 

362 # because the Registry of a local "execution butler" cannot 

363 # know this (because we prepopulate it with all of the datasets 

364 # that might be created). 

365 if butler.datastore.exists(resolvedRef): 

366 newRefsForDatasetType.append(resolvedRef) 

367 if len(newRefsForDatasetType) != len(refsForDatasetType): 

368 anyChanges = True 

369 # If we removed any input datasets, let the task check if it has enough 

370 # to proceed and/or prune related datasets that it also doesn't 

371 # need/produce anymore. It will raise NoWorkFound if it can't run, 

372 # which we'll let propagate up. This is exactly what we run during QG 

373 # generation, because a task shouldn't care whether an input is missing 

374 # because some previous task didn't produce it, or because it just 

375 # wasn't there during QG generation. 

376 updatedInputs = NamedKeyDict[DatasetType, List[DatasetRef]](updatedInputs.items()) 

377 helper = AdjustQuantumHelper(updatedInputs, quantum.outputs) 

378 if anyChanges: 

379 helper.adjust_in_place(taskDef.connections, label=taskDef.label, data_id=quantum.dataId) 

380 return Quantum(taskName=quantum.taskName, 

381 taskClass=quantum.taskClass, 

382 dataId=quantum.dataId, 

383 initInputs=quantum.initInputs, 

384 inputs=helper.inputs, 

385 outputs=helper.outputs 

386 ) 

387 

    def runQuantum(self, task, quantum, taskDef, butler):
        """Execute a task on a single quantum.

        Parameters
        ----------
        task : `~lsst.pipe.base.PipelineTask`
            Task object.
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.
        butler : `~lsst.daf.butler.Butler`
            Data butler.
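
        Notes
        -----
        A task signals a benign early exit by raising
        `~lsst.pipe.base.NoWorkFound` from its ``runQuantum``.  A sketch of
        hypothetical task code (the ``exposures`` connection name is
        illustrative):

        .. code-block:: py

            def runQuantum(self, butlerQC, inputRefs, outputRefs):
                inputs = butlerQC.get(inputRefs)
                if not inputs["exposures"]:
                    raise NoWorkFound("No exposures to process.")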

401 """ 

402 # Create a butler that operates in the context of a quantum 

403 butlerQC = ButlerQuantumContext(butler, quantum) 

404 

405 # Get the input and output references for the task 

406 inputRefs, outputRefs = taskDef.connections.buildDatasetRefs(quantum) 

407 

408 # Call task runQuantum() method. Catch a few known failure modes and 

409 # translate them into specific 

410 try: 

411 task.runQuantum(butlerQC, inputRefs, outputRefs) 

412 except NoWorkFound as err: 

413 # Not an error, just an early exit. 

414 _LOG.info("Task '%s' on quantum %s exited early: %s", 

415 taskDef.label, quantum.dataId, str(err)) 

416 pass 

417 except RepeatableQuantumError as err: 

418 if self.exitOnKnownError: 

419 _LOG.warning("Caught repeatable quantum error for %s (%s):", taskDef, quantum.dataId) 

420 _LOG.warning(err, exc_info=True) 

421 sys.exit(err.EXIT_CODE) 

422 else: 

423 raise 

424 except InvalidQuantumError as err: 

425 _LOG.fatal("Invalid quantum error for %s (%s): %s", taskDef, quantum.dataId) 

426 _LOG.fatal(err, exc_info=True) 

427 sys.exit(err.EXIT_CODE) 

428 

    def writeMetadata(self, quantum, metadata, taskDef, butler):
        """Store the full task metadata in the butler if the task defines a
        metadata dataset."""
        if taskDef.metadataDatasetName is not None:
            # The DatasetRef has to be in the Quantum outputs; look it up by
            # name.
            try:
                ref = quantum.outputs[taskDef.metadataDatasetName]
            except LookupError as exc:
                raise InvalidQuantumError(
                    f"Quantum outputs is missing metadata dataset type {taskDef.metadataDatasetName};"
                    " this could happen due to inconsistent options between QuantumGraph generation"
                    " and execution") from exc
            butler.put(metadata, ref[0])

    def writeLogRecords(self, quantum, taskDef, butler):
        """Store the captured log records in the butler and detach the
        logging handler."""
        # If we are logging to an external file we must always try to
        # close it.
        filename = None
        if isinstance(self.log_handler, FileHandler):
            filename = self.log_handler.stream.name
            self.log_handler.close()

        if self.log_handler is not None:
            # Remove the handler so we stop accumulating log messages.
            logging.getLogger().removeHandler(self.log_handler)

        if taskDef.logOutputDatasetName is not None and self.log_handler is not None:
            # The DatasetRef has to be in the Quantum outputs; look it up by
            # name.
            try:
                ref = quantum.outputs[taskDef.logOutputDatasetName]
            except LookupError as exc:
                raise InvalidQuantumError(
                    f"Quantum outputs is missing log output dataset type {taskDef.logOutputDatasetName};"
                    " this could happen due to inconsistent options between QuantumGraph generation"
                    " and execution") from exc

            if isinstance(self.log_handler, ButlerLogRecordHandler):
                butler.put(self.log_handler.records, ref[0])

                # Clear the records in case the handler is reused.
                self.log_handler.records.clear()
            else:
                assert filename is not None, "Somehow unable to extract filename from file handler"

                # Need to ingest this file directly into butler.
                dataset = FileDataset(path=filename, refs=ref[0])
                try:
                    butler.ingest(dataset, transfer="move")
                except NotImplementedError:
                    # Some datastores can't receive files (e.g. an in-memory
                    # datastore when testing), so skip log storage for those.
                    # An alternative is to read the file as a
                    # ButlerLogRecords object and put it.
                    _LOG.info("Log records could not be stored in this butler because the"
                              " datastore can not ingest files.")

    def initGlobals(self, quantum, butler):
        """Initialize global state needed for task execution.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        butler : `~lsst.daf.butler.Butler`
            Data butler.

        Notes
        -----
        There is an issue with initializing the filters singleton, which is
        done by instrument; to avoid requiring tasks to do it in
        ``runQuantum()`` we do it here whenever any dataId has an instrument
        dimension.  Also, for now we only allow a single instrument: we
        verify that the instrument names in all dataIds are identical.

        This will need revision when the filter singleton disappears.
        """
        oneInstrument = None
        for datasetRefs in chain(quantum.inputs.values(), quantum.outputs.values()):
            for datasetRef in datasetRefs:
                dataId = datasetRef.dataId
                instrument = dataId.get("instrument")
                if instrument is not None:
                    if oneInstrument is not None:
                        assert instrument == oneInstrument, \
                            "Currently require that only one instrument is used per graph"
                    else:
                        oneInstrument = instrument
                        Instrument.fromName(instrument, butler.registry)