Coverage for python/lsst/ctrl/mpexec/singleQuantumExecutor.py: 11%

288 statements  

« prev     ^ index     » next       coverage.py v6.4.1, created at 2022-06-09 03:00 -0700

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22__all__ = ["SingleQuantumExecutor"] 

23 

24# ------------------------------- 

25# Imports of standard modules -- 

26# ------------------------------- 

27import logging 

28import os 

29import shutil 

30import sys 

31import tempfile 

32import time 

33from collections import defaultdict 

34from contextlib import contextmanager 

35from itertools import chain 

36from logging import FileHandler 

37from typing import Any, Iterator, Optional, Union 

38 

39from lsst.daf.butler import Butler, DatasetRef, DatasetType, FileDataset, NamedKeyDict, Quantum 

40from lsst.daf.butler.core.logging import ButlerLogRecordHandler, ButlerLogRecords, ButlerMDC, JsonLogFormatter 

41from lsst.pipe.base import ( 

42 AdjustQuantumHelper, 

43 ButlerQuantumContext, 

44 Instrument, 

45 InvalidQuantumError, 

46 NoWorkFound, 

47 PipelineTask, 

48 PipelineTaskConfig, 

49 RepeatableQuantumError, 

50 TaskDef, 

51 TaskFactory, 

52) 

53from lsst.pipe.base.configOverrides import ConfigOverrides 

54 

55# During metadata transition phase, determine metadata class by 

56# asking pipe_base 

57from lsst.pipe.base.task import _TASK_FULL_METADATA_TYPE, _TASK_METADATA_TYPE 

58from lsst.utils.timer import logInfo 

59 

60# ----------------------------- 

61# Imports for other modules -- 

62# ----------------------------- 

63from .cli.utils import _PipelineAction 

64from .mock_task import MockButlerQuantumContext, MockPipelineTask 

65from .quantumGraphExecutor import QuantumExecutor 

66from .reports import QuantumReport 

67 

68# ---------------------------------- 

69# Local non-exported definitions -- 

70# ---------------------------------- 

71 

72_LOG = logging.getLogger(__name__) 

73 

74 

75class _LogCaptureFlag: 

76 """Simple flag to enable/disable log-to-butler saving.""" 

77 

78 store: bool = True 

79 

80 

81class SingleQuantumExecutor(QuantumExecutor): 

82 """Executor class which runs one Quantum at a time. 

83 

84 Parameters 

85 ---------- 

86 butler : `~lsst.daf.butler.Butler` 

87 Data butler. 

88 taskFactory : `~lsst.pipe.base.TaskFactory` 

89 Instance of a task factory. 

90 skipExistingIn : `list` [ `str` ], optional 

91 Accepts list of collections, if all Quantum outputs already exist in 

92 the specified list of collections then that Quantum will not be rerun. 

93 clobberOutputs : `bool`, optional 

94 If `True`, then existing outputs in output run collection will be 

95 overwritten. If ``skipExistingIn`` is defined, only outputs from 

96 failed quanta will be overwritten. 

97 enableLsstDebug : `bool`, optional 

98 Enable debugging with ``lsstDebug`` facility for a task. 

99 exitOnKnownError : `bool`, optional 

100 If `True`, call `sys.exit` with the appropriate exit code for special 

101 known exceptions, after printing a traceback, instead of letting the 

102 exception propagate up to calling. This is always the behavior for 

103 InvalidQuantumError. 

104 mock : `bool`, optional 

105 If `True` then mock task execution. 

106 mock_configs : `list` [ `_PipelineAction` ], optional 

107 Optional config overrides for mock tasks. 

108 """ 

109 

110 stream_json_logs = True 

111 """If True each log record is written to a temporary file and ingested 

112 when quantum completes. If False the records are accumulated in memory 

113 and stored in butler on quantum completion.""" 

114 

115 def __init__( 

116 self, 

117 taskFactory: TaskFactory, 

118 skipExistingIn: Optional[list[str]] = None, 

119 clobberOutputs: bool = False, 

120 enableLsstDebug: bool = False, 

121 exitOnKnownError: bool = False, 

122 mock: bool = False, 

123 mock_configs: Optional[list[_PipelineAction]] = None, 

124 ): 

125 self.taskFactory = taskFactory 

126 self.skipExistingIn = skipExistingIn 

127 self.enableLsstDebug = enableLsstDebug 

128 self.clobberOutputs = clobberOutputs 

129 self.exitOnKnownError = exitOnKnownError 

130 self.mock = mock 

131 self.mock_configs = mock_configs if mock_configs is not None else [] 

132 self.log_handler: Optional[logging.Handler] = None 

133 self.report: Optional[QuantumReport] = None 

134 

135 def execute(self, taskDef: TaskDef, quantum: Quantum, butler: Butler) -> Quantum: 

136 # Docstring inherited from QuantumExecutor.execute 

137 

138 # Catch any exception and make a report based on that. 

139 try: 

140 result = self._execute(taskDef, quantum, butler) 

141 self.report = QuantumReport(dataId=quantum.dataId, taskLabel=taskDef.label) 

142 return result 

143 except Exception as exc: 

144 assert quantum.dataId is not None, "Quantum DataId cannot be None" 

145 self.report = QuantumReport.from_exception( 

146 exception=exc, 

147 dataId=quantum.dataId, 

148 taskLabel=taskDef.label, 

149 ) 

150 raise 

151 

152 def _execute(self, taskDef: TaskDef, quantum: Quantum, butler: Butler) -> Quantum: 

153 """Internal implementation of execute()""" 

154 startTime = time.time() 

155 

156 with self.captureLogging(taskDef, quantum, butler) as captureLog: 

157 

158 # Save detailed resource usage before task start to metadata. 

159 quantumMetadata = _TASK_METADATA_TYPE() 

160 logInfo(None, "prep", metadata=quantumMetadata) # type: ignore 

161 

162 taskClass, label, config = taskDef.taskClass, taskDef.label, taskDef.config 

163 

164 # check whether to skip or delete old outputs, if it returns True 

165 # or raises an exception do not try to store logs, as they may be 

166 # already in butler. 

167 captureLog.store = False 

168 if self.checkExistingOutputs(quantum, butler, taskDef): 

169 _LOG.info( 

170 "Skipping already-successful quantum for label=%s dataId=%s.", label, quantum.dataId 

171 ) 

172 return quantum 

173 captureLog.store = True 

174 

175 try: 

176 quantum = self.updatedQuantumInputs(quantum, butler, taskDef) 

177 except NoWorkFound as exc: 

178 _LOG.info( 

179 "Nothing to do for task '%s' on quantum %s; saving metadata and skipping: %s", 

180 taskDef.label, 

181 quantum.dataId, 

182 str(exc), 

183 ) 

184 # Make empty metadata that looks something like what a 

185 # do-nothing task would write (but we don't bother with empty 

186 # nested PropertySets for subtasks). This is slightly 

187 # duplicative with logic in pipe_base that we can't easily call 

188 # from here; we'll fix this on DM-29761. 

189 logInfo(None, "end", metadata=quantumMetadata) # type: ignore 

190 fullMetadata = _TASK_FULL_METADATA_TYPE() 

191 fullMetadata[taskDef.label] = _TASK_METADATA_TYPE() 

192 fullMetadata["quantum"] = quantumMetadata 

193 self.writeMetadata(quantum, fullMetadata, taskDef, butler) 

194 return quantum 

195 

196 # enable lsstDebug debugging 

197 if self.enableLsstDebug: 

198 try: 

199 _LOG.debug("Will try to import debug.py") 

200 import debug # type: ignore # noqa:F401 

201 except ImportError: 

202 _LOG.warn("No 'debug' module found.") 

203 

204 # initialize global state 

205 self.initGlobals(quantum, butler) 

206 

207 # Ensure that we are executing a frozen config 

208 config.freeze() 

209 logInfo(None, "init", metadata=quantumMetadata) # type: ignore 

210 task = self.makeTask(taskClass, label, config, butler) 

211 logInfo(None, "start", metadata=quantumMetadata) # type: ignore 

212 try: 

213 if self.mock: 

214 # Use mock task instance to execute method. 

215 runTask = self._makeMockTask(taskDef) 

216 else: 

217 runTask = task 

218 self.runQuantum(runTask, quantum, taskDef, butler) 

219 except Exception as e: 

220 _LOG.error( 

221 "Execution of task '%s' on quantum %s failed. Exception %s: %s", 

222 taskDef.label, 

223 quantum.dataId, 

224 e.__class__.__name__, 

225 str(e), 

226 ) 

227 raise 

228 logInfo(None, "end", metadata=quantumMetadata) # type: ignore 

229 fullMetadata = task.getFullMetadata() 

230 fullMetadata["quantum"] = quantumMetadata 

231 self.writeMetadata(quantum, fullMetadata, taskDef, butler) 

232 stopTime = time.time() 

233 _LOG.info( 

234 "Execution of task '%s' on quantum %s took %.3f seconds", 

235 taskDef.label, 

236 quantum.dataId, 

237 stopTime - startTime, 

238 ) 

239 return quantum 

240 

241 def _makeMockTask(self, taskDef: TaskDef) -> PipelineTask: 

242 """Make an instance of mock task for given TaskDef.""" 

243 # Make config instance and apply overrides 

244 overrides = ConfigOverrides() 

245 for action in self.mock_configs: 

246 if action.label == taskDef.label + "-mock": 

247 if action.action == "config": 

248 key, _, value = action.value.partition("=") 

249 overrides.addValueOverride(key, value) 

250 elif action.action == "configfile": 

251 overrides.addFileOverride(os.path.expandvars(action.value)) 

252 else: 

253 raise ValueError(f"Unexpected action for mock task config overrides: {action}") 

254 config = MockPipelineTask.ConfigClass() 

255 overrides.applyTo(config) 

256 

257 task = MockPipelineTask(config=config, name=taskDef.label) 

258 return task 

259 

260 @contextmanager 

261 def captureLogging(self, taskDef: TaskDef, quantum: Quantum, butler: Butler) -> Iterator: 

262 """Configure logging system to capture logs for execution of this task. 

263 

264 Parameters 

265 ---------- 

266 taskDef : `lsst.pipe.base.TaskDef` 

267 The task definition. 

268 quantum : `~lsst.daf.butler.Quantum` 

269 Single Quantum instance. 

270 butler : `~lsst.daf.butler.Butler` 

271 Butler to write logs to. 

272 

273 Notes 

274 ----- 

275 Expected to be used as a context manager to ensure that logging 

276 records are inserted into the butler once the quantum has been 

277 executed: 

278 

279 .. code-block:: py 

280 

281 with self.captureLogging(taskDef, quantum, butler): 

282 # Run quantum and capture logs. 

283 

284 Ths method can also setup logging to attach task- or 

285 quantum-specific information to log messages. Potentially this can 

286 take into account some info from task configuration as well. 

287 """ 

288 # Add a handler to the root logger to capture execution log output. 

289 # How does it get removed reliably? 

290 if taskDef.logOutputDatasetName is not None: 

291 # Either accumulate into ButlerLogRecords or stream 

292 # JSON records to file and ingest that. 

293 tmpdir = None 

294 if self.stream_json_logs: 

295 # Create the log file in a temporary directory rather than 

296 # creating a temporary file. This is necessary because 

297 # temporary files are created with restrictive permissions 

298 # and during file ingest these permissions persist in the 

299 # datastore. Using a temp directory allows us to create 

300 # a file with umask default permissions. 

301 tmpdir = tempfile.mkdtemp(prefix="butler-temp-logs-") 

302 

303 # Construct a file to receive the log records and "touch" it. 

304 log_file = os.path.join(tmpdir, f"butler-log-{taskDef.label}.json") 

305 with open(log_file, "w"): 

306 pass 

307 self.log_handler = FileHandler(log_file) 

308 self.log_handler.setFormatter(JsonLogFormatter()) 

309 else: 

310 self.log_handler = ButlerLogRecordHandler() 

311 

312 logging.getLogger().addHandler(self.log_handler) 

313 

314 # include quantum dataId and task label into MDC 

315 label = taskDef.label 

316 if quantum.dataId: 

317 label += f":{quantum.dataId}" 

318 

319 ctx = _LogCaptureFlag() 

320 try: 

321 with ButlerMDC.set_mdc({"LABEL": label, "RUN": butler.run or ""}): 

322 yield ctx 

323 finally: 

324 # Ensure that the logs are stored in butler. 

325 self.writeLogRecords(quantum, taskDef, butler, ctx.store) 

326 if tmpdir: 

327 shutil.rmtree(tmpdir, ignore_errors=True) 

328 

329 def checkExistingOutputs(self, quantum: Quantum, butler: Butler, taskDef: TaskDef) -> bool: 

330 """Decide whether this quantum needs to be executed. 

331 

332 If only partial outputs exist then they are removed if 

333 ``clobberOutputs`` is True, otherwise an exception is raised. 

334 

335 Parameters 

336 ---------- 

337 quantum : `~lsst.daf.butler.Quantum` 

338 Quantum to check for existing outputs 

339 butler : `~lsst.daf.butler.Butler` 

340 Data butler. 

341 taskDef : `~lsst.pipe.base.TaskDef` 

342 Task definition structure. 

343 

344 Returns 

345 ------- 

346 exist : `bool` 

347 `True` if ``self.skipExistingIn`` is defined, and a previous 

348 execution of this quanta appears to have completed successfully 

349 (either because metadata was written or all datasets were written). 

350 `False` otherwise. 

351 

352 Raises 

353 ------ 

354 RuntimeError 

355 Raised if some outputs exist and some not. 

356 """ 

357 if self.skipExistingIn and taskDef.metadataDatasetName is not None: 

358 # Metadata output exists; this is sufficient to assume the previous 

359 # run was successful and should be skipped. 

360 ref = butler.registry.findDataset( 

361 taskDef.metadataDatasetName, quantum.dataId, collections=self.skipExistingIn 

362 ) 

363 if ref is not None: 

364 if butler.datastore.exists(ref): 

365 return True 

366 

367 # Previously we always checked for existing outputs in `butler.run`, 

368 # now logic gets more complicated as we only want to skip quantum 

369 # whose outputs exist in `self.skipExistingIn` but pruning should only 

370 # be done for outputs existing in `butler.run`. 

371 

372 def findOutputs( 

373 collections: Optional[Union[str, list[str]]] 

374 ) -> tuple[list[DatasetRef], list[DatasetRef]]: 

375 """Find quantum outputs in specified collections.""" 

376 existingRefs = [] 

377 missingRefs = [] 

378 for datasetRefs in quantum.outputs.values(): 

379 checkRefs: list[DatasetRef] = [] 

380 registryRefToQuantumRef: dict[DatasetRef, DatasetRef] = {} 

381 for datasetRef in datasetRefs: 

382 ref = butler.registry.findDataset( 

383 datasetRef.datasetType, datasetRef.dataId, collections=collections 

384 ) 

385 if ref is None: 

386 missingRefs.append(datasetRef) 

387 else: 

388 checkRefs.append(ref) 

389 registryRefToQuantumRef[ref] = datasetRef 

390 

391 # More efficient to ask the datastore in bulk for ref 

392 # existence rather than one at a time. 

393 existence = butler.datastore.mexists(checkRefs) 

394 for ref, exists in existence.items(): 

395 if exists: 

396 existingRefs.append(ref) 

397 else: 

398 missingRefs.append(registryRefToQuantumRef[ref]) 

399 return existingRefs, missingRefs 

400 

401 existingRefs, missingRefs = findOutputs(self.skipExistingIn) 

402 if self.skipExistingIn: 

403 if existingRefs and not missingRefs: 

404 # everything is already there 

405 return True 

406 

407 # If we are to re-run quantum then prune datasets that exists in 

408 # output run collection, only if `self.clobberOutputs` is set. 

409 if existingRefs: 

410 existingRefs, missingRefs = findOutputs(butler.run) 

411 if existingRefs and missingRefs: 

412 _LOG.debug( 

413 "Partial outputs exist for task %s dataId=%s collection=%s " 

414 "existingRefs=%s missingRefs=%s", 

415 taskDef, 

416 quantum.dataId, 

417 butler.run, 

418 existingRefs, 

419 missingRefs, 

420 ) 

421 if self.clobberOutputs: 

422 # only prune 

423 _LOG.info("Removing partial outputs for task %s: %s", taskDef, existingRefs) 

424 # Do not purge registry records if this looks like 

425 # an execution butler. This ensures that the UUID 

426 # of the dataset doesn't change. 

427 if butler._allow_put_of_predefined_dataset: 

428 purge = False 

429 disassociate = False 

430 else: 

431 purge = True 

432 disassociate = True 

433 butler.pruneDatasets(existingRefs, disassociate=disassociate, unstore=True, purge=purge) 

434 return False 

435 else: 

436 raise RuntimeError( 

437 f"Registry inconsistency while checking for existing outputs:" 

438 f" collection={butler.run} existingRefs={existingRefs}" 

439 f" missingRefs={missingRefs}" 

440 ) 

441 

442 # need to re-run 

443 return False 

444 

445 def makeTask( 

446 self, taskClass: type[PipelineTask], name: str, config: PipelineTaskConfig, butler: Butler 

447 ) -> PipelineTask: 

448 """Make new task instance. 

449 

450 Parameters 

451 ---------- 

452 taskClass : `type` 

453 Sub-class of `~lsst.pipe.base.PipelineTask`. 

454 name : `str` 

455 Name for this task. 

456 config : `~lsst.pipe.base.PipelineTaskConfig` 

457 Configuration object for this task 

458 

459 Returns 

460 ------- 

461 task : `~lsst.pipe.base.PipelineTask` 

462 Instance of ``taskClass`` type. 

463 butler : `~lsst.daf.butler.Butler` 

464 Data butler. 

465 """ 

466 # call task factory for that 

467 return self.taskFactory.makeTask(taskClass, name, config, None, butler) 

468 

469 def updatedQuantumInputs(self, quantum: Quantum, butler: Butler, taskDef: TaskDef) -> Quantum: 

470 """Update quantum with extra information, returns a new updated 

471 Quantum. 

472 

473 Some methods may require input DatasetRefs to have non-None 

474 ``dataset_id``, but in case of intermediate dataset it may not be 

475 filled during QuantumGraph construction. This method will retrieve 

476 missing info from registry. 

477 

478 Parameters 

479 ---------- 

480 quantum : `~lsst.daf.butler.Quantum` 

481 Single Quantum instance. 

482 butler : `~lsst.daf.butler.Butler` 

483 Data butler. 

484 taskDef : `~lsst.pipe.base.TaskDef` 

485 Task definition structure. 

486 

487 Returns 

488 ------- 

489 update : `~lsst.daf.butler.Quantum` 

490 Updated Quantum instance 

491 """ 

492 anyChanges = False 

493 updatedInputs: defaultdict[DatasetType, list] = defaultdict(list) 

494 for key, refsForDatasetType in quantum.inputs.items(): 

495 newRefsForDatasetType = updatedInputs[key] 

496 for ref in refsForDatasetType: 

497 if ref.id is None: 

498 resolvedRef = butler.registry.findDataset( 

499 ref.datasetType, ref.dataId, collections=butler.collections 

500 ) 

501 if resolvedRef is None: 

502 _LOG.info("No dataset found for %s", ref) 

503 continue 

504 else: 

505 _LOG.debug("Updated dataset ID for %s", ref) 

506 else: 

507 resolvedRef = ref 

508 # We need to ask datastore if the dataset actually exists 

509 # because the Registry of a local "execution butler" cannot 

510 # know this (because we prepopulate it with all of the datasets 

511 # that might be created). In case of mock execution we check 

512 # that mock dataset exists instead. 

513 if self.mock: 

514 try: 

515 typeName, component = ref.datasetType.nameAndComponent() 

516 if component is not None: 

517 mockDatasetTypeName = MockButlerQuantumContext.mockDatasetTypeName(typeName) 

518 else: 

519 mockDatasetTypeName = MockButlerQuantumContext.mockDatasetTypeName( 

520 ref.datasetType.name 

521 ) 

522 

523 mockDatasetType = butler.registry.getDatasetType(mockDatasetTypeName) 

524 except KeyError: 

525 # means that mock dataset type is not there and this 

526 # should be a pre-existing dataset 

527 _LOG.debug("No mock dataset type for %s", ref) 

528 if butler.datastore.exists(resolvedRef): 

529 newRefsForDatasetType.append(resolvedRef) 

530 else: 

531 mockRef = DatasetRef(mockDatasetType, ref.dataId) 

532 resolvedMockRef = butler.registry.findDataset( 

533 mockRef.datasetType, mockRef.dataId, collections=butler.collections 

534 ) 

535 _LOG.debug("mockRef=%s resolvedMockRef=%s", mockRef, resolvedMockRef) 

536 if resolvedMockRef is not None and butler.datastore.exists(resolvedMockRef): 

537 _LOG.debug("resolvedMockRef dataset exists") 

538 newRefsForDatasetType.append(resolvedRef) 

539 elif butler.datastore.exists(resolvedRef): 

540 newRefsForDatasetType.append(resolvedRef) 

541 

542 if len(newRefsForDatasetType) != len(refsForDatasetType): 

543 anyChanges = True 

544 # If we removed any input datasets, let the task check if it has enough 

545 # to proceed and/or prune related datasets that it also doesn't 

546 # need/produce anymore. It will raise NoWorkFound if it can't run, 

547 # which we'll let propagate up. This is exactly what we run during QG 

548 # generation, because a task shouldn't care whether an input is missing 

549 # because some previous task didn't produce it, or because it just 

550 # wasn't there during QG generation. 

551 namedUpdatedInputs = NamedKeyDict[DatasetType, list[DatasetRef]](updatedInputs.items()) 

552 helper = AdjustQuantumHelper(namedUpdatedInputs, quantum.outputs) 

553 if anyChanges: 

554 assert quantum.dataId is not None, "Quantum DataId cannot be None" 

555 helper.adjust_in_place(taskDef.connections, label=taskDef.label, data_id=quantum.dataId) 

556 return Quantum( 

557 taskName=quantum.taskName, 

558 taskClass=quantum.taskClass, 

559 dataId=quantum.dataId, 

560 initInputs=quantum.initInputs, 

561 inputs=helper.inputs, 

562 outputs=helper.outputs, 

563 ) 

564 

565 def runQuantum(self, task: PipelineTask, quantum: Quantum, taskDef: TaskDef, butler: Butler) -> None: 

566 """Execute task on a single quantum. 

567 

568 Parameters 

569 ---------- 

570 task : `~lsst.pipe.base.PipelineTask` 

571 Task object. 

572 quantum : `~lsst.daf.butler.Quantum` 

573 Single Quantum instance. 

574 taskDef : `~lsst.pipe.base.TaskDef` 

575 Task definition structure. 

576 butler : `~lsst.daf.butler.Butler` 

577 Data butler. 

578 """ 

579 # Create a butler that operates in the context of a quantum 

580 if not self.mock: 

581 butlerQC = ButlerQuantumContext(butler, quantum) 

582 else: 

583 butlerQC = MockButlerQuantumContext(butler, quantum) 

584 

585 # Get the input and output references for the task 

586 inputRefs, outputRefs = taskDef.connections.buildDatasetRefs(quantum) 

587 

588 # Call task runQuantum() method. Catch a few known failure modes and 

589 # translate them into specific 

590 try: 

591 task.runQuantum(butlerQC, inputRefs, outputRefs) 

592 except NoWorkFound as err: 

593 # Not an error, just an early exit. 

594 _LOG.info("Task '%s' on quantum %s exited early: %s", taskDef.label, quantum.dataId, str(err)) 

595 pass 

596 except RepeatableQuantumError as err: 

597 if self.exitOnKnownError: 

598 _LOG.warning("Caught repeatable quantum error for %s (%s):", taskDef, quantum.dataId) 

599 _LOG.warning(err, exc_info=True) 

600 sys.exit(err.EXIT_CODE) 

601 else: 

602 raise 

603 except InvalidQuantumError as err: 

604 _LOG.fatal("Invalid quantum error for %s (%s): %s", taskDef, quantum.dataId) 

605 _LOG.fatal(err, exc_info=True) 

606 sys.exit(err.EXIT_CODE) 

607 

608 def writeMetadata(self, quantum: Quantum, metadata: Any, taskDef: TaskDef, butler: Butler) -> None: 

609 if taskDef.metadataDatasetName is not None: 

610 # DatasetRef has to be in the Quantum outputs, can lookup by name 

611 try: 

612 ref = quantum.outputs[taskDef.metadataDatasetName] 

613 except LookupError as exc: 

614 raise InvalidQuantumError( 

615 f"Quantum outputs is missing metadata dataset type {taskDef.metadataDatasetName};" 

616 f" this could happen due to inconsistent options between QuantumGraph generation" 

617 f" and execution" 

618 ) from exc 

619 butler.put(metadata, ref[0]) 

620 

621 def writeLogRecords(self, quantum: Quantum, taskDef: TaskDef, butler: Butler, store: bool) -> None: 

622 # If we are logging to an external file we must always try to 

623 # close it. 

624 filename = None 

625 if isinstance(self.log_handler, FileHandler): 

626 filename = self.log_handler.stream.name 

627 self.log_handler.close() 

628 

629 if self.log_handler is not None: 

630 # Remove the handler so we stop accumulating log messages. 

631 logging.getLogger().removeHandler(self.log_handler) 

632 

633 try: 

634 if store and taskDef.logOutputDatasetName is not None and self.log_handler is not None: 

635 # DatasetRef has to be in the Quantum outputs, can lookup by 

636 # name 

637 try: 

638 ref = quantum.outputs[taskDef.logOutputDatasetName] 

639 except LookupError as exc: 

640 raise InvalidQuantumError( 

641 f"Quantum outputs is missing log output dataset type {taskDef.logOutputDatasetName};" 

642 f" this could happen due to inconsistent options between QuantumGraph generation" 

643 f" and execution" 

644 ) from exc 

645 

646 if isinstance(self.log_handler, ButlerLogRecordHandler): 

647 butler.put(self.log_handler.records, ref[0]) 

648 

649 # Clear the records in case the handler is reused. 

650 self.log_handler.records.clear() 

651 else: 

652 assert filename is not None, "Somehow unable to extract filename from file handler" 

653 

654 # Need to ingest this file directly into butler. 

655 dataset = FileDataset(path=filename, refs=ref[0]) 

656 try: 

657 butler.ingest(dataset, transfer="move") 

658 filename = None 

659 except NotImplementedError: 

660 # Some datastores can't receive files (e.g. in-memory 

661 # datastore when testing), we store empty list for 

662 # those just to have a dataset. Alternative is to read 

663 # the file as a ButlerLogRecords object and put it. 

664 _LOG.info( 

665 "Log records could not be stored in this butler because the" 

666 " datastore can not ingest files, empty record list is stored instead." 

667 ) 

668 records = ButlerLogRecords.from_records([]) 

669 butler.put(records, ref[0]) 

670 finally: 

671 # remove file if it is not ingested 

672 if filename is not None: 

673 try: 

674 os.remove(filename) 

675 except OSError: 

676 pass 

677 

678 def initGlobals(self, quantum: Quantum, butler: Butler) -> None: 

679 """Initialize global state needed for task execution. 

680 

681 Parameters 

682 ---------- 

683 quantum : `~lsst.daf.butler.Quantum` 

684 Single Quantum instance. 

685 butler : `~lsst.daf.butler.Butler` 

686 Data butler. 

687 

688 Notes 

689 ----- 

690 There is an issue with initializing filters singleton which is done 

691 by instrument, to avoid requiring tasks to do it in runQuantum() 

692 we do it here when any dataId has an instrument dimension. Also for 

693 now we only allow single instrument, verify that all instrument 

694 names in all dataIds are identical. 

695 

696 This will need revision when filter singleton disappears. 

697 """ 

698 oneInstrument = None 

699 for datasetRefs in chain(quantum.inputs.values(), quantum.outputs.values()): 

700 for datasetRef in datasetRefs: 

701 dataId = datasetRef.dataId 

702 instrument = dataId.get("instrument") 

703 if instrument is not None: 

704 if oneInstrument is not None: 

705 assert ( # type: ignore 

706 instrument == oneInstrument 

707 ), "Currently require that only one instrument is used per graph" 

708 else: 

709 oneInstrument = instrument 

710 Instrument.fromName(instrument, butler.registry) 

711 

712 def getReport(self) -> Optional[QuantumReport]: 

713 # Docstring inherited from base class 

714 if self.report is None: 

715 raise RuntimeError("getReport() called before execute()") 

716 return self.report