Coverage for python/lsst/ctrl/mpexec/singleQuantumExecutor.py: 11%

# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__all__ = ["SingleQuantumExecutor"]

# -------------------------------
# Imports of standard modules --
# -------------------------------
import logging
import os
import shutil
import sys
import tempfile
import time
from collections import defaultdict
from contextlib import contextmanager
from itertools import chain
from logging import FileHandler
from typing import Any, Iterator, Optional, Union

from lsst.daf.butler import Butler, DatasetRef, DatasetType, FileDataset, NamedKeyDict, Quantum
from lsst.daf.butler.core.logging import ButlerLogRecordHandler, ButlerLogRecords, ButlerMDC, JsonLogFormatter
from lsst.pipe.base import (
    AdjustQuantumHelper,
    ButlerQuantumContext,
    Instrument,
    InvalidQuantumError,
    NoWorkFound,
    PipelineTask,
    PipelineTaskConfig,
    RepeatableQuantumError,
    TaskDef,
    TaskFactory,
)
from lsst.pipe.base.configOverrides import ConfigOverrides

# During metadata transition phase, determine metadata class by
# asking pipe_base
from lsst.pipe.base.task import _TASK_FULL_METADATA_TYPE, _TASK_METADATA_TYPE
from lsst.utils.timer import logInfo

# -----------------------------
# Imports for other modules --
# -----------------------------
from .cli.utils import _PipelineAction
from .mock_task import MockButlerQuantumContext, MockPipelineTask
from .quantumGraphExecutor import QuantumExecutor
from .reports import QuantumReport

# ----------------------------------
# Local non-exported definitions --
# ----------------------------------

_LOG = logging.getLogger(__name__)


class _LogCaptureFlag:
    """Simple flag to enable/disable log-to-butler saving."""

    store: bool = True


class SingleQuantumExecutor(QuantumExecutor):
    """Executor class which runs one Quantum at a time.

    Parameters
    ----------
    taskFactory : `~lsst.pipe.base.TaskFactory`
        Instance of a task factory.
    skipExistingIn : `list` [ `str` ], optional
        List of collections; if all outputs of a quantum already exist in
        any of the specified collections, that quantum will not be rerun.
    clobberOutputs : `bool`, optional
        If `True`, existing outputs in the output run collection will be
        overwritten. If ``skipExistingIn`` is defined, only outputs from
        failed quanta will be overwritten.
    enableLsstDebug : `bool`, optional
        Enable debugging with the ``lsstDebug`` facility for a task.
    exitOnKnownError : `bool`, optional
        If `True`, call `sys.exit` with the appropriate exit code for
        special known exceptions, after printing a traceback, instead of
        letting the exception propagate up to the caller. This is always
        the behavior for `InvalidQuantumError`.
    mock : `bool`, optional
        If `True`, mock task execution.
    mock_configs : `list` [ `_PipelineAction` ], optional
        Optional config overrides for mock tasks.
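
    Notes
    -----
    A minimal usage sketch (the ``taskFactory``, ``taskDef``, ``quantum``,
    and ``butler`` objects are assumed to be supplied by the caller):

    .. code-block:: py

        executor = SingleQuantumExecutor(taskFactory, clobberOutputs=True)
        executor.execute(taskDef, quantum, butler)
        report = executor.getReport()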

108 """ 

109 

110 stream_json_logs = True 

111 """If True each log record is written to a temporary file and ingested 

112 when quantum completes. If False the records are accumulated in memory 

113 and stored in butler on quantum completion.""" 
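
    # A usage sketch (hypothetical caller code, not exercised in this
    # module): in-memory accumulation can be selected by flipping this
    # class attribute before constructing the executor, e.g.
    #
    #     SingleQuantumExecutor.stream_json_logs = False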

    def __init__(
        self,
        taskFactory: TaskFactory,
        skipExistingIn: Optional[list[str]] = None,
        clobberOutputs: bool = False,
        enableLsstDebug: bool = False,
        exitOnKnownError: bool = False,
        mock: bool = False,
        mock_configs: Optional[list[_PipelineAction]] = None,
    ):
        self.taskFactory = taskFactory
        self.skipExistingIn = skipExistingIn
        self.enableLsstDebug = enableLsstDebug
        self.clobberOutputs = clobberOutputs
        self.exitOnKnownError = exitOnKnownError
        self.mock = mock
        self.mock_configs = mock_configs if mock_configs is not None else []
        self.log_handler: Optional[logging.Handler] = None
        self.report: Optional[QuantumReport] = None

    def execute(self, taskDef: TaskDef, quantum: Quantum, butler: Butler) -> Quantum:
        # Docstring inherited from QuantumExecutor.execute
        assert quantum.dataId is not None, "Quantum DataId cannot be None"
        # Catch any exception and make a report based on that.
        try:
            result = self._execute(taskDef, quantum, butler)
            self.report = QuantumReport(dataId=quantum.dataId, taskLabel=taskDef.label)
            return result
        except Exception as exc:
            self.report = QuantumReport.from_exception(
                exception=exc,
                dataId=quantum.dataId,
                taskLabel=taskDef.label,
            )
            raise

    def _execute(self, taskDef: TaskDef, quantum: Quantum, butler: Butler) -> Quantum:
        """Internal implementation of `execute()`."""
        startTime = time.time()

        with self.captureLogging(taskDef, quantum, butler) as captureLog:
            # Save detailed resource usage before task start to metadata.
            quantumMetadata = _TASK_METADATA_TYPE()
            logInfo(None, "prep", metadata=quantumMetadata)  # type: ignore

            taskClass, label, config = taskDef.taskClass, taskDef.label, taskDef.config

            # Check whether to skip or delete old outputs; if the check
            # returns True or raises an exception, do not try to store
            # logs, as they may already be in the butler.
            captureLog.store = False
            if self.checkExistingOutputs(quantum, butler, taskDef):
                _LOG.info(
                    "Skipping already-successful quantum for label=%s dataId=%s.", label, quantum.dataId
                )
                return quantum
            captureLog.store = True

            try:
                quantum = self.updatedQuantumInputs(quantum, butler, taskDef)
            except NoWorkFound as exc:
                _LOG.info(
                    "Nothing to do for task '%s' on quantum %s; saving metadata and skipping: %s",
                    taskDef.label,
                    quantum.dataId,
                    str(exc),
                )
                # Make empty metadata that looks something like what a
                # do-nothing task would write (but we don't bother with
                # empty nested PropertySets for subtasks). This is slightly
                # duplicative with logic in pipe_base that we can't easily
                # call from here; we'll fix this on DM-29761.
                logInfo(None, "end", metadata=quantumMetadata)  # type: ignore
                fullMetadata = _TASK_FULL_METADATA_TYPE()
                fullMetadata[taskDef.label] = _TASK_METADATA_TYPE()
                fullMetadata["quantum"] = quantumMetadata
                self.writeMetadata(quantum, fullMetadata, taskDef, butler)
                return quantum

            # Enable lsstDebug debugging.
            if self.enableLsstDebug:
                try:
                    _LOG.debug("Will try to import debug.py")
                    import debug  # type: ignore # noqa:F401
                except ImportError:
                    _LOG.warning("No 'debug' module found.")

            # Initialize global state.
            self.initGlobals(quantum, butler)

            # Ensure that we are executing a frozen config.
            config.freeze()
            logInfo(None, "init", metadata=quantumMetadata)  # type: ignore
            task = self.makeTask(taskClass, label, config, butler)
            logInfo(None, "start", metadata=quantumMetadata)  # type: ignore
            try:
                if self.mock:
                    # Use mock task instance to execute method.
                    runTask = self._makeMockTask(taskDef)
                else:
                    runTask = task
                self.runQuantum(runTask, quantum, taskDef, butler)
            except Exception as e:
                _LOG.error(
                    "Execution of task '%s' on quantum %s failed. Exception %s: %s",
                    taskDef.label,
                    quantum.dataId,
                    e.__class__.__name__,
                    str(e),
                )
                raise
            logInfo(None, "end", metadata=quantumMetadata)  # type: ignore
            fullMetadata = task.getFullMetadata()
            fullMetadata["quantum"] = quantumMetadata
            self.writeMetadata(quantum, fullMetadata, taskDef, butler)
            stopTime = time.time()
            _LOG.info(
                "Execution of task '%s' on quantum %s took %.3f seconds",
                taskDef.label,
                quantum.dataId,
                stopTime - startTime,
            )
        return quantum

    def _makeMockTask(self, taskDef: TaskDef) -> PipelineTask:
        """Make an instance of a mock task for the given TaskDef."""
        # Make a config instance and apply overrides.
        overrides = ConfigOverrides()
        for action in self.mock_configs:
            if action.label == taskDef.label + "-mock":
                if action.action == "config":
                    key, _, value = action.value.partition("=")
                    overrides.addValueOverride(key, value)
                elif action.action == "configfile":
                    overrides.addFileOverride(os.path.expandvars(action.value))
                else:
                    raise ValueError(f"Unexpected action for mock task config overrides: {action}")
        config = MockPipelineTask.ConfigClass()
        overrides.applyTo(config)

        task = MockPipelineTask(config=config, name=taskDef.label)
        return task

    @contextmanager
    def captureLogging(
        self, taskDef: TaskDef, quantum: Quantum, butler: Butler
    ) -> Iterator[_LogCaptureFlag]:
        """Configure the logging system to capture logs for execution
        of this task.

        Parameters
        ----------
        taskDef : `lsst.pipe.base.TaskDef`
            The task definition.
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        butler : `~lsst.daf.butler.Butler`
            Butler to write logs to.

        Notes
        -----
        Expected to be used as a context manager to ensure that logging
        records are inserted into the butler once the quantum has been
        executed:

        .. code-block:: py

            with self.captureLogging(taskDef, quantum, butler) as captureLog:
                # Run quantum and capture logs; assign
                # ``captureLog.store = False`` to skip storing them.

        This method can also set up logging to attach task- or
        quantum-specific information to log messages. Potentially this
        can take into account some info from task configuration as well.
        """
        # Add a handler to the root logger to capture execution log output;
        # it is removed again in `writeLogRecords()`.
        # Initialize ``tmpdir`` here so the cleanup in the ``finally``
        # clause below works even when no log dataset is configured.
        tmpdir = None
        if taskDef.logOutputDatasetName is not None:
            # Either accumulate into ButlerLogRecords or stream
            # JSON records to file and ingest that.
            if self.stream_json_logs:
                # Create the log file in a temporary directory rather than
                # creating a temporary file. This is necessary because
                # temporary files are created with restrictive permissions
                # and during file ingest these permissions persist in the
                # datastore. Using a temp directory allows us to create
                # a file with umask default permissions.
                tmpdir = tempfile.mkdtemp(prefix="butler-temp-logs-")

                # Construct a file to receive the log records and "touch" it.
                log_file = os.path.join(tmpdir, f"butler-log-{taskDef.label}.json")
                with open(log_file, "w"):
                    pass
                self.log_handler = FileHandler(log_file)
                self.log_handler.setFormatter(JsonLogFormatter())
            else:
                self.log_handler = ButlerLogRecordHandler()

            logging.getLogger().addHandler(self.log_handler)

        # Include the quantum dataId and task label in the MDC.
        label = taskDef.label
        if quantum.dataId:
            label += f":{quantum.dataId}"

        ctx = _LogCaptureFlag()
        try:
            with ButlerMDC.set_mdc({"LABEL": label, "RUN": butler.run or ""}):
                yield ctx
        finally:
            # Ensure that the logs are stored in butler.
            self.writeLogRecords(quantum, taskDef, butler, ctx.store)
            if tmpdir:
                shutil.rmtree(tmpdir, ignore_errors=True)

    def checkExistingOutputs(self, quantum: Quantum, butler: Butler, taskDef: TaskDef) -> bool:
        """Decide whether this quantum needs to be executed.

        If only partial outputs exist, then they are removed if
        ``clobberOutputs`` is `True`; otherwise an exception is raised.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Quantum to check for existing outputs.
        butler : `~lsst.daf.butler.Butler`
            Data butler.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.

        Returns
        -------
        exist : `bool`
            `True` if ``self.skipExistingIn`` is defined and a previous
            execution of this quantum appears to have completed successfully
            (either because metadata was written or all datasets were
            written). `False` otherwise.

        Raises
        ------
        RuntimeError
            Raised if some outputs exist and some do not.
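
        Notes
        -----
        A sketch of how the result is consumed (this mirrors the call site
        in `_execute`; it is not an additional API):

        .. code-block:: py

            if self.checkExistingOutputs(quantum, butler, taskDef):
                # Outputs are complete in ``skipExistingIn``; skip the
                # quantum entirely.
                return quantum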

354 """ 

355 if self.skipExistingIn and taskDef.metadataDatasetName is not None: 

356 # Metadata output exists; this is sufficient to assume the previous 

357 # run was successful and should be skipped. 

358 ref = butler.registry.findDataset( 

359 taskDef.metadataDatasetName, quantum.dataId, collections=self.skipExistingIn 

360 ) 

361 if ref is not None: 

362 if butler.datastore.exists(ref): 

363 return True 

364 

365 # Previously we always checked for existing outputs in `butler.run`, 

366 # now logic gets more complicated as we only want to skip quantum 

367 # whose outputs exist in `self.skipExistingIn` but pruning should only 

368 # be done for outputs existing in `butler.run`. 

369 

370 def findOutputs( 

371 collections: Optional[Union[str, list[str]]] 

372 ) -> tuple[list[DatasetRef], list[DatasetRef]]: 

373 """Find quantum outputs in specified collections.""" 

374 existingRefs = [] 

375 missingRefs = [] 

376 for datasetRefs in quantum.outputs.values(): 

377 checkRefs: list[DatasetRef] = [] 

378 registryRefToQuantumRef: dict[DatasetRef, DatasetRef] = {} 

379 for datasetRef in datasetRefs: 

380 ref = butler.registry.findDataset( 

381 datasetRef.datasetType, datasetRef.dataId, collections=collections 

382 ) 

383 if ref is None: 

384 missingRefs.append(datasetRef) 

385 else: 

386 checkRefs.append(ref) 

387 registryRefToQuantumRef[ref] = datasetRef 

388 

389 # More efficient to ask the datastore in bulk for ref 

390 # existence rather than one at a time. 

391 existence = butler.datastore.mexists(checkRefs) 

392 for ref, exists in existence.items(): 

393 if exists: 

394 existingRefs.append(ref) 

395 else: 

396 missingRefs.append(registryRefToQuantumRef[ref]) 

397 return existingRefs, missingRefs 

398 

399 existingRefs, missingRefs = findOutputs(self.skipExistingIn) 

400 if self.skipExistingIn: 

401 if existingRefs and not missingRefs: 

402 # everything is already there 

403 return True 

404 

405 # If we are to re-run quantum then prune datasets that exists in 

406 # output run collection, only if `self.clobberOutputs` is set. 

407 if existingRefs: 

408 existingRefs, missingRefs = findOutputs(butler.run) 

409 if existingRefs and missingRefs: 

410 _LOG.debug( 

411 "Partial outputs exist for task %s dataId=%s collection=%s " 

412 "existingRefs=%s missingRefs=%s", 

413 taskDef, 

414 quantum.dataId, 

415 butler.run, 

416 existingRefs, 

417 missingRefs, 

418 ) 

419 if self.clobberOutputs: 

420 # only prune 

421 _LOG.info("Removing partial outputs for task %s: %s", taskDef, existingRefs) 

422 butler.pruneDatasets(existingRefs, disassociate=True, unstore=True, purge=True) 

423 return False 

424 else: 

425 raise RuntimeError( 

426 f"Registry inconsistency while checking for existing outputs:" 

427 f" collection={butler.run} existingRefs={existingRefs}" 

428 f" missingRefs={missingRefs}" 

429 ) 

430 

431 # need to re-run 

432 return False 

433 

    def makeTask(
        self, taskClass: type[PipelineTask], name: str, config: PipelineTaskConfig, butler: Butler
    ) -> PipelineTask:
        """Make a new task instance.

        Parameters
        ----------
        taskClass : `type`
            Sub-class of `~lsst.pipe.base.PipelineTask`.
        name : `str`
            Name for this task.
        config : `~lsst.pipe.base.PipelineTaskConfig`
            Configuration object for this task.
        butler : `~lsst.daf.butler.Butler`
            Data butler.

        Returns
        -------
        task : `~lsst.pipe.base.PipelineTask`
            Instance of ``taskClass`` type.
        """
        # Call the task factory for that.
        return self.taskFactory.makeTask(taskClass, name, config, None, butler)

    def updatedQuantumInputs(self, quantum: Quantum, butler: Butler, taskDef: TaskDef) -> Quantum:
        """Update the quantum with extra information and return a new,
        updated Quantum.

        Some methods may require input DatasetRefs to have a non-None
        ``dataset_id``, but in the case of intermediate datasets it may not
        be filled in during QuantumGraph construction. This method retrieves
        the missing info from the registry.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        butler : `~lsst.daf.butler.Butler`
            Data butler.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.

        Returns
        -------
        update : `~lsst.daf.butler.Quantum`
            Updated Quantum instance.
        """
        anyChanges = False
        updatedInputs: defaultdict[DatasetType, list] = defaultdict(list)
        for key, refsForDatasetType in quantum.inputs.items():
            newRefsForDatasetType = updatedInputs[key]
            for ref in refsForDatasetType:
                if ref.id is None:
                    resolvedRef = butler.registry.findDataset(
                        ref.datasetType, ref.dataId, collections=butler.collections
                    )
                    if resolvedRef is None:
                        _LOG.info("No dataset found for %s", ref)
                        continue
                    else:
                        _LOG.debug("Updated dataset ID for %s", ref)
                else:
                    resolvedRef = ref
                # We need to ask the datastore if the dataset actually
                # exists because the Registry of a local "execution butler"
                # cannot know this (because we prepopulate it with all of
                # the datasets that might be created). In case of mock
                # execution we check that the mock dataset exists instead.
                if self.mock:
                    try:
                        typeName, component = ref.datasetType.nameAndComponent()
                        if component is not None:
                            mockDatasetTypeName = MockButlerQuantumContext.mockDatasetTypeName(typeName)
                        else:
                            mockDatasetTypeName = MockButlerQuantumContext.mockDatasetTypeName(
                                ref.datasetType.name
                            )

                        mockDatasetType = butler.registry.getDatasetType(mockDatasetTypeName)
                    except KeyError:
                        # Means that the mock dataset type is not there and
                        # this should be a pre-existing dataset.
                        _LOG.debug("No mock dataset type for %s", ref)
                        if butler.datastore.exists(resolvedRef):
                            newRefsForDatasetType.append(resolvedRef)
                    else:
                        mockRef = DatasetRef(mockDatasetType, ref.dataId)
                        resolvedMockRef = butler.registry.findDataset(
                            mockRef.datasetType, mockRef.dataId, collections=butler.collections
                        )
                        _LOG.debug("mockRef=%s resolvedMockRef=%s", mockRef, resolvedMockRef)
                        if resolvedMockRef is not None and butler.datastore.exists(resolvedMockRef):
                            _LOG.debug("resolvedMockRef dataset exists")
                            newRefsForDatasetType.append(resolvedRef)
                elif butler.datastore.exists(resolvedRef):
                    newRefsForDatasetType.append(resolvedRef)

            if len(newRefsForDatasetType) != len(refsForDatasetType):
                anyChanges = True
        # If we removed any input datasets, let the task check if it has
        # enough to proceed and/or prune related datasets that it also
        # doesn't need/produce anymore. It will raise NoWorkFound if it
        # can't run, which we'll let propagate up. This is exactly what we
        # run during QG generation, because a task shouldn't care whether
        # an input is missing because some previous task didn't produce it,
        # or because it just wasn't there during QG generation.
        namedUpdatedInputs = NamedKeyDict[DatasetType, list[DatasetRef]](updatedInputs.items())
        helper = AdjustQuantumHelper(namedUpdatedInputs, quantum.outputs)
        if anyChanges:
            assert quantum.dataId is not None, "Quantum DataId cannot be None"
            helper.adjust_in_place(taskDef.connections, label=taskDef.label, data_id=quantum.dataId)
        return Quantum(
            taskName=quantum.taskName,
            taskClass=quantum.taskClass,
            dataId=quantum.dataId,
            initInputs=quantum.initInputs,
            inputs=helper.inputs,
            outputs=helper.outputs,
        )

    def runQuantum(self, task: PipelineTask, quantum: Quantum, taskDef: TaskDef, butler: Butler) -> None:
        """Execute task on a single quantum.

        Parameters
        ----------
        task : `~lsst.pipe.base.PipelineTask`
            Task object.
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.
        butler : `~lsst.daf.butler.Butler`
            Data butler.
        """
        # Create a butler that operates in the context of a quantum.
        if not self.mock:
            butlerQC = ButlerQuantumContext(butler, quantum)
        else:
            butlerQC = MockButlerQuantumContext(butler, quantum)

        # Get the input and output references for the task.
        inputRefs, outputRefs = taskDef.connections.buildDatasetRefs(quantum)

        # Call the task runQuantum() method. Catch a few known failure
        # modes and translate them into specific exit behavior.
        try:
            task.runQuantum(butlerQC, inputRefs, outputRefs)
        except NoWorkFound as err:
            # Not an error, just an early exit.
            _LOG.info("Task '%s' on quantum %s exited early: %s", taskDef.label, quantum.dataId, str(err))
        except RepeatableQuantumError as err:
            if self.exitOnKnownError:
                _LOG.warning("Caught repeatable quantum error for %s (%s):", taskDef, quantum.dataId)
                _LOG.warning(err, exc_info=True)
                sys.exit(err.EXIT_CODE)
            else:
                raise
        except InvalidQuantumError as err:
            _LOG.fatal("Invalid quantum error for %s (%s):", taskDef, quantum.dataId)
            _LOG.fatal(err, exc_info=True)
            sys.exit(err.EXIT_CODE)

    def writeMetadata(self, quantum: Quantum, metadata: Any, taskDef: TaskDef, butler: Butler) -> None:
        """Store task metadata in the butler, if the task declares a
        metadata output dataset."""
        if taskDef.metadataDatasetName is not None:
            # DatasetRef has to be in the Quantum outputs; we can look it
            # up by name.
            try:
                ref = quantum.outputs[taskDef.metadataDatasetName]
            except LookupError as exc:
                raise InvalidQuantumError(
                    f"Quantum outputs are missing metadata dataset type {taskDef.metadataDatasetName};"
                    " this could happen due to inconsistent options between QuantumGraph generation"
                    " and execution"
                ) from exc
            butler.put(metadata, ref[0])

    def writeLogRecords(self, quantum: Quantum, taskDef: TaskDef, butler: Butler, store: bool) -> None:
        """Store captured log records in the butler, if ``store`` is `True`
        and the task declares a log output dataset."""
        # If we are logging to an external file we must always try to
        # close it.
        filename = None
        if isinstance(self.log_handler, FileHandler):
            filename = self.log_handler.stream.name
            self.log_handler.close()

        if self.log_handler is not None:
            # Remove the handler so we stop accumulating log messages.
            logging.getLogger().removeHandler(self.log_handler)

        try:
            if store and taskDef.logOutputDatasetName is not None and self.log_handler is not None:
                # DatasetRef has to be in the Quantum outputs; we can look
                # it up by name.
                try:
                    ref = quantum.outputs[taskDef.logOutputDatasetName]
                except LookupError as exc:
                    raise InvalidQuantumError(
                        f"Quantum outputs are missing log output dataset type"
                        f" {taskDef.logOutputDatasetName}; this could happen due to inconsistent"
                        " options between QuantumGraph generation and execution"
                    ) from exc

                if isinstance(self.log_handler, ButlerLogRecordHandler):
                    butler.put(self.log_handler.records, ref[0])

                    # Clear the records in case the handler is reused.
                    self.log_handler.records.clear()
                else:
                    assert filename is not None, "Somehow unable to extract filename from file handler"

                    # Need to ingest this file directly into butler.
                    dataset = FileDataset(path=filename, refs=ref[0])
                    try:
                        butler.ingest(dataset, transfer="move")
                        filename = None
                    except NotImplementedError:
                        # Some datastores can't receive files (e.g. the
                        # in-memory datastore when testing); we store an
                        # empty list for those just to have a dataset. An
                        # alternative is to read the file back as a
                        # ButlerLogRecords object and put it.
                        _LOG.info(
                            "Log records could not be stored in this butler because the"
                            " datastore can not ingest files; an empty record list is stored instead."
                        )
                        records = ButlerLogRecords.from_records([])
                        butler.put(records, ref[0])
        finally:
            # Remove the file if it was not ingested.
            if filename is not None:
                try:
                    os.remove(filename)
                except OSError:
                    pass

    def initGlobals(self, quantum: Quantum, butler: Butler) -> None:
        """Initialize global state needed for task execution.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        butler : `~lsst.daf.butler.Butler`
            Data butler.

        Notes
        -----
        There is an issue with initializing the filters singleton, which is
        done by the instrument; to avoid requiring tasks to do it in
        ``runQuantum()``, we do it here when any dataId has an instrument
        dimension. For now we only allow a single instrument, and we verify
        that the instrument names in all dataIds are identical.

        This will need revision when the filter singleton disappears.
        """
        oneInstrument = None
        for datasetRefs in chain(quantum.inputs.values(), quantum.outputs.values()):
            for datasetRef in datasetRefs:
                dataId = datasetRef.dataId
                instrument = dataId.get("instrument")
                if instrument is not None:
                    if oneInstrument is not None:
                        assert (  # type: ignore
                            instrument == oneInstrument
                        ), "Currently require that only one instrument is used per graph"
                    else:
                        oneInstrument = instrument
                        Instrument.fromName(instrument, butler.registry)

    def getReport(self) -> Optional[QuantumReport]:
        # Docstring inherited from base class.
        if self.report is None:
            raise RuntimeError("getReport() called before execute()")
        return self.report