# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__all__ = ["SingleQuantumExecutor"]

# -------------------------------
# Imports of standard modules --
# -------------------------------
import logging
import os
import shutil
import sys
import tempfile
import time
from collections import defaultdict
from contextlib import contextmanager
from itertools import chain
from logging import FileHandler
from typing import Any, Iterator, Optional, Union

from lsst.daf.butler import Butler, DatasetRef, DatasetType, FileDataset, NamedKeyDict, Quantum
from lsst.daf.butler.core.logging import ButlerLogRecordHandler, ButlerLogRecords, ButlerMDC, JsonLogFormatter
from lsst.pipe.base import (
    AdjustQuantumHelper,
    ButlerQuantumContext,
    Instrument,
    InvalidQuantumError,
    NoWorkFound,
    PipelineTask,
    PipelineTaskConfig,
    RepeatableQuantumError,
    TaskDef,
    TaskFactory,
)
from lsst.pipe.base.configOverrides import ConfigOverrides

# During the metadata transition phase, determine the metadata class by
# asking pipe_base.
from lsst.pipe.base.task import _TASK_FULL_METADATA_TYPE, _TASK_METADATA_TYPE
from lsst.utils.timer import logInfo

# -----------------------------
# Imports for other modules --
# -----------------------------
from .cli.utils import _PipelineAction
from .mock_task import MockButlerQuantumContext, MockPipelineTask
from .quantumGraphExecutor import QuantumExecutor
from .reports import QuantumReport

# ----------------------------------
# Local non-exported definitions --
# ----------------------------------

_LOG = logging.getLogger(__name__)


class _LogCaptureFlag:
    """Simple flag to enable/disable log-to-butler saving."""

    store: bool = True


class SingleQuantumExecutor(QuantumExecutor):
    """Executor class which runs one Quantum at a time.

    Parameters
    ----------
    taskFactory : `~lsst.pipe.base.TaskFactory`
        Instance of a task factory.
    skipExistingIn : `list` [ `str` ], optional
        List of collections to search for existing outputs; if all of a
        Quantum's outputs already exist in one of these collections, that
        Quantum will not be rerun.
    clobberOutputs : `bool`, optional
        If `True`, then existing outputs in the output run collection will
        be overwritten. If ``skipExistingIn`` is defined, only outputs from
        failed quanta will be overwritten.
    enableLsstDebug : `bool`, optional
        Enable debugging with the ``lsstDebug`` facility for a task.
    exitOnKnownError : `bool`, optional
        If `True`, call `sys.exit` with the appropriate exit code for
        special known exceptions, after printing a traceback, instead of
        letting the exception propagate up to the caller. This is always
        the behavior for `~lsst.pipe.base.InvalidQuantumError`.
    mock : `bool`, optional
        If `True` then mock task execution.
    mock_configs : `list` [ `_PipelineAction` ], optional
        Optional config overrides for mock tasks.
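
    Examples
    --------
    A minimal sketch of standalone use; the repository path, run
    collection, and the assumption that graph nodes carry ``taskDef`` and
    ``quantum`` attributes are illustrative, not a fixed API:

    .. code-block:: py

        butler = Butler("/path/to/repo", run="u/user/run1")
        executor = SingleQuantumExecutor(taskFactory)
        for node in quantumGraph:  # assumed iterable of quantum nodes
            executor.execute(node.taskDef, node.quantum, butler)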

    """

    stream_json_logs = True
    """If `True`, each log record is written to a temporary file and
    ingested when the quantum completes. If `False`, the records are
    accumulated in memory and stored in butler on quantum completion."""

    def __init__(
        self,
        taskFactory: TaskFactory,
        skipExistingIn: Optional[list[str]] = None,
        clobberOutputs: bool = False,
        enableLsstDebug: bool = False,
        exitOnKnownError: bool = False,
        mock: bool = False,
        mock_configs: Optional[list[_PipelineAction]] = None,
    ):
        self.taskFactory = taskFactory
        self.skipExistingIn = skipExistingIn
        self.enableLsstDebug = enableLsstDebug
        self.clobberOutputs = clobberOutputs
        self.exitOnKnownError = exitOnKnownError
        self.mock = mock
        self.mock_configs = mock_configs if mock_configs is not None else []
        self.log_handler: Optional[logging.Handler] = None
        self.report: Optional[QuantumReport] = None

    def execute(self, taskDef: TaskDef, quantum: Quantum, butler: Butler) -> Quantum:
        # Docstring inherited from QuantumExecutor.execute
        assert quantum.dataId is not None, "Quantum DataId cannot be None"
        # Catch any exception and make a report based on that.
        try:
            result = self._execute(taskDef, quantum, butler)
            self.report = QuantumReport(dataId=quantum.dataId, taskLabel=taskDef.label)
            return result
        except Exception as exc:
            self.report = QuantumReport.from_exception(
                exception=exc,
                dataId=quantum.dataId,
                taskLabel=taskDef.label,
            )
            raise

    def _execute(self, taskDef: TaskDef, quantum: Quantum, butler: Butler) -> Quantum:
        """Internal implementation of execute()."""
        startTime = time.time()

        with self.captureLogging(taskDef, quantum, butler) as captureLog:

            # Save detailed resource usage before task start to metadata.
            quantumMetadata = _TASK_METADATA_TYPE()
            logInfo(None, "prep", metadata=quantumMetadata)  # type: ignore

            taskClass, label, config = taskDef.taskClass, taskDef.label, taskDef.config

            # Check whether to skip or delete old outputs. If the check
            # returns True or raises an exception, do not try to store the
            # logs, as they may already be in butler.
            captureLog.store = False
            if self.checkExistingOutputs(quantum, butler, taskDef):
                _LOG.info(
                    "Skipping already-successful quantum for label=%s dataId=%s.", label, quantum.dataId
                )
                return quantum
            captureLog.store = True

            try:
                quantum = self.updatedQuantumInputs(quantum, butler, taskDef)
            except NoWorkFound as exc:
                _LOG.info(
                    "Nothing to do for task '%s' on quantum %s; saving metadata and skipping: %s",
                    taskDef.label,
                    quantum.dataId,
                    str(exc),
                )
                # Make empty metadata that looks something like what a
                # do-nothing task would write (but we don't bother with empty
                # nested PropertySets for subtasks). This is slightly
                # duplicative with logic in pipe_base that we can't easily
                # call from here; we'll fix this on DM-29761.
                logInfo(None, "end", metadata=quantumMetadata)  # type: ignore
                fullMetadata = _TASK_FULL_METADATA_TYPE()
                fullMetadata[taskDef.label] = _TASK_METADATA_TYPE()
                fullMetadata["quantum"] = quantumMetadata
                self.writeMetadata(quantum, fullMetadata, taskDef, butler)
                return quantum

            # Enable lsstDebug debugging.
            if self.enableLsstDebug:
                try:
                    _LOG.debug("Will try to import debug.py")
                    import debug  # type: ignore # noqa:F401
                except ImportError:
                    _LOG.warning("No 'debug' module found.")

            # Initialize global state.
            self.initGlobals(quantum, butler)

            # Ensure that we are executing a frozen config.
            config.freeze()
            logInfo(None, "init", metadata=quantumMetadata)  # type: ignore
            task = self.makeTask(taskClass, label, config, butler)
            logInfo(None, "start", metadata=quantumMetadata)  # type: ignore
            try:
                if self.mock:
                    # Use the mock task instance to execute the method.
                    runTask = self._makeMockTask(taskDef)
                else:
                    runTask = task
                self.runQuantum(runTask, quantum, taskDef, butler)
            except Exception as e:
                _LOG.error(
                    "Execution of task '%s' on quantum %s failed. Exception %s: %s",
                    taskDef.label,
                    quantum.dataId,
                    e.__class__.__name__,
                    str(e),
                )
                raise
            logInfo(None, "end", metadata=quantumMetadata)  # type: ignore
            fullMetadata = task.getFullMetadata()
            fullMetadata["quantum"] = quantumMetadata
            self.writeMetadata(quantum, fullMetadata, taskDef, butler)
            stopTime = time.time()
            _LOG.info(
                "Execution of task '%s' on quantum %s took %.3f seconds",
                taskDef.label,
                quantum.dataId,
                stopTime - startTime,
            )
        return quantum

    def _makeMockTask(self, taskDef: TaskDef) -> PipelineTask:
        """Make an instance of mock task for given TaskDef."""
        # Make config instance and apply overrides.
        overrides = ConfigOverrides()
        for action in self.mock_configs:
            if action.label == taskDef.label + "-mock":
                if action.action == "config":
                    key, _, value = action.value.partition("=")
                    overrides.addValueOverride(key, value)
                elif action.action == "configfile":
                    overrides.addFileOverride(os.path.expandvars(action.value))
                else:
                    raise ValueError(f"Unexpected action for mock task config overrides: {action}")
        config = MockPipelineTask.ConfigClass()
        overrides.applyTo(config)

        task = MockPipelineTask(config=config, name=taskDef.label)
        return task

    @contextmanager
    def captureLogging(self, taskDef: TaskDef, quantum: Quantum, butler: Butler) -> Iterator[_LogCaptureFlag]:
        """Configure logging system to capture logs for execution of this
        task.

        Parameters
        ----------
        taskDef : `lsst.pipe.base.TaskDef`
            The task definition.
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        butler : `~lsst.daf.butler.Butler`
            Butler to write logs to.

        Notes
        -----
        Expected to be used as a context manager to ensure that logging
        records are inserted into the butler once the quantum has been
        executed:

        .. code-block:: py

            with self.captureLogging(taskDef, quantum, butler) as captureLog:
                # Run the quantum and capture its logs; set
                # ``captureLog.store = False`` to skip saving them.
                ...

        This method can also set up logging to attach task- or
        quantum-specific information to log messages. Potentially this can
        take into account some info from task configuration as well.
        """

        # Add a handler to the root logger to capture execution log output.
        # How does it get removed reliably?
        # ``tmpdir`` is defined before the ``if`` so that the cleanup code
        # in the ``finally`` clause below can always test it.
        tmpdir = None
        if taskDef.logOutputDatasetName is not None:
            # Either accumulate into ButlerLogRecords or stream
            # JSON records to file and ingest that.
            if self.stream_json_logs:
                # Create the log file in a temporary directory rather than
                # creating a temporary file. This is necessary because
                # temporary files are created with restrictive permissions
                # and during file ingest these permissions persist in the
                # datastore. Using a temp directory allows us to create
                # a file with umask default permissions.
                tmpdir = tempfile.mkdtemp(prefix="butler-temp-logs-")

                # Construct a file to receive the log records and "touch" it.
                log_file = os.path.join(tmpdir, f"butler-log-{taskDef.label}.json")
                with open(log_file, "w"):
                    pass
                self.log_handler = FileHandler(log_file)
                self.log_handler.setFormatter(JsonLogFormatter())
            else:
                self.log_handler = ButlerLogRecordHandler()

            logging.getLogger().addHandler(self.log_handler)

        # Include the quantum dataId and task label in the MDC.
        label = taskDef.label
        if quantum.dataId:
            label += f":{quantum.dataId}"

        ctx = _LogCaptureFlag()
        try:
            with ButlerMDC.set_mdc({"LABEL": label, "RUN": butler.run or ""}):
                yield ctx
        finally:
            # Ensure that the logs are stored in butler.
            self.writeLogRecords(quantum, taskDef, butler, ctx.store)
            if tmpdir:
                shutil.rmtree(tmpdir, ignore_errors=True)

    def checkExistingOutputs(self, quantum: Quantum, butler: Butler, taskDef: TaskDef) -> bool:
        """Decide whether this quantum needs to be executed.

        If only partial outputs exist, they are removed if
        ``clobberOutputs`` is `True`; otherwise an exception is raised.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Quantum to check for existing outputs.
        butler : `~lsst.daf.butler.Butler`
            Data butler.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.

        Returns
        -------
        exist : `bool`
            `True` if ``self.skipExistingIn`` is defined and a previous
            execution of this quantum appears to have completed
            successfully (either because metadata was written or all
            datasets were written). `False` otherwise.

        Raises
        ------
        RuntimeError
            Raised if some outputs exist and some do not.
        """

        if self.skipExistingIn and taskDef.metadataDatasetName is not None:
            # The presence of the metadata output is sufficient to assume
            # that the previous run was successful and should be skipped.
            ref = butler.registry.findDataset(
                taskDef.metadataDatasetName, quantum.dataId, collections=self.skipExistingIn
            )
            if ref is not None:
                if butler.datastore.exists(ref):
                    return True

        # Previously we always checked for existing outputs in `butler.run`;
        # now the logic is more complicated, as we only want to skip a
        # quantum whose outputs exist in `self.skipExistingIn`, while pruning
        # should only be done for outputs existing in `butler.run`.

        def findOutputs(
            collections: Optional[Union[str, list[str]]]
        ) -> tuple[list[DatasetRef], list[DatasetRef]]:
            """Find quantum outputs in specified collections."""
            existingRefs = []
            missingRefs = []
            for datasetRefs in quantum.outputs.values():
                checkRefs: list[DatasetRef] = []
                registryRefToQuantumRef: dict[DatasetRef, DatasetRef] = {}
                for datasetRef in datasetRefs:
                    ref = butler.registry.findDataset(
                        datasetRef.datasetType, datasetRef.dataId, collections=collections
                    )
                    if ref is None:
                        missingRefs.append(datasetRef)
                    else:
                        checkRefs.append(ref)
                        registryRefToQuantumRef[ref] = datasetRef

                # More efficient to ask the datastore in bulk for ref
                # existence rather than one at a time.
                existence = butler.datastore.mexists(checkRefs)
                for ref, exists in existence.items():
                    if exists:
                        existingRefs.append(ref)
                    else:
                        missingRefs.append(registryRefToQuantumRef[ref])
            return existingRefs, missingRefs

        existingRefs, missingRefs = findOutputs(self.skipExistingIn)
        if self.skipExistingIn:
            if existingRefs and not missingRefs:
                # Everything is already there.
                return True

        # If we are to re-run the quantum, then prune datasets that exist in
        # the output run collection, but only if `self.clobberOutputs` is
        # set.
        if existingRefs:
            existingRefs, missingRefs = findOutputs(butler.run)
            if existingRefs and missingRefs:
                _LOG.debug(
                    "Partial outputs exist for task %s dataId=%s collection=%s "
                    "existingRefs=%s missingRefs=%s",
                    taskDef,
                    quantum.dataId,
                    butler.run,
                    existingRefs,
                    missingRefs,
                )
                if self.clobberOutputs:
                    # Prune the partial outputs so the quantum can re-run.
                    _LOG.info("Removing partial outputs for task %s: %s", taskDef, existingRefs)
                    butler.pruneDatasets(existingRefs, disassociate=True, unstore=True, purge=True)
                    return False
                else:
                    raise RuntimeError(
                        "Registry inconsistency while checking for existing outputs:"
                        f" collection={butler.run} existingRefs={existingRefs}"
                        f" missingRefs={missingRefs}"
                    )

        # Need to re-run.
        return False

    def makeTask(
        self, taskClass: type[PipelineTask], name: str, config: PipelineTaskConfig, butler: Butler
    ) -> PipelineTask:
        """Make new task instance.

        Parameters
        ----------
        taskClass : `type`
            Sub-class of `~lsst.pipe.base.PipelineTask`.
        name : `str`
            Name for this task.
        config : `~lsst.pipe.base.PipelineTaskConfig`
            Configuration object for this task.
        butler : `~lsst.daf.butler.Butler`
            Data butler.

        Returns
        -------
        task : `~lsst.pipe.base.PipelineTask`
            Instance of ``taskClass`` type.
        """
        # Call the task factory for that.
        return self.taskFactory.makeTask(taskClass, name, config, None, butler)

    def updatedQuantumInputs(self, quantum: Quantum, butler: Butler, taskDef: TaskDef) -> Quantum:
        """Update quantum with extra information; returns a new, updated
        Quantum.

        Some methods may require input DatasetRefs to have a non-None
        ``dataset_id``, but in the case of intermediate datasets it may not
        be filled in during QuantumGraph construction. This method retrieves
        the missing info from the registry.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        butler : `~lsst.daf.butler.Butler`
            Data butler.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.

        Returns
        -------
        update : `~lsst.daf.butler.Quantum`
            Updated Quantum instance.
        """

        anyChanges = False
        updatedInputs: defaultdict[DatasetType, list] = defaultdict(list)
        for key, refsForDatasetType in quantum.inputs.items():
            newRefsForDatasetType = updatedInputs[key]
            for ref in refsForDatasetType:
                if ref.id is None:
                    resolvedRef = butler.registry.findDataset(
                        ref.datasetType, ref.dataId, collections=butler.collections
                    )
                    if resolvedRef is None:
                        _LOG.info("No dataset found for %s", ref)
                        continue
                    else:
                        _LOG.debug("Updated dataset ID for %s", ref)
                else:
                    resolvedRef = ref
                # We need to ask datastore if the dataset actually exists
                # because the Registry of a local "execution butler" cannot
                # know this (because we prepopulate it with all of the
                # datasets that might be created). In case of mock execution
                # we check that the mock dataset exists instead.
                if self.mock:
                    try:
                        typeName, component = ref.datasetType.nameAndComponent()
                        if component is not None:
                            mockDatasetTypeName = MockButlerQuantumContext.mockDatasetTypeName(typeName)
                        else:
                            mockDatasetTypeName = MockButlerQuantumContext.mockDatasetTypeName(
                                ref.datasetType.name
                            )

                        mockDatasetType = butler.registry.getDatasetType(mockDatasetTypeName)
                    except KeyError:
                        # Means that the mock dataset type is not there and
                        # this should be a pre-existing dataset.
                        _LOG.debug("No mock dataset type for %s", ref)
                        if butler.datastore.exists(resolvedRef):
                            newRefsForDatasetType.append(resolvedRef)
                    else:
                        mockRef = DatasetRef(mockDatasetType, ref.dataId)
                        resolvedMockRef = butler.registry.findDataset(
                            mockRef.datasetType, mockRef.dataId, collections=butler.collections
                        )
                        _LOG.debug("mockRef=%s resolvedMockRef=%s", mockRef, resolvedMockRef)
                        if resolvedMockRef is not None and butler.datastore.exists(resolvedMockRef):
                            _LOG.debug("resolvedMockRef dataset exists")
                            newRefsForDatasetType.append(resolvedRef)
                elif butler.datastore.exists(resolvedRef):
                    newRefsForDatasetType.append(resolvedRef)

            if len(newRefsForDatasetType) != len(refsForDatasetType):
                anyChanges = True
        # If we removed any input datasets, let the task check if it has
        # enough to proceed and/or prune related datasets that it also
        # doesn't need/produce anymore. It will raise NoWorkFound if it
        # can't run, which we'll let propagate up. This is exactly what we
        # run during QG generation, because a task shouldn't care whether an
        # input is missing because some previous task didn't produce it, or
        # because it just wasn't there during QG generation.
        namedUpdatedInputs = NamedKeyDict[DatasetType, list[DatasetRef]](updatedInputs.items())
        helper = AdjustQuantumHelper(namedUpdatedInputs, quantum.outputs)
        if anyChanges:
            assert quantum.dataId is not None, "Quantum DataId cannot be None"
            helper.adjust_in_place(taskDef.connections, label=taskDef.label, data_id=quantum.dataId)
        return Quantum(
            taskName=quantum.taskName,
            taskClass=quantum.taskClass,
            dataId=quantum.dataId,
            initInputs=quantum.initInputs,
            inputs=helper.inputs,
            outputs=helper.outputs,
        )

    def runQuantum(self, task: PipelineTask, quantum: Quantum, taskDef: TaskDef, butler: Butler) -> None:
        """Execute task on a single quantum.

        Parameters
        ----------
        task : `~lsst.pipe.base.PipelineTask`
            Task object.
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.
        butler : `~lsst.daf.butler.Butler`
            Data butler.
        """

        # Create a butler that operates in the context of a quantum.
        if not self.mock:
            butlerQC = ButlerQuantumContext(butler, quantum)
        else:
            butlerQC = MockButlerQuantumContext(butler, quantum)

        # Get the input and output references for the task.
        inputRefs, outputRefs = taskDef.connections.buildDatasetRefs(quantum)

        # Call the task runQuantum() method. Catch a few known failure modes
        # and translate them into specific exit codes.
        try:
            task.runQuantum(butlerQC, inputRefs, outputRefs)
        except NoWorkFound as err:
            # Not an error, just an early exit.
            _LOG.info("Task '%s' on quantum %s exited early: %s", taskDef.label, quantum.dataId, str(err))
        except RepeatableQuantumError as err:
            if self.exitOnKnownError:
                _LOG.warning("Caught repeatable quantum error for %s (%s):", taskDef, quantum.dataId)
                _LOG.warning(err, exc_info=True)
                sys.exit(err.EXIT_CODE)
            else:
                raise
        except InvalidQuantumError as err:
            _LOG.fatal("Invalid quantum error for %s (%s):", taskDef, quantum.dataId)
            _LOG.fatal(err, exc_info=True)
            sys.exit(err.EXIT_CODE)

    def writeMetadata(self, quantum: Quantum, metadata: Any, taskDef: TaskDef, butler: Butler) -> None:
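        """Store task metadata in the butler.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Executed Quantum; its outputs must include the metadata dataset.
        metadata : `Any`
            Full task metadata to store.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.
        butler : `~lsst.daf.butler.Butler`
            Data butler.

        Raises
        ------
        InvalidQuantumError
            Raised if the metadata dataset type is missing from the quantum
            outputs.
        """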

        if taskDef.metadataDatasetName is not None:
            # DatasetRef has to be in the Quantum outputs; we can look it up
            # by name.
            try:
                ref = quantum.outputs[taskDef.metadataDatasetName]
            except LookupError as exc:
                raise InvalidQuantumError(
                    f"Quantum outputs is missing metadata dataset type {taskDef.metadataDatasetName};"
                    " this could happen due to inconsistent options between QuantumGraph generation"
                    " and execution"
                ) from exc
            butler.put(metadata, ref[0])

    def writeLogRecords(self, quantum: Quantum, taskDef: TaskDef, butler: Butler, store: bool) -> None:
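        """Store captured log records in the butler, if requested.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Executed Quantum; its outputs must include the log dataset.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.
        butler : `~lsst.daf.butler.Butler`
            Data butler.
        store : `bool`
            If `False`, only detach (and clean up) the log handler without
            storing the records.
        """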

        # If we are logging to an external file we must always try to
        # close it.
        filename = None
        if isinstance(self.log_handler, FileHandler):
            filename = self.log_handler.stream.name
            self.log_handler.close()

        if self.log_handler is not None:
            # Remove the handler so we stop accumulating log messages.
            logging.getLogger().removeHandler(self.log_handler)

        try:
            if store and taskDef.logOutputDatasetName is not None and self.log_handler is not None:
                # DatasetRef has to be in the Quantum outputs; we can look it
                # up by name.
                try:
                    ref = quantum.outputs[taskDef.logOutputDatasetName]
                except LookupError as exc:
                    raise InvalidQuantumError(
                        f"Quantum outputs is missing log output dataset type"
                        f" {taskDef.logOutputDatasetName}; this could happen due to inconsistent"
                        " options between QuantumGraph generation and execution"
                    ) from exc

                if isinstance(self.log_handler, ButlerLogRecordHandler):
                    butler.put(self.log_handler.records, ref[0])

                    # Clear the records in case the handler is reused.
                    self.log_handler.records.clear()
                else:
                    assert filename is not None, "Somehow unable to extract filename from file handler"

                    # Need to ingest this file directly into butler.
                    dataset = FileDataset(path=filename, refs=ref[0])
                    try:
                        butler.ingest(dataset, transfer="move")
                        filename = None
                    except NotImplementedError:
                        # Some datastores can't receive files (e.g. the
                        # in-memory datastore used in testing); for those we
                        # store an empty record list just to have a dataset.
                        # An alternative is to read the file back as a
                        # ButlerLogRecords object and put that.
                        _LOG.info(
                            "Log records could not be stored in this butler because the"
                            " datastore cannot ingest files; an empty record list is stored instead."
                        )
                        records = ButlerLogRecords.from_records([])
                        butler.put(records, ref[0])
        finally:
            # Remove the file if it was not ingested.
            if filename is not None:
                try:
                    os.remove(filename)
                except OSError:
                    pass

    def initGlobals(self, quantum: Quantum, butler: Butler) -> None:
        """Initialize global state needed for task execution.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        butler : `~lsst.daf.butler.Butler`
            Data butler.

        Notes
        -----
        There is an issue with initializing the filters singleton, which is
        done by instrument; to avoid requiring tasks to do it in
        runQuantum() we do it here when any dataId has an instrument
        dimension. For now we also allow only a single instrument, and
        verify that the instrument names in all dataIds are identical.

        This will need revision when the filter singleton disappears.
        """
        oneInstrument = None
        for datasetRefs in chain(quantum.inputs.values(), quantum.outputs.values()):
            for datasetRef in datasetRefs:
                dataId = datasetRef.dataId
                instrument = dataId.get("instrument")
                if instrument is not None:
                    if oneInstrument is not None:
                        assert (  # type: ignore
                            instrument == oneInstrument
                        ), "Currently require that only one instrument is used per graph"
                    else:
                        oneInstrument = instrument
                        Instrument.fromName(instrument, butler.registry)

    def getReport(self) -> Optional[QuantumReport]:
        # Docstring inherited from base class
        if self.report is None:
            raise RuntimeError("getReport() called before execute()")
        return self.report