# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

__all__ = ["SingleQuantumExecutor"]

# -------------------------------
#  Imports of standard modules --
# -------------------------------
import logging
import os
import shutil
import sys
import tempfile
import time
from collections import defaultdict
from contextlib import contextmanager
from itertools import chain
from logging import FileHandler
from typing import Dict, List, Optional

from lsst.daf.butler import DatasetRef, DatasetType, FileDataset, NamedKeyDict, Quantum
from lsst.daf.butler.core.logging import ButlerLogRecordHandler, ButlerLogRecords, ButlerMDC, JsonLogFormatter
from lsst.pipe.base import (
    AdjustQuantumHelper,
    ButlerQuantumContext,
    Instrument,
    InvalidQuantumError,
    NoWorkFound,
    RepeatableQuantumError,
)
from lsst.pipe.base.configOverrides import ConfigOverrides

# During the metadata transition phase, determine the metadata class by
# asking pipe_base.
from lsst.pipe.base.task import _TASK_FULL_METADATA_TYPE, _TASK_METADATA_TYPE
from lsst.utils.timer import logInfo

# -----------------------------
#  Imports for other modules --
# -----------------------------
from .mock_task import MockButlerQuantumContext, MockPipelineTask
from .quantumGraphExecutor import QuantumExecutor
from .reports import QuantumReport

# ----------------------------------
#  Local non-exported definitions --
# ----------------------------------

_LOG = logging.getLogger(__name__)


class _LogCaptureFlag:
    """Simple flag to enable/disable log-to-butler saving."""

    store: bool = True


class SingleQuantumExecutor(QuantumExecutor):
    """Executor class which runs one Quantum at a time.

    Parameters
    ----------
    taskFactory : `~lsst.pipe.base.TaskFactory`
        Instance of a task factory.
    skipExistingIn : `list` [ `str` ], optional
        Accepts a list of collections; if all quantum outputs already exist
        in the specified list of collections, that quantum will not be rerun.
    clobberOutputs : `bool`, optional
        If `True`, existing outputs in the output run collection will be
        overwritten. If ``skipExistingIn`` is defined, only outputs from
        failed quanta will be overwritten.
    enableLsstDebug : `bool`, optional
        Enable debugging with the ``lsstDebug`` facility for a task.
    exitOnKnownError : `bool`, optional
        If `True`, call `sys.exit` with the appropriate exit code for special
        known exceptions, after printing a traceback, instead of letting the
        exception propagate up to the caller. This is always the behavior for
        `InvalidQuantumError`.
    mock : `bool`, optional
        If `True`, mock task execution.
    mock_configs : `list` [ `_PipelineAction` ], optional
        Optional config overrides for mock tasks.
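
    Notes
    -----
    A minimal usage sketch; ``taskFactory``, ``taskDef``, ``quantum``, and
    ``butler`` are assumed to be supplied by the caller (normally a quantum
    graph executor) and the names are illustrative only:

    .. code-block:: py

        executor = SingleQuantumExecutor(taskFactory, clobberOutputs=True)
        # Optionally accumulate log records in memory instead of streaming
        # JSON to a temporary file:
        # SingleQuantumExecutor.stream_json_logs = False
        executor.execute(taskDef, quantum, butler)
        report = executor.getReport()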

    """

    stream_json_logs = True
    """If `True`, each log record is written to a temporary file and ingested
    when the quantum completes. If `False`, the records are accumulated in
    memory and stored in butler on quantum completion."""

    def __init__(
        self,
        taskFactory,
        skipExistingIn=None,
        clobberOutputs=False,
        enableLsstDebug=False,
        exitOnKnownError=False,
        mock=False,
        mock_configs=None,
    ):
        self.taskFactory = taskFactory
        self.skipExistingIn = skipExistingIn
        self.enableLsstDebug = enableLsstDebug
        self.clobberOutputs = clobberOutputs
        self.exitOnKnownError = exitOnKnownError
        self.mock = mock
        self.mock_configs = mock_configs
        self.log_handler = None
        self.report: Optional[QuantumReport] = None

    def execute(self, taskDef, quantum, butler):
        # Docstring inherited from QuantumExecutor.execute

        # Catch any exception and make a report based on that.
        try:
            result = self._execute(taskDef, quantum, butler)
            self.report = QuantumReport(dataId=quantum.dataId, taskLabel=taskDef.label)
            return result
        except Exception as exc:
            self.report = QuantumReport.from_exception(
                exception=exc,
                dataId=quantum.dataId,
                taskLabel=taskDef.label,
            )
            raise

    def _execute(self, taskDef, quantum, butler):
        """Internal implementation of execute()"""
        startTime = time.time()

        with self.captureLogging(taskDef, quantum, butler) as captureLog:

            # Save detailed resource usage before task start to metadata.
            quantumMetadata = _TASK_METADATA_TYPE()
            logInfo(None, "prep", metadata=quantumMetadata)

            taskClass, label, config = taskDef.taskClass, taskDef.label, taskDef.config

            # Check whether to skip or delete old outputs. If this returns
            # True or raises an exception, do not try to store logs, as they
            # may already be in the butler.
            captureLog.store = False
            if self.checkExistingOutputs(quantum, butler, taskDef):
                _LOG.info(
                    "Skipping already-successful quantum for label=%s dataId=%s.", label, quantum.dataId
                )
                return
            captureLog.store = True

            try:
                quantum = self.updatedQuantumInputs(quantum, butler, taskDef)
            except NoWorkFound as exc:
                _LOG.info(
                    "Nothing to do for task '%s' on quantum %s; saving metadata and skipping: %s",
                    taskDef.label,
                    quantum.dataId,
                    str(exc),
                )
                # Make empty metadata that looks something like what a
                # do-nothing task would write (but we don't bother with empty
                # nested PropertySets for subtasks).  This is slightly
                # duplicative with logic in pipe_base that we can't easily
                # call from here; we'll fix this on DM-29761.
                logInfo(None, "end", metadata=quantumMetadata)
                fullMetadata = _TASK_FULL_METADATA_TYPE()
                fullMetadata[taskDef.label] = _TASK_METADATA_TYPE()
                fullMetadata["quantum"] = quantumMetadata
                self.writeMetadata(quantum, fullMetadata, taskDef, butler)
                return

            # Enable lsstDebug debugging.
            if self.enableLsstDebug:
                try:
                    _LOG.debug("Will try to import debug.py")
                    import debug  # noqa:F401
                except ImportError:
                    _LOG.warning("No 'debug' module found.")

            # Initialize global state.
            self.initGlobals(quantum, butler)

            # Ensure that we are executing a frozen config.
            config.freeze()
            logInfo(None, "init", metadata=quantumMetadata)
            task = self.makeTask(taskClass, label, config, butler)
            logInfo(None, "start", metadata=quantumMetadata)
            try:
                if self.mock:
                    # Use mock task instance to execute method.
                    runTask = self._makeMockTask(taskDef)
                else:
                    runTask = task
                self.runQuantum(runTask, quantum, taskDef, butler)
            except Exception as e:
                _LOG.error(
                    "Execution of task '%s' on quantum %s failed. Exception %s: %s",
                    taskDef.label,
                    quantum.dataId,
                    e.__class__.__name__,
                    str(e),
                )
                raise
            logInfo(None, "end", metadata=quantumMetadata)
            fullMetadata = task.getFullMetadata()
            fullMetadata["quantum"] = quantumMetadata
            self.writeMetadata(quantum, fullMetadata, taskDef, butler)
            stopTime = time.time()
            _LOG.info(
                "Execution of task '%s' on quantum %s took %.3f seconds",
                taskDef.label,
                quantum.dataId,
                stopTime - startTime,
            )
        return quantum

    def _makeMockTask(self, taskDef):
        """Make an instance of mock task for given TaskDef."""
        # Make config instance and apply overrides.
        overrides = ConfigOverrides()
        for action in self.mock_configs:
            if action.label == taskDef.label + "-mock":
                if action.action == "config":
                    key, _, value = action.value.partition("=")
                    overrides.addValueOverride(key, value)
                elif action.action == "configfile":
                    overrides.addFileOverride(os.path.expandvars(action.value))
                else:
                    raise ValueError(f"Unexpected action for mock task config overrides: {action}")
        config = MockPipelineTask.ConfigClass()
        overrides.applyTo(config)

        task = MockPipelineTask(config=config, name=taskDef.label)
        return task

    @contextmanager
    def captureLogging(self, taskDef, quantum, butler):
        """Configure logging system to capture logs for execution of this task.

        Parameters
        ----------
        taskDef : `lsst.pipe.base.TaskDef`
            The task definition.
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        butler : `~lsst.daf.butler.Butler`
            Butler to write logs to.

        Notes
        -----
        Expected to be used as a context manager to ensure that logging
        records are inserted into the butler once the quantum has been
        executed:

        .. code-block:: py

            with self.captureLogging(taskDef, quantum, butler):
                # Run quantum and capture logs.

        This method can also set up logging to attach task- or
        quantum-specific information to log messages. Potentially this can
        take into account some info from task configuration as well.
        """
        # Add a handler to the root logger to capture execution log output.
        # How does it get removed reliably?
        # Initialize here so the cleanup in the ``finally`` block below works
        # even when no log output dataset is configured.
        tmpdir = None
        if taskDef.logOutputDatasetName is not None:
            # Either accumulate into ButlerLogRecords or stream
            # JSON records to file and ingest that.
            if self.stream_json_logs:
                # Create the log file in a temporary directory rather than
                # creating a temporary file. This is necessary because
                # temporary files are created with restrictive permissions
                # and during file ingest these permissions persist in the
                # datastore. Using a temp directory allows us to create
                # a file with umask default permissions.
                tmpdir = tempfile.mkdtemp(prefix="butler-temp-logs-")

                # Construct a file to receive the log records and "touch" it.
                log_file = os.path.join(tmpdir, f"butler-log-{taskDef.label}.json")
                with open(log_file, "w"):
                    pass
                self.log_handler = FileHandler(log_file)
                self.log_handler.setFormatter(JsonLogFormatter())
            else:
                self.log_handler = ButlerLogRecordHandler()

            logging.getLogger().addHandler(self.log_handler)

        # Include quantum dataId and task label in the MDC.
        label = taskDef.label
        if quantum.dataId:
            label += f":{quantum.dataId}"

        ctx = _LogCaptureFlag()
        try:
            with ButlerMDC.set_mdc({"LABEL": label, "RUN": butler.run}):
                yield ctx
        finally:
            # Ensure that the logs are stored in butler.
            self.writeLogRecords(quantum, taskDef, butler, ctx.store)
            if tmpdir:
                shutil.rmtree(tmpdir, ignore_errors=True)

    def checkExistingOutputs(self, quantum, butler, taskDef):
        """Decide whether this quantum needs to be executed.

        If only partial outputs exist, they are removed if ``clobberOutputs``
        is `True`; otherwise an exception is raised.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Quantum to check for existing outputs.
        butler : `~lsst.daf.butler.Butler`
            Data butler.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.

        Returns
        -------
        exist : `bool`
            `True` if ``self.skipExistingIn`` is defined, and a previous
            execution of this quantum appears to have completed successfully
            (either because metadata was written or all datasets were
            written).  `False` otherwise.

        Raises
        ------
        RuntimeError
            Raised if some outputs exist while others are missing.
        """
        if self.skipExistingIn and taskDef.metadataDatasetName is not None:
            # Metadata output exists; this is sufficient to assume the
            # previous run was successful and should be skipped.
            ref = butler.registry.findDataset(
                taskDef.metadataDatasetName, quantum.dataId, collections=self.skipExistingIn
            )
            if ref is not None:
                if butler.datastore.exists(ref):
                    return True

        # Previously we always checked for existing outputs in `butler.run`;
        # now the logic is more complicated, as we only want to skip quanta
        # whose outputs exist in `self.skipExistingIn`, but pruning should
        # only be done for outputs existing in `butler.run`.

        def findOutputs(collections):
            """Find quantum outputs in specified collections."""
            existingRefs = []
            missingRefs = []
            for datasetRefs in quantum.outputs.values():
                checkRefs: List[DatasetRef] = []
                registryRefToQuantumRef: Dict[DatasetRef, DatasetRef] = {}
                for datasetRef in datasetRefs:
                    ref = butler.registry.findDataset(
                        datasetRef.datasetType, datasetRef.dataId, collections=collections
                    )
                    if ref is None:
                        missingRefs.append(datasetRef)
                    else:
                        checkRefs.append(ref)
                        registryRefToQuantumRef[ref] = datasetRef

                # More efficient to ask the datastore in bulk for ref
                # existence rather than one at a time.
                existence = butler.datastore.mexists(checkRefs)
                for ref, exists in existence.items():
                    if exists:
                        existingRefs.append(ref)
                    else:
                        missingRefs.append(registryRefToQuantumRef[ref])
            return existingRefs, missingRefs

        existingRefs, missingRefs = findOutputs(self.skipExistingIn)
        if self.skipExistingIn:
            if existingRefs and not missingRefs:
                # Everything is already there.
                return True

        # If we are to re-run the quantum, then prune datasets that exist in
        # the output run collection, but only if `self.clobberOutputs` is set.
        if existingRefs:
            existingRefs, missingRefs = findOutputs(butler.run)
            if existingRefs and missingRefs:
                _LOG.debug(
                    "Partial outputs exist for task %s dataId=%s collection=%s "
                    "existingRefs=%s missingRefs=%s",
                    taskDef,
                    quantum.dataId,
                    butler.run,
                    existingRefs,
                    missingRefs,
                )
                if self.clobberOutputs:
                    # Only prune the existing partial outputs.
                    _LOG.info("Removing partial outputs for task %s: %s", taskDef, existingRefs)
                    # Do not purge registry records if this looks like
                    # an execution butler. This ensures that the UUID
                    # of the dataset doesn't change.
                    if butler._allow_put_of_predefined_dataset:
                        purge = False
                        disassociate = False
                    else:
                        purge = True
                        disassociate = True
                    butler.pruneDatasets(existingRefs, disassociate=disassociate, unstore=True, purge=purge)
                    return False
                else:
                    raise RuntimeError(
                        f"Registry inconsistency while checking for existing outputs:"
                        f" collection={butler.run} existingRefs={existingRefs}"
                        f" missingRefs={missingRefs}"
                    )

        # Need to re-run.
        return False

    def makeTask(self, taskClass, name, config, butler):
        """Make new task instance.

        Parameters
        ----------
        taskClass : `type`
            Sub-class of `~lsst.pipe.base.PipelineTask`.
        name : `str`
            Name for this task.
        config : `~lsst.pipe.base.PipelineTaskConfig`
            Configuration object for this task.
        butler : `~lsst.daf.butler.Butler`
            Data butler.

        Returns
        -------
        task : `~lsst.pipe.base.PipelineTask`
            Instance of ``taskClass`` type.
        """
        # Call task factory for that.
        return self.taskFactory.makeTask(taskClass, name, config, None, butler)

    def updatedQuantumInputs(self, quantum, butler, taskDef):
        """Update quantum with extra information and return a new updated
        Quantum.

        Some methods may require input DatasetRefs to have a non-None
        ``dataset_id``, but in the case of intermediate datasets it may not
        be filled in during QuantumGraph construction. This method retrieves
        the missing info from the registry.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        butler : `~lsst.daf.butler.Butler`
            Data butler.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.

        Returns
        -------
        update : `~lsst.daf.butler.Quantum`
            Updated Quantum instance.
        """
        anyChanges = False
        updatedInputs = defaultdict(list)
        for key, refsForDatasetType in quantum.inputs.items():
            newRefsForDatasetType = updatedInputs[key]
            for ref in refsForDatasetType:
                if ref.id is None:
                    resolvedRef = butler.registry.findDataset(
                        ref.datasetType, ref.dataId, collections=butler.collections
                    )
                    if resolvedRef is None:
                        _LOG.info("No dataset found for %s", ref)
                        continue
                    else:
                        _LOG.debug("Updated dataset ID for %s", ref)
                else:
                    resolvedRef = ref
                # We need to ask datastore if the dataset actually exists
                # because the Registry of a local "execution butler" cannot
                # know this (because we prepopulate it with all of the
                # datasets that might be created). In case of mock execution
                # we check that the mock dataset exists instead.
                if self.mock:
                    try:
                        typeName, component = ref.datasetType.nameAndComponent()
                        if component is not None:
                            mockDatasetTypeName = MockButlerQuantumContext.mockDatasetTypeName(typeName)
                        else:
                            mockDatasetTypeName = MockButlerQuantumContext.mockDatasetTypeName(
                                ref.datasetType.name
                            )

                        mockDatasetType = butler.registry.getDatasetType(mockDatasetTypeName)
                    except KeyError:
                        # Means that the mock dataset type is not there and
                        # this should be a pre-existing dataset.
                        _LOG.debug("No mock dataset type for %s", ref)
                        if butler.datastore.exists(resolvedRef):
                            newRefsForDatasetType.append(resolvedRef)
                    else:
                        mockRef = DatasetRef(mockDatasetType, ref.dataId)
                        resolvedMockRef = butler.registry.findDataset(
                            mockRef.datasetType, mockRef.dataId, collections=butler.collections
                        )
                        _LOG.debug("mockRef=%s resolvedMockRef=%s", mockRef, resolvedMockRef)
                        if resolvedMockRef is not None and butler.datastore.exists(resolvedMockRef):
                            _LOG.debug("resolvedMockRef dataset exists")
                            newRefsForDatasetType.append(resolvedRef)
                elif butler.datastore.exists(resolvedRef):
                    newRefsForDatasetType.append(resolvedRef)

            if len(newRefsForDatasetType) != len(refsForDatasetType):
                anyChanges = True
        # If we removed any input datasets, let the task check if it has
        # enough to proceed and/or prune related datasets that it also
        # doesn't need/produce anymore.  It will raise NoWorkFound if it
        # can't run, which we'll let propagate up.  This is exactly what we
        # run during QG generation, because a task shouldn't care whether an
        # input is missing because some previous task didn't produce it, or
        # because it just wasn't there during QG generation.
        updatedInputs = NamedKeyDict[DatasetType, List[DatasetRef]](updatedInputs.items())
        helper = AdjustQuantumHelper(updatedInputs, quantum.outputs)
        if anyChanges:
            helper.adjust_in_place(taskDef.connections, label=taskDef.label, data_id=quantum.dataId)
        return Quantum(
            taskName=quantum.taskName,
            taskClass=quantum.taskClass,
            dataId=quantum.dataId,
            initInputs=quantum.initInputs,
            inputs=helper.inputs,
            outputs=helper.outputs,
        )

    def runQuantum(self, task, quantum, taskDef, butler):
        """Execute task on a single quantum.

        Parameters
        ----------
        task : `~lsst.pipe.base.PipelineTask`
            Task object.
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.
        butler : `~lsst.daf.butler.Butler`
            Data butler.
        """
        # Create a butler that operates in the context of a quantum.
        if self.mock:
            butlerQC = MockButlerQuantumContext(butler, quantum)
        else:
            butlerQC = ButlerQuantumContext(butler, quantum)

        # Get the input and output references for the task.
        inputRefs, outputRefs = taskDef.connections.buildDatasetRefs(quantum)

        # Call task runQuantum() method. Catch a few known failure modes and
        # translate them into specific exit codes.
        try:
            task.runQuantum(butlerQC, inputRefs, outputRefs)
        except NoWorkFound as err:
            # Not an error, just an early exit.
            _LOG.info("Task '%s' on quantum %s exited early: %s", taskDef.label, quantum.dataId, str(err))
        except RepeatableQuantumError as err:
            if self.exitOnKnownError:
                _LOG.warning("Caught repeatable quantum error for %s (%s):", taskDef, quantum.dataId)
                _LOG.warning(err, exc_info=True)
                sys.exit(err.EXIT_CODE)
            else:
                raise
        except InvalidQuantumError as err:
            _LOG.fatal("Invalid quantum error for %s (%s):", taskDef, quantum.dataId)
            _LOG.fatal(err, exc_info=True)
            sys.exit(err.EXIT_CODE)

    def writeMetadata(self, quantum, metadata, taskDef, butler):
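        """Store task metadata in the butler.

        The `~lsst.daf.butler.DatasetRef` for the metadata is looked up by
        ``taskDef.metadataDatasetName`` in the quantum outputs;
        `InvalidQuantumError` is raised if it is missing there.
        """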

        if taskDef.metadataDatasetName is not None:
            # DatasetRef has to be in the Quantum outputs; we can look it up
            # by name.
            try:
                ref = quantum.outputs[taskDef.metadataDatasetName]
            except LookupError as exc:
                raise InvalidQuantumError(
                    f"Quantum outputs is missing metadata dataset type {taskDef.metadataDatasetName};"
                    f" this could happen due to inconsistent options between QuantumGraph generation"
                    f" and execution"
                ) from exc
            butler.put(metadata, ref[0])

    def writeLogRecords(self, quantum, taskDef, butler, store):
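        """Store captured log records in the butler, if requested.

        The log handler installed by `captureLogging` is removed from the
        root logger here (and any log file is closed); the accumulated
        records, or the streamed JSON log file, are stored only when
        ``store`` is `True` and the task defines a log output dataset.
        """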

        # If we are logging to an external file we must always try to
        # close it.
        filename = None
        if isinstance(self.log_handler, FileHandler):
            filename = self.log_handler.stream.name
            self.log_handler.close()

        if self.log_handler is not None:
            # Remove the handler so we stop accumulating log messages.
            logging.getLogger().removeHandler(self.log_handler)

        try:
            if store and taskDef.logOutputDatasetName is not None and self.log_handler is not None:
                # DatasetRef has to be in the Quantum outputs; we can look it
                # up by name.
                try:
                    ref = quantum.outputs[taskDef.logOutputDatasetName]
                except LookupError as exc:
                    raise InvalidQuantumError(
                        f"Quantum outputs is missing log output dataset type {taskDef.logOutputDatasetName};"
                        f" this could happen due to inconsistent options between QuantumGraph generation"
                        f" and execution"
                    ) from exc

                if isinstance(self.log_handler, ButlerLogRecordHandler):
                    butler.put(self.log_handler.records, ref[0])

                    # Clear the records in case the handler is reused.
                    self.log_handler.records.clear()
                else:
                    assert filename is not None, "Somehow unable to extract filename from file handler"

                    # Need to ingest this file directly into butler.
                    dataset = FileDataset(path=filename, refs=ref[0])
                    try:
                        butler.ingest(dataset, transfer="move")
                        filename = None
                    except NotImplementedError:
                        # Some datastores can't receive files (e.g. in-memory
                        # datastore when testing); we store an empty list for
                        # those just to have a dataset. The alternative is to
                        # read the file as a ButlerLogRecords object and put
                        # it.
                        _LOG.info(
                            "Log records could not be stored in this butler because the"
                            " datastore can not ingest files, empty record list is stored instead."
                        )
                        records = ButlerLogRecords.from_records([])
                        butler.put(records, ref[0])
        finally:
            # Remove the file if it was not ingested.
            if filename is not None:
                try:
                    os.remove(filename)
                except OSError:
                    pass

    def initGlobals(self, quantum, butler):
        """Initialize global state needed for task execution.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        butler : `~lsst.daf.butler.Butler`
            Data butler.

        Notes
        -----
        There is an issue with initializing the filters singleton, which is
        done by instrument; to avoid requiring tasks to do it in
        ``runQuantum()`` we do it here when any dataId has an instrument
        dimension. Also, for now we only allow a single instrument: we verify
        that all instrument names in all dataIds are identical.

        This will need revision when the filter singleton disappears.
        """
        oneInstrument = None
        for datasetRefs in chain(quantum.inputs.values(), quantum.outputs.values()):
            for datasetRef in datasetRefs:
                dataId = datasetRef.dataId
                instrument = dataId.get("instrument")
                if instrument is not None:
                    if oneInstrument is not None:
                        assert (
                            instrument == oneInstrument
                        ), "Currently require that only one instrument is used per graph"
                    else:
                        oneInstrument = instrument
                        Instrument.fromName(instrument, butler.registry)

    def getReport(self) -> Optional[QuantumReport]:
        # Docstring inherited from base class
        if self.report is None:
            raise RuntimeError("getReport() called before execute()")
        return self.report