Coverage for python/lsst/ctrl/mpexec/singleQuantumExecutor.py: 9%
258 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-03-30 02:51 -0700
« prev ^ index » next coverage.py v6.5.0, created at 2023-03-30 02:51 -0700
1# This file is part of ctrl_mpexec.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22__all__ = ["SingleQuantumExecutor"]
24# -------------------------------
25# Imports of standard modules --
26# -------------------------------
27import logging
28import os
29import sys
30import time
31from collections import defaultdict
32from collections.abc import Callable
33from itertools import chain
34from typing import Any, Optional, Union
36from lsst.daf.butler import Butler, DatasetRef, DatasetType, LimitedButler, NamedKeyDict, Quantum
37from lsst.pipe.base import (
38 AdjustQuantumHelper,
39 ButlerQuantumContext,
40 Instrument,
41 InvalidQuantumError,
42 NoWorkFound,
43 PipelineTask,
44 RepeatableQuantumError,
45 TaskDef,
46 TaskFactory,
47)
48from lsst.pipe.base.configOverrides import ConfigOverrides
50# During metadata transition phase, determine metadata class by
51# asking pipe_base
52from lsst.pipe.base.task import _TASK_FULL_METADATA_TYPE, _TASK_METADATA_TYPE
53from lsst.utils.timer import logInfo
55# -----------------------------
56# Imports for other modules --
57# -----------------------------
58from .cli.utils import _PipelineAction
59from .log_capture import LogCapture
60from .mock_task import MockButlerQuantumContext, MockPipelineTask
61from .quantumGraphExecutor import QuantumExecutor
62from .reports import QuantumReport
64# ----------------------------------
65# Local non-exported definitions --
66# ----------------------------------
68_LOG = logging.getLogger(__name__)
class SingleQuantumExecutor(QuantumExecutor):
    """Executor class which runs one Quantum at a time.

    Parameters
    ----------
    butler : `~lsst.daf.butler.Butler` or `None`
        Data butler, `None` means that Quantum-backed butler should be used
        instead.
    taskFactory : `~lsst.pipe.base.TaskFactory`
        Instance of a task factory.
    skipExistingIn : `list` [ `str` ], optional
        Accepts list of collections, if all Quantum outputs already exist in
        the specified list of collections then that Quantum will not be rerun.
    clobberOutputs : `bool`, optional
        If `True`, then existing outputs in output run collection will be
        overwritten. If ``skipExistingIn`` is defined, only outputs from
        failed quanta will be overwritten. Only used when ``butler`` is not
        `None`.
    enableLsstDebug : `bool`, optional
        Enable debugging with ``lsstDebug`` facility for a task.
    exitOnKnownError : `bool`, optional
        If `True`, call `sys.exit` with the appropriate exit code for special
        known exceptions, after printing a traceback, instead of letting the
        exception propagate up to the caller. This is always the behavior for
        InvalidQuantumError.
    mock : `bool`, optional
        If `True` then mock task execution.
    mock_configs : `list` [ `_PipelineAction` ], optional
        Optional config overrides for mock tasks.
    limited_butler_factory : `Callable`, optional
        A method that creates a `~lsst.daf.butler.LimitedButler` instance
        for a given Quantum. This parameter must be defined if ``butler`` is
        `None`. If ``butler`` is not `None` then this parameter is ignored.
    """
106 def __init__(
107 self,
108 butler: Butler | None,
109 taskFactory: TaskFactory,
110 skipExistingIn: list[str] | None = None,
111 clobberOutputs: bool = False,
112 enableLsstDebug: bool = False,
113 exitOnKnownError: bool = False,
114 mock: bool = False,
115 mock_configs: list[_PipelineAction] | None = None,
116 limited_butler_factory: Callable[[Quantum], LimitedButler] | None = None,
117 ):
118 self.butler = butler
119 self.taskFactory = taskFactory
120 self.skipExistingIn = skipExistingIn
121 self.enableLsstDebug = enableLsstDebug
122 self.clobberOutputs = clobberOutputs
123 self.exitOnKnownError = exitOnKnownError
124 self.mock = mock
125 self.mock_configs = mock_configs if mock_configs is not None else []
126 self.limited_butler_factory = limited_butler_factory
127 self.report: Optional[QuantumReport] = None
129 if self.butler is None:
130 assert not self.mock, "Mock execution only possible with full butler"
131 assert limited_butler_factory is not None, "limited_butler_factory is needed when butler is None"
133 def execute(self, taskDef: TaskDef, quantum: Quantum) -> Quantum:
134 # Docstring inherited from QuantumExecutor.execute
135 assert quantum.dataId is not None, "Quantum DataId cannot be None"
137 if self.butler is not None:
138 self.butler.registry.refresh()
140 # Catch any exception and make a report based on that.
141 try:
142 result = self._execute(taskDef, quantum)
143 self.report = QuantumReport(dataId=quantum.dataId, taskLabel=taskDef.label)
144 return result
145 except Exception as exc:
146 self.report = QuantumReport.from_exception(
147 exception=exc,
148 dataId=quantum.dataId,
149 taskLabel=taskDef.label,
150 )
151 raise
153 def _resolve_ref(self, ref: DatasetRef, collections: Any = None) -> DatasetRef | None:
154 """Return resolved reference.
156 Parameters
157 ----------
158 ref : `DatasetRef`
159 Input reference, can be either resolved or unresolved.
160 collections :
161 Collections to search for the existing reference, only used when
162 running with full butler.
164 Notes
165 -----
166 When running with Quantum-backed butler it assumes that reference is
167 already resolved and returns input references without any checks. When
168 running with full butler, it always searches registry fof a reference
169 in specified collections, even if reference is already resolved.
170 """
171 if self.butler is not None:
172 # If running with full butler, need to re-resolve it in case
173 # collections are different.
174 ref = ref.unresolved()
175 return self.butler.registry.findDataset(ref.datasetType, ref.dataId, collections=collections)
176 else:
177 # In case of QBB all refs must be resolved already, do not check.
178 return ref
180 def _execute(self, taskDef: TaskDef, quantum: Quantum) -> Quantum:
181 """Internal implementation of execute()"""
182 startTime = time.time()
184 # Make a limited butler instance if needed (which should be QBB if full
185 # butler is not defined).
186 limited_butler: LimitedButler
187 if self.butler is not None:
188 limited_butler = self.butler
189 else:
190 # We check this in constructor, but mypy needs this check here.
191 assert self.limited_butler_factory is not None
192 limited_butler = self.limited_butler_factory(quantum)
194 if self.butler is not None:
195 log_capture = LogCapture.from_full(self.butler)
196 else:
197 log_capture = LogCapture.from_limited(limited_butler)
198 with log_capture.capture_logging(taskDef, quantum) as captureLog:
199 # Save detailed resource usage before task start to metadata.
200 quantumMetadata = _TASK_METADATA_TYPE()
201 logInfo(None, "prep", metadata=quantumMetadata) # type: ignore[arg-type]
203 # check whether to skip or delete old outputs, if it returns True
204 # or raises an exception do not try to store logs, as they may be
205 # already in butler.
206 captureLog.store = False
207 if self.checkExistingOutputs(quantum, taskDef, limited_butler):
208 _LOG.info(
209 "Skipping already-successful quantum for label=%s dataId=%s.",
210 taskDef.label,
211 quantum.dataId,
212 )
213 return quantum
214 captureLog.store = True
216 try:
217 quantum = self.updatedQuantumInputs(quantum, taskDef, limited_butler)
218 except NoWorkFound as exc:
219 _LOG.info(
220 "Nothing to do for task '%s' on quantum %s; saving metadata and skipping: %s",
221 taskDef.label,
222 quantum.dataId,
223 str(exc),
224 )
225 # Make empty metadata that looks something like what a
226 # do-nothing task would write (but we don't bother with empty
227 # nested PropertySets for subtasks). This is slightly
228 # duplicative with logic in pipe_base that we can't easily call
229 # from here; we'll fix this on DM-29761.
230 logInfo(None, "end", metadata=quantumMetadata) # type: ignore[arg-type]
231 fullMetadata = _TASK_FULL_METADATA_TYPE()
232 fullMetadata[taskDef.label] = _TASK_METADATA_TYPE()
233 fullMetadata["quantum"] = quantumMetadata
234 self.writeMetadata(quantum, fullMetadata, taskDef, limited_butler)
235 return quantum
237 # enable lsstDebug debugging
238 if self.enableLsstDebug:
239 try:
240 _LOG.debug("Will try to import debug.py")
241 import debug # type: ignore # noqa:F401
242 except ImportError:
243 _LOG.warn("No 'debug' module found.")
245 # initialize global state
246 self.initGlobals(quantum)
248 # Ensure that we are executing a frozen config
249 taskDef.config.freeze()
250 logInfo(None, "init", metadata=quantumMetadata) # type: ignore[arg-type]
251 init_input_refs = []
252 for ref in quantum.initInputs.values():
253 resolved = self._resolve_ref(ref)
254 if resolved is None:
255 raise ValueError(f"Failed to resolve init input reference {ref}")
256 init_input_refs.append(resolved)
257 task = self.taskFactory.makeTask(taskDef, limited_butler, init_input_refs)
258 logInfo(None, "start", metadata=quantumMetadata) # type: ignore[arg-type]
259 try:
260 if self.mock:
261 # Use mock task instance to execute method.
262 runTask = self._makeMockTask(taskDef)
263 else:
264 runTask = task
265 self.runQuantum(runTask, quantum, taskDef, limited_butler)
266 except Exception as e:
267 _LOG.error(
268 "Execution of task '%s' on quantum %s failed. Exception %s: %s",
269 taskDef.label,
270 quantum.dataId,
271 e.__class__.__name__,
272 str(e),
273 )
274 raise
275 logInfo(None, "end", metadata=quantumMetadata) # type: ignore[arg-type]
276 fullMetadata = task.getFullMetadata()
277 fullMetadata["quantum"] = quantumMetadata
278 self.writeMetadata(quantum, fullMetadata, taskDef, limited_butler)
279 stopTime = time.time()
280 _LOG.info(
281 "Execution of task '%s' on quantum %s took %.3f seconds",
282 taskDef.label,
283 quantum.dataId,
284 stopTime - startTime,
285 )
286 return quantum
288 def _makeMockTask(self, taskDef: TaskDef) -> PipelineTask:
289 """Make an instance of mock task for given TaskDef."""
290 # Make config instance and apply overrides
291 overrides = ConfigOverrides()
292 for action in self.mock_configs:
293 if action.label == taskDef.label + "-mock":
294 if action.action == "config":
295 key, _, value = action.value.partition("=")
296 overrides.addValueOverride(key, value)
297 elif action.action == "configfile":
298 overrides.addFileOverride(os.path.expandvars(action.value))
299 else:
300 raise ValueError(f"Unexpected action for mock task config overrides: {action}")
301 config = MockPipelineTask.ConfigClass()
302 overrides.applyTo(config)
304 task = MockPipelineTask(config=config, name=taskDef.label)
305 return task
    def checkExistingOutputs(self, quantum: Quantum, taskDef: TaskDef, limited_butler: LimitedButler) -> bool:
        """Decide whether this quantum needs to be executed.

        If only partial outputs exist then they are removed if
        ``clobberOutputs`` is True, otherwise an exception is raised.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Quantum to check for existing outputs
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.
        limited_butler : `~lsst.daf.butler.LimitedButler`
            Butler (full or limited) whose datastore is consulted for output
            existence.

        Returns
        -------
        exist : `bool`
            `True` if ``self.skipExistingIn`` is defined, and a previous
            execution of this quanta appears to have completed successfully
            (either because metadata was written or all datasets were written).
            `False` otherwise.

        Raises
        ------
        RuntimeError
            Raised if some outputs exist and some not.
        """
        if self.skipExistingIn and taskDef.metadataDatasetName is not None:
            # Metadata output exists; this is sufficient to assume the previous
            # run was successful and should be skipped.
            [metadata_ref] = quantum.outputs[taskDef.metadataDatasetName]
            ref = self._resolve_ref(metadata_ref, self.skipExistingIn)
            if ref is not None:
                if limited_butler.datastore.exists(ref):
                    return True

        # Previously we always checked for existing outputs in `butler.run`,
        # now logic gets more complicated as we only want to skip quantum
        # whose outputs exist in `self.skipExistingIn` but pruning should only
        # be done for outputs existing in `butler.run`.

        def findOutputs(
            collections: Optional[Union[str, list[str]]]
        ) -> tuple[list[DatasetRef], list[DatasetRef]]:
            """Find quantum outputs in specified collections.

            Returns (existing, missing) ref lists; "existing" refs are the
            registry-resolved refs, "missing" refs are the original quantum
            refs.
            """
            existingRefs = []
            missingRefs = []
            for datasetRefs in quantum.outputs.values():
                checkRefs: list[DatasetRef] = []
                # Maps resolved (registry) refs back to the quantum's own
                # refs so missing datasets are reported with the latter.
                registryRefToQuantumRef: dict[DatasetRef, DatasetRef] = {}
                for datasetRef in datasetRefs:
                    ref = self._resolve_ref(datasetRef, collections)
                    if ref is None:
                        missingRefs.append(datasetRef)
                    else:
                        checkRefs.append(ref)
                        registryRefToQuantumRef[ref] = datasetRef

                # More efficient to ask the datastore in bulk for ref
                # existence rather than one at a time.
                existence = limited_butler.datastore.mexists(checkRefs)
                for ref, exists in existence.items():
                    if exists:
                        existingRefs.append(ref)
                    else:
                        missingRefs.append(registryRefToQuantumRef[ref])
            return existingRefs, missingRefs

        existingRefs, missingRefs = findOutputs(self.skipExistingIn)
        if self.skipExistingIn:
            if existingRefs and not missingRefs:
                # everything is already there
                return True

        # If we are to re-run quantum then prune datasets that exists in
        # output run collection, only if `self.clobberOutputs` is set,
        # that only works when we have full butler.
        if existingRefs and self.butler is not None:
            # Re-check against the output run collection specifically; the
            # first findOutputs() call searched `skipExistingIn`.
            existingRefs, missingRefs = findOutputs(self.butler.run)
            if existingRefs and missingRefs:
                _LOG.debug(
                    "Partial outputs exist for task %s dataId=%s collection=%s "
                    "existingRefs=%s missingRefs=%s",
                    taskDef,
                    quantum.dataId,
                    self.butler.run,
                    existingRefs,
                    missingRefs,
                )
                if self.clobberOutputs:
                    # only prune
                    _LOG.info("Removing partial outputs for task %s: %s", taskDef, existingRefs)
                    self.butler.pruneDatasets(existingRefs, disassociate=True, unstore=True, purge=True)
                    return False
                else:
                    raise RuntimeError(
                        "Registry inconsistency while checking for existing outputs:"
                        f" collection={self.butler.run} existingRefs={existingRefs}"
                        f" missingRefs={missingRefs}"
                    )

        # need to re-run
        return False
    def updatedQuantumInputs(
        self, quantum: Quantum, taskDef: TaskDef, limited_butler: LimitedButler
    ) -> Quantum:
        """Update quantum with extra information, returns a new updated
        Quantum.

        Some methods may require input DatasetRefs to have non-None
        ``dataset_id``, but in case of intermediate dataset it may not be
        filled during QuantumGraph construction. This method will retrieve
        missing info from registry.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.
        limited_butler : `~lsst.daf.butler.LimitedButler`
            Butler (full or limited) whose datastore is consulted for input
            existence.

        Returns
        -------
        update : `~lsst.daf.butler.Quantum`
            Updated Quantum instance
        """
        anyChanges = False
        updatedInputs: defaultdict[DatasetType, list] = defaultdict(list)
        for key, refsForDatasetType in quantum.inputs.items():
            newRefsForDatasetType = updatedInputs[key]
            for ref in refsForDatasetType:
                # Inputs may already be resolved even if they do not exist, but
                # we have to re-resolve them because IDs are ignored on output.
                # Check datastore for existence first to cover calibration
                # dataset types, as they would need a timespan for findDataset.
                resolvedRef: DatasetRef | None
                checked_datastore = False
                if ref.id is not None and limited_butler.datastore.exists(ref):
                    resolvedRef = ref
                    checked_datastore = True
                elif self.butler is not None:
                    # In case of full butler try to (re-)resolve it.
                    resolvedRef = self._resolve_ref(ref)
                    if resolvedRef is None:
                        _LOG.info("No dataset found for %s", ref)
                        continue
                    else:
                        _LOG.debug("Updated dataset ID for %s", ref)
                else:
                    # QBB with missing intermediate
                    _LOG.info("No dataset found for %s", ref)
                    continue

                # In case of mock execution we check that mock dataset exists
                # instead. Mock execution is only possible with full butler.
                if self.mock and self.butler is not None:
                    try:
                        typeName, component = ref.datasetType.nameAndComponent()
                        if component is not None:
                            mockDatasetTypeName = MockButlerQuantumContext.mockDatasetTypeName(typeName)
                        else:
                            mockDatasetTypeName = MockButlerQuantumContext.mockDatasetTypeName(
                                ref.datasetType.name
                            )

                        mockDatasetType = self.butler.registry.getDatasetType(mockDatasetTypeName)
                    except KeyError:
                        # means that mock dataset type is not there and this
                        # should be a pre-existing dataset
                        _LOG.debug("No mock dataset type for %s", ref)
                        if self.butler.datastore.exists(resolvedRef):
                            newRefsForDatasetType.append(resolvedRef)
                    else:
                        # Mock dataset type exists; keep the real (resolved)
                        # input ref only if its mock counterpart dataset is
                        # actually stored.
                        mockRef = DatasetRef(mockDatasetType, ref.dataId)
                        resolvedMockRef = self.butler.registry.findDataset(
                            mockRef.datasetType, mockRef.dataId, collections=self.butler.collections
                        )
                        _LOG.debug("mockRef=%s resolvedMockRef=%s", mockRef, resolvedMockRef)
                        if resolvedMockRef is not None and self.butler.datastore.exists(resolvedMockRef):
                            _LOG.debug("resolvedMockRef dataset exists")
                            newRefsForDatasetType.append(resolvedRef)
                elif checked_datastore or limited_butler.datastore.exists(resolvedRef):
                    # We need to ask datastore if the dataset actually exists
                    # because the Registry of a local "execution butler"
                    # cannot know this (because we prepopulate it with all of
                    # the datasets that might be created).
                    newRefsForDatasetType.append(resolvedRef)

            if len(newRefsForDatasetType) != len(refsForDatasetType):
                anyChanges = True
        # If we removed any input datasets, let the task check if it has enough
        # to proceed and/or prune related datasets that it also doesn't
        # need/produce anymore. It will raise NoWorkFound if it can't run,
        # which we'll let propagate up. This is exactly what we run during QG
        # generation, because a task shouldn't care whether an input is missing
        # because some previous task didn't produce it, or because it just
        # wasn't there during QG generation.
        namedUpdatedInputs = NamedKeyDict[DatasetType, list[DatasetRef]](updatedInputs.items())
        helper = AdjustQuantumHelper(namedUpdatedInputs, quantum.outputs)
        if anyChanges:
            assert quantum.dataId is not None, "Quantum DataId cannot be None"
            helper.adjust_in_place(taskDef.connections, label=taskDef.label, data_id=quantum.dataId)
        return Quantum(
            taskName=quantum.taskName,
            taskClass=quantum.taskClass,
            dataId=quantum.dataId,
            initInputs=quantum.initInputs,
            inputs=helper.inputs,
            outputs=helper.outputs,
        )
518 def runQuantum(
519 self, task: PipelineTask, quantum: Quantum, taskDef: TaskDef, limited_butler: LimitedButler
520 ) -> None:
521 """Execute task on a single quantum.
523 Parameters
524 ----------
525 task : `~lsst.pipe.base.PipelineTask`
526 Task object.
527 quantum : `~lsst.daf.butler.Quantum`
528 Single Quantum instance.
529 taskDef : `~lsst.pipe.base.TaskDef`
530 Task definition structure.
531 """
532 # Create a butler that operates in the context of a quantum
533 if self.butler is None:
534 butlerQC = ButlerQuantumContext.from_limited(limited_butler, quantum)
535 else:
536 if self.mock:
537 butlerQC = MockButlerQuantumContext(self.butler, quantum)
538 else:
539 butlerQC = ButlerQuantumContext.from_full(self.butler, quantum)
541 # Get the input and output references for the task
542 inputRefs, outputRefs = taskDef.connections.buildDatasetRefs(quantum)
544 # Call task runQuantum() method. Catch a few known failure modes and
545 # translate them into specific
546 try:
547 task.runQuantum(butlerQC, inputRefs, outputRefs)
548 except NoWorkFound as err:
549 # Not an error, just an early exit.
550 _LOG.info("Task '%s' on quantum %s exited early: %s", taskDef.label, quantum.dataId, str(err))
551 pass
552 except RepeatableQuantumError as err:
553 if self.exitOnKnownError:
554 _LOG.warning("Caught repeatable quantum error for %s (%s):", taskDef, quantum.dataId)
555 _LOG.warning(err, exc_info=True)
556 sys.exit(err.EXIT_CODE)
557 else:
558 raise
559 except InvalidQuantumError as err:
560 _LOG.fatal("Invalid quantum error for %s (%s): %s", taskDef, quantum.dataId)
561 _LOG.fatal(err, exc_info=True)
562 sys.exit(err.EXIT_CODE)
564 def writeMetadata(
565 self, quantum: Quantum, metadata: Any, taskDef: TaskDef, limited_butler: LimitedButler
566 ) -> None:
567 if taskDef.metadataDatasetName is not None:
568 # DatasetRef has to be in the Quantum outputs, can lookup by name
569 try:
570 [ref] = quantum.outputs[taskDef.metadataDatasetName]
571 except LookupError as exc:
572 raise InvalidQuantumError(
573 f"Quantum outputs is missing metadata dataset type {taskDef.metadataDatasetName};"
574 " this could happen due to inconsistent options between QuantumGraph generation"
575 " and execution"
576 ) from exc
577 if self.butler is not None:
578 # Dataset ref can already be resolved, for non-QBB executor we
579 # have to ignore that because may be overriding run
580 # collection.
581 if ref.id is not None:
582 ref = ref.unresolved()
583 self.butler.put(metadata, ref)
584 else:
585 limited_butler.putDirect(metadata, ref)
587 def initGlobals(self, quantum: Quantum) -> None:
588 """Initialize global state needed for task execution.
590 Parameters
591 ----------
592 quantum : `~lsst.daf.butler.Quantum`
593 Single Quantum instance.
595 Notes
596 -----
597 There is an issue with initializing filters singleton which is done
598 by instrument, to avoid requiring tasks to do it in runQuantum()
599 we do it here when any dataId has an instrument dimension. Also for
600 now we only allow single instrument, verify that all instrument
601 names in all dataIds are identical.
603 This will need revision when filter singleton disappears.
604 """
605 # can only work for full butler
606 if self.butler is None:
607 return
608 oneInstrument = None
609 for datasetRefs in chain(quantum.inputs.values(), quantum.outputs.values()):
610 for datasetRef in datasetRefs:
611 dataId = datasetRef.dataId
612 instrument = dataId.get("instrument")
613 if instrument is not None:
614 if oneInstrument is not None:
615 assert ( # type: ignore
616 instrument == oneInstrument
617 ), "Currently require that only one instrument is used per graph"
618 else:
619 oneInstrument = instrument
620 Instrument.fromName(instrument, self.butler.registry)
622 def getReport(self) -> Optional[QuantumReport]:
623 # Docstring inherited from base class
624 if self.report is None:
625 raise RuntimeError("getReport() called before execute()")
626 return self.report