Coverage for python/lsst/ctrl/mpexec/singleQuantumExecutor.py: 8%
261 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-02-07 02:42 -0800
« prev ^ index » next coverage.py v6.5.0, created at 2023-02-07 02:42 -0800
1# This file is part of ctrl_mpexec.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22__all__ = ["SingleQuantumExecutor"]
24# -------------------------------
25# Imports of standard modules --
26# -------------------------------
27import logging
28import os
29import sys
30import time
31from collections import defaultdict
32from itertools import chain
33from typing import Any, Optional, Union
35from lsst.daf.butler import (
36 Butler,
37 Config,
38 DatasetRef,
39 DatasetType,
40 DimensionUniverse,
41 LimitedButler,
42 NamedKeyDict,
43 Quantum,
44 QuantumBackedButler,
45)
46from lsst.pipe.base import (
47 AdjustQuantumHelper,
48 ButlerQuantumContext,
49 Instrument,
50 InvalidQuantumError,
51 NoWorkFound,
52 PipelineTask,
53 RepeatableQuantumError,
54 TaskDef,
55 TaskFactory,
56)
57from lsst.pipe.base.configOverrides import ConfigOverrides
59# During metadata transition phase, determine metadata class by
60# asking pipe_base
61from lsst.pipe.base.task import _TASK_FULL_METADATA_TYPE, _TASK_METADATA_TYPE
62from lsst.utils.timer import logInfo
64# -----------------------------
65# Imports for other modules --
66# -----------------------------
67from .cli.utils import _PipelineAction
68from .log_capture import LogCapture
69from .mock_task import MockButlerQuantumContext, MockPipelineTask
70from .quantumGraphExecutor import QuantumExecutor
71from .reports import QuantumReport
73# ----------------------------------
74# Local non-exported definitions --
75# ----------------------------------
77_LOG = logging.getLogger(__name__)
class SingleQuantumExecutor(QuantumExecutor):
    """Executor class which runs one Quantum at a time.

    Parameters
    ----------
    butler : `~lsst.daf.butler.Butler` or `None`
        Data butler, `None` means that Quantum-backed butler should be used
        instead.
    taskFactory : `~lsst.pipe.base.TaskFactory`
        Instance of a task factory.
    skipExistingIn : `list` [ `str` ], optional
        Accepts list of collections, if all Quantum outputs already exist in
        the specified list of collections then that Quantum will not be rerun.
    clobberOutputs : `bool`, optional
        If `True`, then existing outputs in output run collection will be
        overwritten. If ``skipExistingIn`` is defined, only outputs from
        failed quanta will be overwritten. Only used when ``butler`` is not
        `None`.
    enableLsstDebug : `bool`, optional
        Enable debugging with ``lsstDebug`` facility for a task.
    exitOnKnownError : `bool`, optional
        If `True`, call `sys.exit` with the appropriate exit code for special
        known exceptions, after printing a traceback, instead of letting the
        exception propagate up to calling. This is always the behavior for
        InvalidQuantumError.
    mock : `bool`, optional
        If `True` then mock task execution.
    mock_configs : `list` [ `_PipelineAction` ], optional
        Optional config overrides for mock tasks.
    butler_config : `~lsst.daf.butler.Config` or `str`, optional
        Butler configuration (or a path to one) used to construct a
        `~lsst.daf.butler.QuantumBackedButler`; must be provided (together
        with ``universe``) when ``butler`` is `None`, and must be `None`
        otherwise.
    universe : `~lsst.daf.butler.DimensionUniverse`, optional
        Dimension universe for the Quantum-backed butler; must be provided
        (together with ``butler_config``) when ``butler`` is `None`, and
        must be `None` otherwise.
    """
111 def __init__(
112 self,
113 butler: Butler | None,
114 taskFactory: TaskFactory,
115 skipExistingIn: list[str] | None = None,
116 clobberOutputs: bool = False,
117 enableLsstDebug: bool = False,
118 exitOnKnownError: bool = False,
119 mock: bool = False,
120 mock_configs: list[_PipelineAction] | None = None,
121 butler_config: Config | str | None = None,
122 universe: DimensionUniverse | None = None,
123 ):
124 self.butler = butler
125 self.taskFactory = taskFactory
126 self.skipExistingIn = skipExistingIn
127 self.enableLsstDebug = enableLsstDebug
128 self.clobberOutputs = clobberOutputs
129 self.exitOnKnownError = exitOnKnownError
130 self.mock = mock
131 self.mock_configs = mock_configs if mock_configs is not None else []
132 self.butler_config = butler_config
133 self.universe = universe
134 self.report: Optional[QuantumReport] = None
136 if self.butler is None:
137 assert not self.mock, "Mock execution only possible with full butler"
138 if self.butler is not None:
139 assert butler_config is None and universe is None
140 if self.butler is None:
141 assert butler_config is not None and universe is not None
143 def execute(self, taskDef: TaskDef, quantum: Quantum) -> Quantum:
144 # Docstring inherited from QuantumExecutor.execute
145 assert quantum.dataId is not None, "Quantum DataId cannot be None"
147 if self.butler is not None:
148 self.butler.registry.refresh()
150 # Catch any exception and make a report based on that.
151 try:
152 result = self._execute(taskDef, quantum)
153 self.report = QuantumReport(dataId=quantum.dataId, taskLabel=taskDef.label)
154 return result
155 except Exception as exc:
156 self.report = QuantumReport.from_exception(
157 exception=exc,
158 dataId=quantum.dataId,
159 taskLabel=taskDef.label,
160 )
161 raise
    def _resolve_ref(self, ref: DatasetRef, collections: Any = None) -> DatasetRef | None:
        """Return resolved reference.

        Parameters
        ----------
        ref : `DatasetRef`
            Input reference, can be either resolved or unresolved.
        collections :
            Collections to search for the existing reference, only used when
            running with full butler.

        Returns
        -------
        resolved : `DatasetRef` or `None`
            Resolved reference; `None` only in the full-butler case, when the
            dataset was not found in the given collections.

        Notes
        -----
        When running with Quantum-backed butler it assumes that reference is
        already resolved and returns input references without any checks. When
        running with full butler, it always searches registry for a reference
        in specified collections, even if reference is already resolved.
        """
        if self.butler is not None:
            # If running with full butler, need to re-resolve it in case
            # collections are different.
            ref = ref.unresolved()
            return self.butler.registry.findDataset(ref.datasetType, ref.dataId, collections=collections)
        else:
            # In case of QBB all refs must be resolved already, do not check.
            return ref
190 def _execute(self, taskDef: TaskDef, quantum: Quantum) -> Quantum:
191 """Internal implementation of execute()"""
192 startTime = time.time()
194 # Make butler instance
195 limited_butler: LimitedButler
196 if self.butler is not None:
197 limited_butler = self.butler
198 else:
199 assert self.butler_config is not None and self.universe is not None
200 limited_butler = QuantumBackedButler.initialize(
201 config=self.butler_config,
202 quantum=quantum,
203 dimensions=self.universe,
204 )
206 if self.butler is not None:
207 log_capture = LogCapture.from_full(self.butler)
208 else:
209 log_capture = LogCapture.from_limited(limited_butler)
210 with log_capture.capture_logging(taskDef, quantum) as captureLog:
211 # Save detailed resource usage before task start to metadata.
212 quantumMetadata = _TASK_METADATA_TYPE()
213 logInfo(None, "prep", metadata=quantumMetadata) # type: ignore[arg-type]
215 # check whether to skip or delete old outputs, if it returns True
216 # or raises an exception do not try to store logs, as they may be
217 # already in butler.
218 captureLog.store = False
219 if self.checkExistingOutputs(quantum, taskDef, limited_butler):
220 _LOG.info(
221 "Skipping already-successful quantum for label=%s dataId=%s.",
222 taskDef.label,
223 quantum.dataId,
224 )
225 return quantum
226 captureLog.store = True
228 try:
229 quantum = self.updatedQuantumInputs(quantum, taskDef, limited_butler)
230 except NoWorkFound as exc:
231 _LOG.info(
232 "Nothing to do for task '%s' on quantum %s; saving metadata and skipping: %s",
233 taskDef.label,
234 quantum.dataId,
235 str(exc),
236 )
237 # Make empty metadata that looks something like what a
238 # do-nothing task would write (but we don't bother with empty
239 # nested PropertySets for subtasks). This is slightly
240 # duplicative with logic in pipe_base that we can't easily call
241 # from here; we'll fix this on DM-29761.
242 logInfo(None, "end", metadata=quantumMetadata) # type: ignore[arg-type]
243 fullMetadata = _TASK_FULL_METADATA_TYPE()
244 fullMetadata[taskDef.label] = _TASK_METADATA_TYPE()
245 fullMetadata["quantum"] = quantumMetadata
246 self.writeMetadata(quantum, fullMetadata, taskDef, limited_butler)
247 return quantum
249 # enable lsstDebug debugging
250 if self.enableLsstDebug:
251 try:
252 _LOG.debug("Will try to import debug.py")
253 import debug # type: ignore # noqa:F401
254 except ImportError:
255 _LOG.warn("No 'debug' module found.")
257 # initialize global state
258 self.initGlobals(quantum)
260 # Ensure that we are executing a frozen config
261 taskDef.config.freeze()
262 logInfo(None, "init", metadata=quantumMetadata) # type: ignore[arg-type]
263 init_input_refs = []
264 for ref in quantum.initInputs.values():
265 resolved = self._resolve_ref(ref)
266 if resolved is None:
267 raise ValueError(f"Failed to resolve init input reference {ref}")
268 init_input_refs.append(resolved)
269 task = self.taskFactory.makeTask(taskDef, limited_butler, init_input_refs)
270 logInfo(None, "start", metadata=quantumMetadata) # type: ignore[arg-type]
271 try:
272 if self.mock:
273 # Use mock task instance to execute method.
274 runTask = self._makeMockTask(taskDef)
275 else:
276 runTask = task
277 self.runQuantum(runTask, quantum, taskDef, limited_butler)
278 except Exception as e:
279 _LOG.error(
280 "Execution of task '%s' on quantum %s failed. Exception %s: %s",
281 taskDef.label,
282 quantum.dataId,
283 e.__class__.__name__,
284 str(e),
285 )
286 raise
287 logInfo(None, "end", metadata=quantumMetadata) # type: ignore[arg-type]
288 fullMetadata = task.getFullMetadata()
289 fullMetadata["quantum"] = quantumMetadata
290 self.writeMetadata(quantum, fullMetadata, taskDef, limited_butler)
291 stopTime = time.time()
292 _LOG.info(
293 "Execution of task '%s' on quantum %s took %.3f seconds",
294 taskDef.label,
295 quantum.dataId,
296 stopTime - startTime,
297 )
298 return quantum
300 def _makeMockTask(self, taskDef: TaskDef) -> PipelineTask:
301 """Make an instance of mock task for given TaskDef."""
302 # Make config instance and apply overrides
303 overrides = ConfigOverrides()
304 for action in self.mock_configs:
305 if action.label == taskDef.label + "-mock":
306 if action.action == "config":
307 key, _, value = action.value.partition("=")
308 overrides.addValueOverride(key, value)
309 elif action.action == "configfile":
310 overrides.addFileOverride(os.path.expandvars(action.value))
311 else:
312 raise ValueError(f"Unexpected action for mock task config overrides: {action}")
313 config = MockPipelineTask.ConfigClass()
314 overrides.applyTo(config)
316 task = MockPipelineTask(config=config, name=taskDef.label)
317 return task
    def checkExistingOutputs(self, quantum: Quantum, taskDef: TaskDef, limited_butler: LimitedButler) -> bool:
        """Decide whether this quantum needs to be executed.

        If only partial outputs exist then they are removed if
        ``clobberOutputs`` is True, otherwise an exception is raised.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Quantum to check for existing outputs
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.
        limited_butler : `~lsst.daf.butler.LimitedButler`
            Butler whose datastore is consulted for dataset existence.

        Returns
        -------
        exist : `bool`
            `True` if ``self.skipExistingIn`` is defined, and a previous
            execution of this quanta appears to have completed successfully
            (either because metadata was written or all datasets were written).
            `False` otherwise.

        Raises
        ------
        RuntimeError
            Raised if some outputs exist and some not.
        """
        if self.skipExistingIn and taskDef.metadataDatasetName is not None:
            # Metadata output exists; this is sufficient to assume the previous
            # run was successful and should be skipped.
            [metadata_ref] = quantum.outputs[taskDef.metadataDatasetName]
            ref = self._resolve_ref(metadata_ref, self.skipExistingIn)
            if ref is not None:
                if limited_butler.datastore.exists(ref):
                    return True

        # Previously we always checked for existing outputs in `butler.run`,
        # now logic gets more complicated as we only want to skip quantum
        # whose outputs exist in `self.skipExistingIn` but pruning should only
        # be done for outputs existing in `butler.run`.

        def findOutputs(
            collections: Optional[Union[str, list[str]]]
        ) -> tuple[list[DatasetRef], list[DatasetRef]]:
            """Find quantum outputs in specified collections."""
            existingRefs = []
            missingRefs = []
            for datasetRefs in quantum.outputs.values():
                checkRefs: list[DatasetRef] = []
                # Map each registry-resolved ref back to the quantum's own
                # ref so missing datasets are reported in quantum terms.
                registryRefToQuantumRef: dict[DatasetRef, DatasetRef] = {}
                for datasetRef in datasetRefs:
                    ref = self._resolve_ref(datasetRef, collections)
                    if ref is None:
                        missingRefs.append(datasetRef)
                    else:
                        checkRefs.append(ref)
                        registryRefToQuantumRef[ref] = datasetRef

                # More efficient to ask the datastore in bulk for ref
                # existence rather than one at a time.
                existence = limited_butler.datastore.mexists(checkRefs)
                for ref, exists in existence.items():
                    if exists:
                        existingRefs.append(ref)
                    else:
                        missingRefs.append(registryRefToQuantumRef[ref])
            return existingRefs, missingRefs

        existingRefs, missingRefs = findOutputs(self.skipExistingIn)
        if self.skipExistingIn:
            if existingRefs and not missingRefs:
                # everything is already there
                return True

        # If we are to re-run quantum then prune datasets that exists in
        # output run collection, only if `self.clobberOutputs` is set,
        # that only works when we have full butler.
        if existingRefs and self.butler is not None:
            # Re-scan restricted to the output run; only those datasets may
            # be clobbered.
            existingRefs, missingRefs = findOutputs(self.butler.run)
            if existingRefs and missingRefs:
                _LOG.debug(
                    "Partial outputs exist for task %s dataId=%s collection=%s "
                    "existingRefs=%s missingRefs=%s",
                    taskDef,
                    quantum.dataId,
                    self.butler.run,
                    existingRefs,
                    missingRefs,
                )
                if self.clobberOutputs:
                    # only prune
                    _LOG.info("Removing partial outputs for task %s: %s", taskDef, existingRefs)
                    self.butler.pruneDatasets(existingRefs, disassociate=True, unstore=True, purge=True)
                    return False
                else:
                    raise RuntimeError(
                        "Registry inconsistency while checking for existing outputs:"
                        f" collection={self.butler.run} existingRefs={existingRefs}"
                        f" missingRefs={missingRefs}"
                    )

        # need to re-run
        return False
    def updatedQuantumInputs(
        self, quantum: Quantum, taskDef: TaskDef, limited_butler: LimitedButler
    ) -> Quantum:
        """Update quantum with extra information, returns a new updated
        Quantum.

        Some methods may require input DatasetRefs to have non-None
        ``dataset_id``, but in case of intermediate dataset it may not be
        filled during QuantumGraph construction. This method will retrieve
        missing info from registry.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.
        limited_butler : `~lsst.daf.butler.LimitedButler`
            Butler whose datastore is consulted for dataset existence.

        Returns
        -------
        update : `~lsst.daf.butler.Quantum`
            Updated Quantum instance
        """
        anyChanges = False
        updatedInputs: defaultdict[DatasetType, list] = defaultdict(list)
        for key, refsForDatasetType in quantum.inputs.items():
            newRefsForDatasetType = updatedInputs[key]
            for ref in refsForDatasetType:
                # Inputs may already be resolved even if they do not exist, but
                # we have to re-resolve them because IDs are ignored on output.
                # Check datastore for existence first to cover calibration
                # dataset types, as they would need a timespan for findDataset.
                resolvedRef: DatasetRef | None
                checked_datastore = False
                if ref.id is not None and limited_butler.datastore.exists(ref):
                    resolvedRef = ref
                    checked_datastore = True
                elif self.butler is not None:
                    # In case of full butler try to (re-)resolve it.
                    resolvedRef = self._resolve_ref(ref)
                    if resolvedRef is None:
                        _LOG.info("No dataset found for %s", ref)
                        continue
                    else:
                        _LOG.debug("Updated dataset ID for %s", ref)
                else:
                    # QBB with missing intermediate
                    _LOG.info("No dataset found for %s", ref)
                    continue

                # In case of mock execution we check that mock dataset exists
                # instead. Mock execution is only possible with full butler.
                if self.mock and self.butler is not None:
                    try:
                        typeName, component = ref.datasetType.nameAndComponent()
                        if component is not None:
                            mockDatasetTypeName = MockButlerQuantumContext.mockDatasetTypeName(typeName)
                        else:
                            mockDatasetTypeName = MockButlerQuantumContext.mockDatasetTypeName(
                                ref.datasetType.name
                            )

                        mockDatasetType = self.butler.registry.getDatasetType(mockDatasetTypeName)
                    except KeyError:
                        # means that mock dataset type is not there and this
                        # should be a pre-existing dataset
                        _LOG.debug("No mock dataset type for %s", ref)
                        if self.butler.datastore.exists(resolvedRef):
                            newRefsForDatasetType.append(resolvedRef)
                    else:
                        # Mock dataset type exists: keep the real input only
                        # if its mock counterpart was actually produced.
                        mockRef = DatasetRef(mockDatasetType, ref.dataId)
                        resolvedMockRef = self.butler.registry.findDataset(
                            mockRef.datasetType, mockRef.dataId, collections=self.butler.collections
                        )
                        _LOG.debug("mockRef=%s resolvedMockRef=%s", mockRef, resolvedMockRef)
                        if resolvedMockRef is not None and self.butler.datastore.exists(resolvedMockRef):
                            _LOG.debug("resolvedMockRef dataset exists")
                            newRefsForDatasetType.append(resolvedRef)
                elif checked_datastore or limited_butler.datastore.exists(resolvedRef):
                    # We need to ask datastore if the dataset actually exists
                    # because the Registry of a local "execution butler"
                    # cannot know this (because we prepopulate it with all of
                    # the datasets that might be created).
                    newRefsForDatasetType.append(resolvedRef)

            if len(newRefsForDatasetType) != len(refsForDatasetType):
                anyChanges = True
        # If we removed any input datasets, let the task check if it has enough
        # to proceed and/or prune related datasets that it also doesn't
        # need/produce anymore. It will raise NoWorkFound if it can't run,
        # which we'll let propagate up. This is exactly what we run during QG
        # generation, because a task shouldn't care whether an input is missing
        # because some previous task didn't produce it, or because it just
        # wasn't there during QG generation.
        namedUpdatedInputs = NamedKeyDict[DatasetType, list[DatasetRef]](updatedInputs.items())
        helper = AdjustQuantumHelper(namedUpdatedInputs, quantum.outputs)
        if anyChanges:
            assert quantum.dataId is not None, "Quantum DataId cannot be None"
            helper.adjust_in_place(taskDef.connections, label=taskDef.label, data_id=quantum.dataId)
        return Quantum(
            taskName=quantum.taskName,
            taskClass=quantum.taskClass,
            dataId=quantum.dataId,
            initInputs=quantum.initInputs,
            inputs=helper.inputs,
            outputs=helper.outputs,
        )
530 def runQuantum(
531 self, task: PipelineTask, quantum: Quantum, taskDef: TaskDef, limited_butler: LimitedButler
532 ) -> None:
533 """Execute task on a single quantum.
535 Parameters
536 ----------
537 task : `~lsst.pipe.base.PipelineTask`
538 Task object.
539 quantum : `~lsst.daf.butler.Quantum`
540 Single Quantum instance.
541 taskDef : `~lsst.pipe.base.TaskDef`
542 Task definition structure.
543 """
544 # Create a butler that operates in the context of a quantum
545 if self.butler is None:
546 butlerQC = ButlerQuantumContext.from_limited(limited_butler, quantum)
547 else:
548 if self.mock:
549 butlerQC = MockButlerQuantumContext(self.butler, quantum)
550 else:
551 butlerQC = ButlerQuantumContext.from_full(self.butler, quantum)
553 # Get the input and output references for the task
554 inputRefs, outputRefs = taskDef.connections.buildDatasetRefs(quantum)
556 # Call task runQuantum() method. Catch a few known failure modes and
557 # translate them into specific
558 try:
559 task.runQuantum(butlerQC, inputRefs, outputRefs)
560 except NoWorkFound as err:
561 # Not an error, just an early exit.
562 _LOG.info("Task '%s' on quantum %s exited early: %s", taskDef.label, quantum.dataId, str(err))
563 pass
564 except RepeatableQuantumError as err:
565 if self.exitOnKnownError:
566 _LOG.warning("Caught repeatable quantum error for %s (%s):", taskDef, quantum.dataId)
567 _LOG.warning(err, exc_info=True)
568 sys.exit(err.EXIT_CODE)
569 else:
570 raise
571 except InvalidQuantumError as err:
572 _LOG.fatal("Invalid quantum error for %s (%s): %s", taskDef, quantum.dataId)
573 _LOG.fatal(err, exc_info=True)
574 sys.exit(err.EXIT_CODE)
576 def writeMetadata(
577 self, quantum: Quantum, metadata: Any, taskDef: TaskDef, limited_butler: LimitedButler
578 ) -> None:
579 if taskDef.metadataDatasetName is not None:
580 # DatasetRef has to be in the Quantum outputs, can lookup by name
581 try:
582 [ref] = quantum.outputs[taskDef.metadataDatasetName]
583 except LookupError as exc:
584 raise InvalidQuantumError(
585 f"Quantum outputs is missing metadata dataset type {taskDef.metadataDatasetName};"
586 " this could happen due to inconsistent options between QuantumGraph generation"
587 " and execution"
588 ) from exc
589 if self.butler is not None:
590 # Dataset ref can already be resolved, for non-QBB executor we
591 # have to ignore that because may be overriding run
592 # collection.
593 if ref.id is not None:
594 ref = ref.unresolved()
595 self.butler.put(metadata, ref)
596 else:
597 limited_butler.putDirect(metadata, ref)
599 def initGlobals(self, quantum: Quantum) -> None:
600 """Initialize global state needed for task execution.
602 Parameters
603 ----------
604 quantum : `~lsst.daf.butler.Quantum`
605 Single Quantum instance.
607 Notes
608 -----
609 There is an issue with initializing filters singleton which is done
610 by instrument, to avoid requiring tasks to do it in runQuantum()
611 we do it here when any dataId has an instrument dimension. Also for
612 now we only allow single instrument, verify that all instrument
613 names in all dataIds are identical.
615 This will need revision when filter singleton disappears.
616 """
617 # can only work for full butler
618 if self.butler is None:
619 return
620 oneInstrument = None
621 for datasetRefs in chain(quantum.inputs.values(), quantum.outputs.values()):
622 for datasetRef in datasetRefs:
623 dataId = datasetRef.dataId
624 instrument = dataId.get("instrument")
625 if instrument is not None:
626 if oneInstrument is not None:
627 assert ( # type: ignore
628 instrument == oneInstrument
629 ), "Currently require that only one instrument is used per graph"
630 else:
631 oneInstrument = instrument
632 Instrument.fromName(instrument, self.butler.registry)
634 def getReport(self) -> Optional[QuantumReport]:
635 # Docstring inherited from base class
636 if self.report is None:
637 raise RuntimeError("getReport() called before execute()")
638 return self.report