Coverage for python/lsst/ctrl/mpexec/singleQuantumExecutor.py: 8%
261 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-01-18 10:02 +0000
1# This file is part of ctrl_mpexec.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22__all__ = ["SingleQuantumExecutor"]
24# -------------------------------
25# Imports of standard modules --
26# -------------------------------
27import logging
28import os
29import sys
30import time
31from collections import defaultdict
32from itertools import chain
33from typing import Any, Optional, Union
35from lsst.daf.butler import (
36 Butler,
37 Config,
38 DatasetRef,
39 DatasetType,
40 DimensionUniverse,
41 LimitedButler,
42 NamedKeyDict,
43 Quantum,
44 QuantumBackedButler,
45)
46from lsst.pipe.base import (
47 AdjustQuantumHelper,
48 ButlerQuantumContext,
49 Instrument,
50 InvalidQuantumError,
51 NoWorkFound,
52 PipelineTask,
53 RepeatableQuantumError,
54 TaskDef,
55 TaskFactory,
56)
57from lsst.pipe.base.configOverrides import ConfigOverrides
59# During metadata transition phase, determine metadata class by
60# asking pipe_base
61from lsst.pipe.base.task import _TASK_FULL_METADATA_TYPE, _TASK_METADATA_TYPE
62from lsst.utils.timer import logInfo
64# -----------------------------
65# Imports for other modules --
66# -----------------------------
67from .cli.utils import _PipelineAction
68from .log_capture import LogCapture
69from .mock_task import MockButlerQuantumContext, MockPipelineTask
70from .quantumGraphExecutor import QuantumExecutor
71from .reports import QuantumReport
73# ----------------------------------
74# Local non-exported definitions --
75# ----------------------------------
77_LOG = logging.getLogger(__name__)
class SingleQuantumExecutor(QuantumExecutor):
    """Executor class which runs one Quantum at a time.

    Parameters
    ----------
    butler : `~lsst.daf.butler.Butler` or `None`
        Data butler, `None` means that Quantum-backed butler should be used
        instead.
    taskFactory : `~lsst.pipe.base.TaskFactory`
        Instance of a task factory.
    skipExistingIn : `list` [ `str` ], optional
        Accepts list of collections, if all Quantum outputs already exist in
        the specified list of collections then that Quantum will not be rerun.
    clobberOutputs : `bool`, optional
        If `True`, then existing outputs in output run collection will be
        overwritten. If ``skipExistingIn`` is defined, only outputs from
        failed quanta will be overwritten. Only used when ``butler`` is not
        `None`.
    enableLsstDebug : `bool`, optional
        Enable debugging with ``lsstDebug`` facility for a task.
    exitOnKnownError : `bool`, optional
        If `True`, call `sys.exit` with the appropriate exit code for special
        known exceptions, after printing a traceback, instead of letting the
        exception propagate up to calling. This is always the behavior for
        InvalidQuantumError.
    mock : `bool`, optional
        If `True` then mock task execution.
    mock_configs : `list` [ `_PipelineAction` ], optional
        Optional config overrides for mock tasks.
    butler_config : `~lsst.daf.butler.Config` or `str`, optional
        Butler configuration used to construct a Quantum-backed butler;
        required (together with ``universe``) when ``butler`` is `None`,
        and must be omitted otherwise.
    universe : `~lsst.daf.butler.DimensionUniverse`, optional
        Dimension universe for the Quantum-backed butler; required when
        ``butler`` is `None`, and must be omitted otherwise.
    """
111 def __init__(
112 self,
113 butler: Butler | None,
114 taskFactory: TaskFactory,
115 skipExistingIn: list[str] | None = None,
116 clobberOutputs: bool = False,
117 enableLsstDebug: bool = False,
118 exitOnKnownError: bool = False,
119 mock: bool = False,
120 mock_configs: list[_PipelineAction] | None = None,
121 butler_config: Config | str | None = None,
122 universe: DimensionUniverse | None = None,
123 ):
124 self.butler = butler
125 self.taskFactory = taskFactory
126 self.skipExistingIn = skipExistingIn
127 self.enableLsstDebug = enableLsstDebug
128 self.clobberOutputs = clobberOutputs
129 self.exitOnKnownError = exitOnKnownError
130 self.mock = mock
131 self.mock_configs = mock_configs if mock_configs is not None else []
132 self.butler_config = butler_config
133 self.universe = universe
134 self.report: Optional[QuantumReport] = None
136 if self.butler is None:
137 assert not self.mock, "Mock execution only possible with full butler"
138 if self.butler is not None:
139 assert butler_config is None and universe is None
140 if self.butler is None:
141 assert butler_config is not None and universe is not None
143 def execute(self, taskDef: TaskDef, quantum: Quantum) -> Quantum:
144 # Docstring inherited from QuantumExecutor.execute
145 assert quantum.dataId is not None, "Quantum DataId cannot be None"
147 if self.butler is not None:
148 self.butler.registry.refresh()
150 # Catch any exception and make a report based on that.
151 try:
152 result = self._execute(taskDef, quantum)
153 self.report = QuantumReport(dataId=quantum.dataId, taskLabel=taskDef.label)
154 return result
155 except Exception as exc:
156 self.report = QuantumReport.from_exception(
157 exception=exc,
158 dataId=quantum.dataId,
159 taskLabel=taskDef.label,
160 )
161 raise
163 def _resolve_ref(self, ref: DatasetRef, collections: Any = None) -> DatasetRef | None:
164 """Return resolved reference.
166 Parameters
167 ----------
168 ref : `DatasetRef`
169 Input reference, can be either resolved or unresolved.
170 collections :
171 Collections to search for the existing reference, only used when
172 running with full butler.
174 Notes
175 -----
176 When running with Quantum-backed butler it assumes that reference is
177 already resolved and returns input references without any checks. When
178 running with full butler, it always searches registry fof a reference
179 in specified collections, even if reference is already resolved.
180 """
181 if self.butler is not None:
182 # If running with full butler, need to re-resolve it in case
183 # collections are different.
184 ref = ref.unresolved()
185 return self.butler.registry.findDataset(ref.datasetType, ref.dataId, collections=collections)
186 else:
187 # In case of QBB all refs must be resolved already, do not check.
188 return ref
190 def _execute(self, taskDef: TaskDef, quantum: Quantum) -> Quantum:
191 """Internal implementation of execute()"""
192 startTime = time.time()
194 # Make butler instance
195 limited_butler: LimitedButler
196 if self.butler is not None:
197 limited_butler = self.butler
198 else:
199 assert self.butler_config is not None and self.universe is not None
200 limited_butler = QuantumBackedButler.initialize(
201 config=self.butler_config,
202 quantum=quantum,
203 dimensions=self.universe,
204 )
206 if self.butler is not None:
207 log_capture = LogCapture.from_full(self.butler)
208 else:
209 log_capture = LogCapture.from_limited(limited_butler)
210 with log_capture.capture_logging(taskDef, quantum) as captureLog:
212 # Save detailed resource usage before task start to metadata.
213 quantumMetadata = _TASK_METADATA_TYPE()
214 logInfo(None, "prep", metadata=quantumMetadata) # type: ignore[arg-type]
216 # check whether to skip or delete old outputs, if it returns True
217 # or raises an exception do not try to store logs, as they may be
218 # already in butler.
219 captureLog.store = False
220 if self.checkExistingOutputs(quantum, taskDef, limited_butler):
221 _LOG.info(
222 "Skipping already-successful quantum for label=%s dataId=%s.",
223 taskDef.label,
224 quantum.dataId,
225 )
226 return quantum
227 captureLog.store = True
229 try:
230 quantum = self.updatedQuantumInputs(quantum, taskDef, limited_butler)
231 except NoWorkFound as exc:
232 _LOG.info(
233 "Nothing to do for task '%s' on quantum %s; saving metadata and skipping: %s",
234 taskDef.label,
235 quantum.dataId,
236 str(exc),
237 )
238 # Make empty metadata that looks something like what a
239 # do-nothing task would write (but we don't bother with empty
240 # nested PropertySets for subtasks). This is slightly
241 # duplicative with logic in pipe_base that we can't easily call
242 # from here; we'll fix this on DM-29761.
243 logInfo(None, "end", metadata=quantumMetadata) # type: ignore[arg-type]
244 fullMetadata = _TASK_FULL_METADATA_TYPE()
245 fullMetadata[taskDef.label] = _TASK_METADATA_TYPE()
246 fullMetadata["quantum"] = quantumMetadata
247 self.writeMetadata(quantum, fullMetadata, taskDef, limited_butler)
248 return quantum
250 # enable lsstDebug debugging
251 if self.enableLsstDebug:
252 try:
253 _LOG.debug("Will try to import debug.py")
254 import debug # type: ignore # noqa:F401
255 except ImportError:
256 _LOG.warn("No 'debug' module found.")
258 # initialize global state
259 self.initGlobals(quantum)
261 # Ensure that we are executing a frozen config
262 taskDef.config.freeze()
263 logInfo(None, "init", metadata=quantumMetadata) # type: ignore[arg-type]
264 init_input_refs = []
265 for ref in quantum.initInputs.values():
266 resolved = self._resolve_ref(ref)
267 if resolved is None:
268 raise ValueError(f"Failed to resolve init input reference {ref}")
269 init_input_refs.append(resolved)
270 task = self.taskFactory.makeTask(taskDef, limited_butler, init_input_refs)
271 logInfo(None, "start", metadata=quantumMetadata) # type: ignore[arg-type]
272 try:
273 if self.mock:
274 # Use mock task instance to execute method.
275 runTask = self._makeMockTask(taskDef)
276 else:
277 runTask = task
278 self.runQuantum(runTask, quantum, taskDef, limited_butler)
279 except Exception as e:
280 _LOG.error(
281 "Execution of task '%s' on quantum %s failed. Exception %s: %s",
282 taskDef.label,
283 quantum.dataId,
284 e.__class__.__name__,
285 str(e),
286 )
287 raise
288 logInfo(None, "end", metadata=quantumMetadata) # type: ignore[arg-type]
289 fullMetadata = task.getFullMetadata()
290 fullMetadata["quantum"] = quantumMetadata
291 self.writeMetadata(quantum, fullMetadata, taskDef, limited_butler)
292 stopTime = time.time()
293 _LOG.info(
294 "Execution of task '%s' on quantum %s took %.3f seconds",
295 taskDef.label,
296 quantum.dataId,
297 stopTime - startTime,
298 )
299 return quantum
301 def _makeMockTask(self, taskDef: TaskDef) -> PipelineTask:
302 """Make an instance of mock task for given TaskDef."""
303 # Make config instance and apply overrides
304 overrides = ConfigOverrides()
305 for action in self.mock_configs:
306 if action.label == taskDef.label + "-mock":
307 if action.action == "config":
308 key, _, value = action.value.partition("=")
309 overrides.addValueOverride(key, value)
310 elif action.action == "configfile":
311 overrides.addFileOverride(os.path.expandvars(action.value))
312 else:
313 raise ValueError(f"Unexpected action for mock task config overrides: {action}")
314 config = MockPipelineTask.ConfigClass()
315 overrides.applyTo(config)
317 task = MockPipelineTask(config=config, name=taskDef.label)
318 return task
    def checkExistingOutputs(self, quantum: Quantum, taskDef: TaskDef, limited_butler: LimitedButler) -> bool:
        """Decide whether this quantum needs to be executed.

        If only partial outputs exist then they are removed if
        ``clobberOutputs`` is True, otherwise an exception is raised.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Quantum to check for existing outputs
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.
        limited_butler : `~lsst.daf.butler.LimitedButler`
            Butler whose datastore is queried for output existence.

        Returns
        -------
        exist : `bool`
            `True` if ``self.skipExistingIn`` is defined, and a previous
            execution of this quanta appears to have completed successfully
            (either because metadata was written or all datasets were written).
            `False` otherwise.

        Raises
        ------
        RuntimeError
            Raised if some outputs exist and some not.
        """
        if self.skipExistingIn and taskDef.metadataDatasetName is not None:
            # Metadata output exists; this is sufficient to assume the previous
            # run was successful and should be skipped.
            [metadata_ref] = quantum.outputs[taskDef.metadataDatasetName]
            ref = self._resolve_ref(metadata_ref, self.skipExistingIn)
            if ref is not None:
                if limited_butler.datastore.exists(ref):
                    return True

        # Previously we always checked for existing outputs in `butler.run`,
        # now logic gets more complicated as we only want to skip quantum
        # whose outputs exist in `self.skipExistingIn` but pruning should only
        # be done for outputs existing in `butler.run`.

        def findOutputs(
            collections: str | list[str] | None
        ) -> tuple[list[DatasetRef], list[DatasetRef]]:
            """Find quantum outputs in specified collections."""
            existingRefs = []
            missingRefs = []
            for datasetRefs in quantum.outputs.values():
                checkRefs: list[DatasetRef] = []
                # Map datastore-resolved refs back to the quantum's own refs
                # so missing datasets are reported in the quantum's terms.
                registryRefToQuantumRef: dict[DatasetRef, DatasetRef] = {}
                for datasetRef in datasetRefs:
                    ref = self._resolve_ref(datasetRef, collections)
                    if ref is None:
                        missingRefs.append(datasetRef)
                    else:
                        checkRefs.append(ref)
                        registryRefToQuantumRef[ref] = datasetRef

                # More efficient to ask the datastore in bulk for ref
                # existence rather than one at a time.
                existence = limited_butler.datastore.mexists(checkRefs)
                for ref, exists in existence.items():
                    if exists:
                        existingRefs.append(ref)
                    else:
                        missingRefs.append(registryRefToQuantumRef[ref])
            return existingRefs, missingRefs

        existingRefs, missingRefs = findOutputs(self.skipExistingIn)
        if self.skipExistingIn:
            if existingRefs and not missingRefs:
                # everything is already there
                return True

        # If we are to re-run quantum then prune datasets that exists in
        # output run collection, only if `self.clobberOutputs` is set,
        # that only works when we have full butler.
        if existingRefs and self.butler is not None:
            existingRefs, missingRefs = findOutputs(self.butler.run)
            if existingRefs and missingRefs:
                _LOG.debug(
                    "Partial outputs exist for task %s dataId=%s collection=%s "
                    "existingRefs=%s missingRefs=%s",
                    taskDef,
                    quantum.dataId,
                    self.butler.run,
                    existingRefs,
                    missingRefs,
                )
                if self.clobberOutputs:
                    # only prune
                    _LOG.info("Removing partial outputs for task %s: %s", taskDef, existingRefs)
                    self.butler.pruneDatasets(existingRefs, disassociate=True, unstore=True, purge=True)
                    return False
                else:
                    raise RuntimeError(
                        f"Registry inconsistency while checking for existing outputs:"
                        f" collection={self.butler.run} existingRefs={existingRefs}"
                        f" missingRefs={missingRefs}"
                    )

        # need to re-run
        return False
    def updatedQuantumInputs(
        self, quantum: Quantum, taskDef: TaskDef, limited_butler: LimitedButler
    ) -> Quantum:
        """Update quantum with extra information, returns a new updated
        Quantum.

        Some methods may require input DatasetRefs to have non-None
        ``dataset_id``, but in case of intermediate dataset it may not be
        filled during QuantumGraph construction. This method will retrieve
        missing info from registry.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.
        limited_butler : `~lsst.daf.butler.LimitedButler`
            Butler used to check dataset existence in the datastore.

        Returns
        -------
        update : `~lsst.daf.butler.Quantum`
            Updated Quantum instance
        """
        anyChanges = False
        updatedInputs: defaultdict[DatasetType, list] = defaultdict(list)
        for key, refsForDatasetType in quantum.inputs.items():
            newRefsForDatasetType = updatedInputs[key]
            for ref in refsForDatasetType:

                # Inputs may already be resolved even if they do not exist, but
                # we have to re-resolve them because IDs are ignored on output.
                # Check datastore for existence first to cover calibration
                # dataset types, as they would need a timespan for findDataset.
                resolvedRef: DatasetRef | None
                checked_datastore = False
                if ref.id is not None and limited_butler.datastore.exists(ref):
                    resolvedRef = ref
                    checked_datastore = True
                elif self.butler is not None:
                    # In case of full butler try to (re-)resolve it.
                    resolvedRef = self._resolve_ref(ref)
                    if resolvedRef is None:
                        _LOG.info("No dataset found for %s", ref)
                        continue
                    else:
                        _LOG.debug("Updated dataset ID for %s", ref)
                else:
                    # QBB with missing intermediate
                    _LOG.info("No dataset found for %s", ref)
                    continue

                # In case of mock execution we check that mock dataset exists
                # instead. Mock execution is only possible with full butler.
                if self.mock and self.butler is not None:
                    try:
                        typeName, component = ref.datasetType.nameAndComponent()
                        if component is not None:
                            mockDatasetTypeName = MockButlerQuantumContext.mockDatasetTypeName(typeName)
                        else:
                            mockDatasetTypeName = MockButlerQuantumContext.mockDatasetTypeName(
                                ref.datasetType.name
                            )

                        mockDatasetType = self.butler.registry.getDatasetType(mockDatasetTypeName)
                    except KeyError:
                        # means that mock dataset type is not there and this
                        # should be a pre-existing dataset
                        _LOG.debug("No mock dataset type for %s", ref)
                        if self.butler.datastore.exists(resolvedRef):
                            newRefsForDatasetType.append(resolvedRef)
                    else:
                        # Mock dataset type exists; accept the input only if
                        # the corresponding mock dataset was actually stored.
                        mockRef = DatasetRef(mockDatasetType, ref.dataId)
                        resolvedMockRef = self.butler.registry.findDataset(
                            mockRef.datasetType, mockRef.dataId, collections=self.butler.collections
                        )
                        _LOG.debug("mockRef=%s resolvedMockRef=%s", mockRef, resolvedMockRef)
                        if resolvedMockRef is not None and self.butler.datastore.exists(resolvedMockRef):
                            _LOG.debug("resolvedMockRef dataset exists")
                            newRefsForDatasetType.append(resolvedRef)
                elif checked_datastore or limited_butler.datastore.exists(resolvedRef):
                    # We need to ask datastore if the dataset actually exists
                    # because the Registry of a local "execution butler"
                    # cannot know this (because we prepopulate it with all of
                    # the datasets that might be created).
                    newRefsForDatasetType.append(resolvedRef)

            if len(newRefsForDatasetType) != len(refsForDatasetType):
                anyChanges = True
        # If we removed any input datasets, let the task check if it has enough
        # to proceed and/or prune related datasets that it also doesn't
        # need/produce anymore. It will raise NoWorkFound if it can't run,
        # which we'll let propagate up. This is exactly what we run during QG
        # generation, because a task shouldn't care whether an input is missing
        # because some previous task didn't produce it, or because it just
        # wasn't there during QG generation.
        namedUpdatedInputs = NamedKeyDict[DatasetType, list[DatasetRef]](updatedInputs.items())
        helper = AdjustQuantumHelper(namedUpdatedInputs, quantum.outputs)
        if anyChanges:
            assert quantum.dataId is not None, "Quantum DataId cannot be None"
            helper.adjust_in_place(taskDef.connections, label=taskDef.label, data_id=quantum.dataId)
        return Quantum(
            taskName=quantum.taskName,
            taskClass=quantum.taskClass,
            dataId=quantum.dataId,
            initInputs=quantum.initInputs,
            inputs=helper.inputs,
            outputs=helper.outputs,
        )
532 def runQuantum(
533 self, task: PipelineTask, quantum: Quantum, taskDef: TaskDef, limited_butler: LimitedButler
534 ) -> None:
535 """Execute task on a single quantum.
537 Parameters
538 ----------
539 task : `~lsst.pipe.base.PipelineTask`
540 Task object.
541 quantum : `~lsst.daf.butler.Quantum`
542 Single Quantum instance.
543 taskDef : `~lsst.pipe.base.TaskDef`
544 Task definition structure.
545 """
546 # Create a butler that operates in the context of a quantum
547 if self.butler is None:
548 butlerQC = ButlerQuantumContext.from_limited(limited_butler, quantum)
549 else:
550 if self.mock:
551 butlerQC = MockButlerQuantumContext(self.butler, quantum)
552 else:
553 butlerQC = ButlerQuantumContext.from_full(self.butler, quantum)
555 # Get the input and output references for the task
556 inputRefs, outputRefs = taskDef.connections.buildDatasetRefs(quantum)
558 # Call task runQuantum() method. Catch a few known failure modes and
559 # translate them into specific
560 try:
561 task.runQuantum(butlerQC, inputRefs, outputRefs)
562 except NoWorkFound as err:
563 # Not an error, just an early exit.
564 _LOG.info("Task '%s' on quantum %s exited early: %s", taskDef.label, quantum.dataId, str(err))
565 pass
566 except RepeatableQuantumError as err:
567 if self.exitOnKnownError:
568 _LOG.warning("Caught repeatable quantum error for %s (%s):", taskDef, quantum.dataId)
569 _LOG.warning(err, exc_info=True)
570 sys.exit(err.EXIT_CODE)
571 else:
572 raise
573 except InvalidQuantumError as err:
574 _LOG.fatal("Invalid quantum error for %s (%s): %s", taskDef, quantum.dataId)
575 _LOG.fatal(err, exc_info=True)
576 sys.exit(err.EXIT_CODE)
    def writeMetadata(
        self, quantum: Quantum, metadata: Any, taskDef: TaskDef, limited_butler: LimitedButler
    ) -> None:
        """Store task metadata as the quantum's metadata output dataset.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Quantum whose outputs contain the metadata dataset reference.
        metadata : `Any`
            Metadata object to store.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition; its ``metadataDatasetName`` names the output.
        limited_butler : `~lsst.daf.butler.LimitedButler`
            Butler used for direct writes when no full butler is available.

        Raises
        ------
        InvalidQuantumError
            Raised if the metadata dataset type is missing from the
            quantum's outputs.
        """
        if taskDef.metadataDatasetName is not None:
            # DatasetRef has to be in the Quantum outputs, can lookup by name
            try:
                [ref] = quantum.outputs[taskDef.metadataDatasetName]
            except LookupError as exc:
                raise InvalidQuantumError(
                    f"Quantum outputs is missing metadata dataset type {taskDef.metadataDatasetName};"
                    f" this could happen due to inconsistent options between QuantumGraph generation"
                    f" and execution"
                ) from exc
            if self.butler is not None:
                # Dataset ref can already be resolved, for non-QBB executor we
                # have to ignore that because may be overriding run
                # collection.
                if ref.id is not None:
                    ref = ref.unresolved()
                self.butler.put(metadata, ref)
            else:
                limited_butler.putDirect(metadata, ref)
601 def initGlobals(self, quantum: Quantum) -> None:
602 """Initialize global state needed for task execution.
604 Parameters
605 ----------
606 quantum : `~lsst.daf.butler.Quantum`
607 Single Quantum instance.
609 Notes
610 -----
611 There is an issue with initializing filters singleton which is done
612 by instrument, to avoid requiring tasks to do it in runQuantum()
613 we do it here when any dataId has an instrument dimension. Also for
614 now we only allow single instrument, verify that all instrument
615 names in all dataIds are identical.
617 This will need revision when filter singleton disappears.
618 """
619 # can only work for full butler
620 if self.butler is None:
621 return
622 oneInstrument = None
623 for datasetRefs in chain(quantum.inputs.values(), quantum.outputs.values()):
624 for datasetRef in datasetRefs:
625 dataId = datasetRef.dataId
626 instrument = dataId.get("instrument")
627 if instrument is not None:
628 if oneInstrument is not None:
629 assert ( # type: ignore
630 instrument == oneInstrument
631 ), "Currently require that only one instrument is used per graph"
632 else:
633 oneInstrument = instrument
634 Instrument.fromName(instrument, self.butler.registry)
636 def getReport(self) -> Optional[QuantumReport]:
637 # Docstring inherited from base class
638 if self.report is None:
639 raise RuntimeError("getReport() called before execute()")
640 return self.report