Coverage for python/lsst/ctrl/mpexec/singleQuantumExecutor.py: 10%
233 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-06-08 09:17 +0000
1# This file is part of ctrl_mpexec.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22__all__ = ["SingleQuantumExecutor"]
24# -------------------------------
25# Imports of standard modules --
26# -------------------------------
27import logging
28import os
29import sys
30import time
31from collections import defaultdict
32from collections.abc import Callable
33from itertools import chain
34from typing import Any, Optional
36from lsst.daf.butler import (
37 Butler,
38 CollectionType,
39 DatasetRef,
40 DatasetType,
41 LimitedButler,
42 NamedKeyDict,
43 Quantum,
44)
45from lsst.daf.butler.registry.wildcards import CollectionWildcard
46from lsst.pipe.base import (
47 AdjustQuantumHelper,
48 ButlerQuantumContext,
49 Instrument,
50 InvalidQuantumError,
51 NoWorkFound,
52 PipelineTask,
53 RepeatableQuantumError,
54 TaskDef,
55 TaskFactory,
56)
57from lsst.pipe.base.configOverrides import ConfigOverrides
59# During metadata transition phase, determine metadata class by
60# asking pipe_base
61from lsst.pipe.base.task import _TASK_FULL_METADATA_TYPE, _TASK_METADATA_TYPE
62from lsst.utils.timer import logInfo
64# -----------------------------
65# Imports for other modules --
66# -----------------------------
67from .cli.utils import _PipelineAction
68from .log_capture import LogCapture
69from .mock_task import MockButlerQuantumContext, MockPipelineTask
70from .quantumGraphExecutor import QuantumExecutor
71from .reports import QuantumReport
73# ----------------------------------
74# Local non-exported definitions --
75# ----------------------------------
77_LOG = logging.getLogger(__name__)
class SingleQuantumExecutor(QuantumExecutor):
    """Executor class which runs one Quantum at a time.

    Parameters
    ----------
    butler : `~lsst.daf.butler.Butler` or `None`
        Data butler, `None` means that Quantum-backed butler should be used
        instead.
    taskFactory : `~lsst.pipe.base.TaskFactory`
        Instance of a task factory.
    skipExistingIn
        Expressions representing the collections to search for existing
        output datasets. See :ref:`daf_butler_ordered_collection_searches`
        for allowed types. This class only checks for the presence of butler
        output run in the list of collections. If the output run is present
        in the list then the quanta whose complete outputs exist in the output
        run will be skipped. `None` or empty string/sequence disables skipping.
    clobberOutputs : `bool`, optional
        If `True`, then outputs from a quantum that exist in output run
        collection will be removed prior to executing a quantum. If
        ``skipExistingIn`` contains output run, then only partial outputs from
        a quantum will be removed. Only used when ``butler`` is not `None`.
    enableLsstDebug : `bool`, optional
        Enable debugging with ``lsstDebug`` facility for a task.
    exitOnKnownError : `bool`, optional
        If `True`, call `sys.exit` with the appropriate exit code for special
        known exceptions, after printing a traceback, instead of letting the
        exception propagate up to calling. This is always the behavior for
        InvalidQuantumError.
    mock : `bool`, optional
        If `True` then mock task execution.
    mock_configs : `list` [ `_PipelineAction` ], optional
        Optional config overrides for mock tasks.
    limited_butler_factory : `Callable`, optional
        A method that creates a `~lsst.daf.butler.LimitedButler` instance
        for a given Quantum. This parameter must be defined if ``butler`` is
        `None`. If ``butler`` is not `None` then this parameter is ignored.
    """

    def __init__(
        self,
        butler: Butler | None,
        taskFactory: TaskFactory,
        skipExistingIn: Any = None,
        clobberOutputs: bool = False,
        enableLsstDebug: bool = False,
        exitOnKnownError: bool = False,
        mock: bool = False,
        mock_configs: list[_PipelineAction] | None = None,
        limited_butler_factory: Callable[[Quantum], LimitedButler] | None = None,
    ):
        self.butler = butler
        self.taskFactory = taskFactory
        self.enableLsstDebug = enableLsstDebug
        self.clobberOutputs = clobberOutputs
        self.exitOnKnownError = exitOnKnownError
        self.mock = mock
        self.mock_configs = mock_configs if mock_configs is not None else []
        self.limited_butler_factory = limited_butler_factory
        # Report for the most recently executed quantum; set by execute().
        self.report: QuantumReport | None = None

        if self.butler is None:
            assert not self.mock, "Mock execution only possible with full butler"
            assert limited_butler_factory is not None, "limited_butler_factory is needed when butler is None"

        # Find whether output run is in skipExistingIn.
        # TODO: This duplicates logic in GraphBuilder, would be nice to have
        # better abstraction for this some day.
        self.skipExisting = False
        if self.butler is not None and skipExistingIn:
            skip_collections_wildcard = CollectionWildcard.from_expression(skipExistingIn)
            # As optimization check in the explicit list of names first
            self.skipExisting = self.butler.run in skip_collections_wildcard.strings
            if not self.skipExisting:
                # need to flatten it and check again
                self.skipExisting = self.butler.run in self.butler.registry.queryCollections(
                    skipExistingIn,
                    collectionTypes=CollectionType.RUN,
                )

    def execute(self, taskDef: TaskDef, quantum: Quantum) -> Quantum:
        # Docstring inherited from QuantumExecutor.execute
        assert quantum.dataId is not None, "Quantum DataId cannot be None"

        if self.butler is not None:
            self.butler.registry.refresh()

        # Catch any exception and make a report based on that.
        try:
            result = self._execute(taskDef, quantum)
            self.report = QuantumReport(dataId=quantum.dataId, taskLabel=taskDef.label)
            return result
        except Exception as exc:
            self.report = QuantumReport.from_exception(
                exception=exc,
                dataId=quantum.dataId,
                taskLabel=taskDef.label,
            )
            raise

    def _execute(self, taskDef: TaskDef, quantum: Quantum) -> Quantum:
        """Internal implementation of execute()"""
        startTime = time.time()

        # Make a limited butler instance if needed (which should be QBB if full
        # butler is not defined).
        limited_butler: LimitedButler
        if self.butler is not None:
            limited_butler = self.butler
        else:
            # We check this in constructor, but mypy needs this check here.
            assert self.limited_butler_factory is not None
            limited_butler = self.limited_butler_factory(quantum)

        if self.butler is not None:
            log_capture = LogCapture.from_full(self.butler)
        else:
            log_capture = LogCapture.from_limited(limited_butler)
        with log_capture.capture_logging(taskDef, quantum) as captureLog:
            # Save detailed resource usage before task start to metadata.
            quantumMetadata = _TASK_METADATA_TYPE()
            logInfo(None, "prep", metadata=quantumMetadata)  # type: ignore[arg-type]

            # check whether to skip or delete old outputs, if it returns True
            # or raises an exception do not try to store logs, as they may be
            # already in butler.
            captureLog.store = False
            if self.checkExistingOutputs(quantum, taskDef, limited_butler):
                _LOG.info(
                    "Skipping already-successful quantum for label=%s dataId=%s.",
                    taskDef.label,
                    quantum.dataId,
                )
                return quantum
            captureLog.store = True

            try:
                quantum = self.updatedQuantumInputs(quantum, taskDef, limited_butler)
            except NoWorkFound as exc:
                _LOG.info(
                    "Nothing to do for task '%s' on quantum %s; saving metadata and skipping: %s",
                    taskDef.label,
                    quantum.dataId,
                    str(exc),
                )
                # Make empty metadata that looks something like what a
                # do-nothing task would write (but we don't bother with empty
                # nested PropertySets for subtasks). This is slightly
                # duplicative with logic in pipe_base that we can't easily call
                # from here; we'll fix this on DM-29761.
                logInfo(None, "end", metadata=quantumMetadata)  # type: ignore[arg-type]
                fullMetadata = _TASK_FULL_METADATA_TYPE()
                fullMetadata[taskDef.label] = _TASK_METADATA_TYPE()
                fullMetadata["quantum"] = quantumMetadata
                self.writeMetadata(quantum, fullMetadata, taskDef, limited_butler)
                return quantum

            # enable lsstDebug debugging
            if self.enableLsstDebug:
                try:
                    _LOG.debug("Will try to import debug.py")
                    import debug  # type: ignore # noqa:F401
                except ImportError:
                    # Fixed: Logger.warn is a deprecated alias of warning.
                    _LOG.warning("No 'debug' module found.")

            # initialize global state
            self.initGlobals(quantum)

            # Ensure that we are executing a frozen config
            taskDef.config.freeze()
            logInfo(None, "init", metadata=quantumMetadata)  # type: ignore[arg-type]
            init_input_refs = list(quantum.initInputs.values())
            task = self.taskFactory.makeTask(taskDef, limited_butler, init_input_refs)
            logInfo(None, "start", metadata=quantumMetadata)  # type: ignore[arg-type]
            try:
                if self.mock:
                    # Use mock task instance to execute method.
                    runTask = self._makeMockTask(taskDef)
                else:
                    runTask = task
                self.runQuantum(runTask, quantum, taskDef, limited_butler)
            except Exception as e:
                _LOG.error(
                    "Execution of task '%s' on quantum %s failed. Exception %s: %s",
                    taskDef.label,
                    quantum.dataId,
                    e.__class__.__name__,
                    str(e),
                )
                raise
            logInfo(None, "end", metadata=quantumMetadata)  # type: ignore[arg-type]
            fullMetadata = task.getFullMetadata()
            fullMetadata["quantum"] = quantumMetadata
            self.writeMetadata(quantum, fullMetadata, taskDef, limited_butler)
            stopTime = time.time()
            _LOG.info(
                "Execution of task '%s' on quantum %s took %.3f seconds",
                taskDef.label,
                quantum.dataId,
                stopTime - startTime,
            )
        return quantum

    def _makeMockTask(self, taskDef: TaskDef) -> PipelineTask:
        """Make an instance of mock task for given TaskDef."""
        # Make config instance and apply overrides
        overrides = ConfigOverrides()
        for action in self.mock_configs:
            if action.label == taskDef.label + "-mock":
                if action.action == "config":
                    key, _, value = action.value.partition("=")
                    overrides.addValueOverride(key, value)
                elif action.action == "configfile":
                    overrides.addFileOverride(os.path.expandvars(action.value))
                else:
                    raise ValueError(f"Unexpected action for mock task config overrides: {action}")
        config = MockPipelineTask.ConfigClass()
        overrides.applyTo(config)

        task = MockPipelineTask(config=config, name=taskDef.label)
        return task

    def checkExistingOutputs(self, quantum: Quantum, taskDef: TaskDef, limited_butler: LimitedButler) -> bool:
        """Decide whether this quantum needs to be executed.

        If only partial outputs exist then they are removed if
        ``clobberOutputs`` is True, otherwise an exception is raised.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Quantum to check for existing outputs
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.

        Returns
        -------
        exist : `bool`
            `True` if ``self.skipExisting`` is defined, and a previous
            execution of this quanta appears to have completed successfully
            (either because metadata was written or all datasets were written).
            `False` otherwise.

        Raises
        ------
        RuntimeError
            Raised if some outputs exist and some not.
        """
        if not self.butler:
            # Skip/prune logic only works for full butler.
            return False

        if self.skipExisting:
            # Metadata output exists; this is sufficient to assume the previous
            # run was successful and should be skipped.
            [metadata_ref] = quantum.outputs[taskDef.metadataDatasetName]
            if metadata_ref is not None:
                if limited_butler.stored(metadata_ref):
                    return True

        # Find and prune (partial) outputs if `self.clobberOutputs` is set.
        ref_dict = self.butler.stored_many(chain.from_iterable(quantum.outputs.values()))
        existingRefs = [ref for ref, exists in ref_dict.items() if exists]
        missingRefs = [ref for ref, exists in ref_dict.items() if not exists]
        if existingRefs:
            if not missingRefs:
                # Full outputs exist.
                if self.skipExisting:
                    return True
                elif self.clobberOutputs:
                    _LOG.info("Removing complete outputs for quantum %s: %s", quantum, existingRefs)
                    self.butler.pruneDatasets(existingRefs, disassociate=True, unstore=True, purge=True)
                else:
                    raise RuntimeError(
                        f"Complete outputs exists for a quantum {quantum} "
                        "and neither clobberOutputs nor skipExisting is set: "
                        f"collection={self.butler.run} existingRefs={existingRefs}"
                    )
            else:
                # Partial outputs from a failed quantum.
                _LOG.debug(
                    "Partial outputs exist for quantum %s collection=%s existingRefs=%s missingRefs=%s",
                    quantum,
                    self.butler.run,
                    existingRefs,
                    missingRefs,
                )
                if self.clobberOutputs:
                    # only prune
                    _LOG.info("Removing partial outputs for task %s: %s", taskDef, existingRefs)
                    self.butler.pruneDatasets(existingRefs, disassociate=True, unstore=True, purge=True)
                    return False
                else:
                    raise RuntimeError(
                        "Registry inconsistency while checking for existing quantum outputs:"
                        f" quantum={quantum} collection={self.butler.run} existingRefs={existingRefs}"
                        f" missingRefs={missingRefs}"
                    )

        # By default always execute.
        return False

    def updatedQuantumInputs(
        self, quantum: Quantum, taskDef: TaskDef, limited_butler: LimitedButler
    ) -> Quantum:
        """Update quantum with extra information, returns a new updated
        Quantum.

        Some methods may require input DatasetRefs to have non-None
        ``dataset_id``, but in case of intermediate dataset it may not be
        filled during QuantumGraph construction. This method will retrieve
        missing info from registry.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.

        Returns
        -------
        update : `~lsst.daf.butler.Quantum`
            Updated Quantum instance
        """
        anyChanges = False
        updatedInputs: defaultdict[DatasetType, list] = defaultdict(list)
        for key, refsForDatasetType in quantum.inputs.items():
            newRefsForDatasetType = updatedInputs[key]
            stored = limited_butler.stored_many(refsForDatasetType)
            for ref in refsForDatasetType:
                # Inputs may already be resolved even if they do not exist, but
                # we have to re-resolve them because IDs are ignored on output.
                # Check datastore for existence first to cover calibration
                # dataset types, as they would need a timespan for findDataset.
                resolvedRef: DatasetRef | None
                if stored[ref]:
                    resolvedRef = ref
                elif self.butler is not None:
                    # This branch is for mock execution only which does not
                    # generate actual outputs, only adds datasets to registry.
                    resolvedRef = self.butler.registry.findDataset(ref.datasetType, ref.dataId)
                    if resolvedRef is None:
                        _LOG.info("No dataset found for %s", ref)
                        continue
                    else:
                        _LOG.debug("Updated dataset ID for %s", ref)
                else:
                    # QBB with missing intermediate
                    _LOG.info("No dataset found for %s", ref)
                    continue

                # In case of mock execution we check that mock dataset exists
                # instead. Mock execution is only possible with full butler.
                if self.mock and self.butler is not None:
                    try:
                        typeName, component = ref.datasetType.nameAndComponent()
                        if component is not None:
                            mockDatasetTypeName = MockButlerQuantumContext.mockDatasetTypeName(typeName)
                        else:
                            mockDatasetTypeName = MockButlerQuantumContext.mockDatasetTypeName(
                                ref.datasetType.name
                            )

                        mockDatasetType = self.butler.registry.getDatasetType(mockDatasetTypeName)
                    except KeyError:
                        # means that mock dataset type is not there and this
                        # should be a pre-existing dataset
                        _LOG.debug("No mock dataset type for %s", ref)
                        # Only check datastore if we have not already checked
                        # it for this ref.
                        if (ref_stored := stored.get(resolvedRef)) or (
                            ref_stored is None and self.butler.stored(resolvedRef)
                        ):
                            newRefsForDatasetType.append(resolvedRef)
                    else:
                        resolvedMockRef = self.butler.registry.findDataset(
                            mockDatasetType, ref.dataId, collections=self.butler.collections
                        )
                        _LOG.debug(
                            "mockRef=(%s, %s) resolvedMockRef=%s",
                            mockDatasetType,
                            ref.dataId,
                            resolvedMockRef,
                        )
                        if resolvedMockRef is not None and self.butler.stored(resolvedMockRef):
                            _LOG.debug("resolvedMockRef dataset exists")
                            newRefsForDatasetType.append(resolvedRef)
                elif (ref_stored := stored.get(resolvedRef)) or (
                    ref_stored is None and limited_butler.stored(resolvedRef)
                ):
                    # We need to ask datastore if the dataset actually exists
                    # because the Registry of a local "execution butler"
                    # cannot know this (because we prepopulate it with all of
                    # the datasets that might be created). Either we have
                    # already checked and know the answer, or the resolved
                    # ref differed from the original and we have to ask
                    # explicitly for that.
                    newRefsForDatasetType.append(resolvedRef)

            if len(newRefsForDatasetType) != len(refsForDatasetType):
                anyChanges = True
        # If we removed any input datasets, let the task check if it has enough
        # to proceed and/or prune related datasets that it also doesn't
        # need/produce anymore. It will raise NoWorkFound if it can't run,
        # which we'll let propagate up. This is exactly what we run during QG
        # generation, because a task shouldn't care whether an input is missing
        # because some previous task didn't produce it, or because it just
        # wasn't there during QG generation.
        namedUpdatedInputs = NamedKeyDict[DatasetType, list[DatasetRef]](updatedInputs.items())
        helper = AdjustQuantumHelper(namedUpdatedInputs, quantum.outputs)
        if anyChanges:
            assert quantum.dataId is not None, "Quantum DataId cannot be None"
            helper.adjust_in_place(taskDef.connections, label=taskDef.label, data_id=quantum.dataId)
        return Quantum(
            taskName=quantum.taskName,
            taskClass=quantum.taskClass,
            dataId=quantum.dataId,
            initInputs=quantum.initInputs,
            inputs=helper.inputs,
            outputs=helper.outputs,
        )

    def runQuantum(
        self, task: PipelineTask, quantum: Quantum, taskDef: TaskDef, limited_butler: LimitedButler
    ) -> None:
        """Execute task on a single quantum.

        Parameters
        ----------
        task : `~lsst.pipe.base.PipelineTask`
            Task object.
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.
        """
        # Create a butler that operates in the context of a quantum
        if not self.mock:
            butlerQC = ButlerQuantumContext(limited_butler, quantum)
        else:
            assert self.butler is not None, "Full Butler instance requred for mock execution"
            butlerQC = MockButlerQuantumContext(self.butler, quantum)

        # Get the input and output references for the task
        inputRefs, outputRefs = taskDef.connections.buildDatasetRefs(quantum)

        # Call task runQuantum() method. Catch a few known failure modes and
        # translate them into specific
        try:
            task.runQuantum(butlerQC, inputRefs, outputRefs)
        except NoWorkFound as err:
            # Not an error, just an early exit.
            _LOG.info("Task '%s' on quantum %s exited early: %s", taskDef.label, quantum.dataId, str(err))
        except RepeatableQuantumError as err:
            if self.exitOnKnownError:
                _LOG.warning("Caught repeatable quantum error for %s (%s):", taskDef, quantum.dataId)
                _LOG.warning(err, exc_info=True)
                sys.exit(err.EXIT_CODE)
            else:
                raise
        except InvalidQuantumError as err:
            # Fixed: the format string used to have three placeholders with
            # only two arguments, which made this log call itself fail; the
            # exception details are logged on the following line instead.
            _LOG.fatal("Invalid quantum error for %s (%s):", taskDef, quantum.dataId)
            _LOG.fatal(err, exc_info=True)
            sys.exit(err.EXIT_CODE)

    def writeMetadata(
        self, quantum: Quantum, metadata: Any, taskDef: TaskDef, limited_butler: LimitedButler
    ) -> None:
        """Store task metadata in the butler.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance; must contain the metadata dataset ref
            in its outputs.
        metadata : `Any`
            Task metadata object to store.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.
        limited_butler : `~lsst.daf.butler.LimitedButler`
            Butler used to store the metadata.

        Raises
        ------
        InvalidQuantumError
            Raised if the metadata dataset type is missing from the quantum
            outputs.
        """
        # DatasetRef has to be in the Quantum outputs, can lookup by name
        try:
            [ref] = quantum.outputs[taskDef.metadataDatasetName]
        except LookupError as exc:
            raise InvalidQuantumError(
                f"Quantum outputs is missing metadata dataset type {taskDef.metadataDatasetName};"
                " this could happen due to inconsistent options between QuantumGraph generation"
                " and execution"
            ) from exc
        limited_butler.put(metadata, ref)

    def initGlobals(self, quantum: Quantum) -> None:
        """Initialize global state needed for task execution.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.

        Notes
        -----
        There is an issue with initializing filters singleton which is done
        by instrument, to avoid requiring tasks to do it in runQuantum()
        we do it here when any dataId has an instrument dimension. Also for
        now we only allow single instrument, verify that all instrument
        names in all dataIds are identical.

        This will need revision when filter singleton disappears.
        """
        # can only work for full butler
        if self.butler is None:
            return
        oneInstrument = None
        for datasetRefs in chain(quantum.inputs.values(), quantum.outputs.values()):
            for datasetRef in datasetRefs:
                dataId = datasetRef.dataId
                instrument = dataId.get("instrument")
                if instrument is not None:
                    if oneInstrument is not None:
                        assert (  # type: ignore
                            instrument == oneInstrument
                        ), "Currently require that only one instrument is used per graph"
                    else:
                        oneInstrument = instrument
                        Instrument.fromName(instrument, self.butler.registry)

    def getReport(self) -> QuantumReport | None:
        # Docstring inherited from base class
        if self.report is None:
            raise RuntimeError("getReport() called before execute()")
        return self.report