Coverage for python/lsst/ctrl/mpexec/singleQuantumExecutor.py: 10%
236 statements
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-18 09:18 +0000
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-18 09:18 +0000
1# This file is part of ctrl_mpexec.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22__all__ = ["SingleQuantumExecutor"]
24# -------------------------------
25# Imports of standard modules --
26# -------------------------------
27import logging
28import os
29import sys
30import time
31from collections import defaultdict
32from collections.abc import Callable
33from itertools import chain
34from typing import Any, Optional
36from lsst.daf.butler import (
37 Butler,
38 CollectionType,
39 DatasetRef,
40 DatasetType,
41 LimitedButler,
42 NamedKeyDict,
43 Quantum,
44)
45from lsst.daf.butler.registry.wildcards import CollectionWildcard
46from lsst.pipe.base import (
47 AdjustQuantumHelper,
48 ButlerQuantumContext,
49 Instrument,
50 InvalidQuantumError,
51 NoWorkFound,
52 PipelineTask,
53 RepeatableQuantumError,
54 TaskDef,
55 TaskFactory,
56)
57from lsst.pipe.base.configOverrides import ConfigOverrides
59# During metadata transition phase, determine metadata class by
60# asking pipe_base
61from lsst.pipe.base.task import _TASK_FULL_METADATA_TYPE, _TASK_METADATA_TYPE
62from lsst.utils.timer import logInfo
64# -----------------------------
65# Imports for other modules --
66# -----------------------------
67from .cli.utils import _PipelineAction
68from .log_capture import LogCapture
69from .mock_task import MockButlerQuantumContext, MockPipelineTask
70from .quantumGraphExecutor import QuantumExecutor
71from .reports import QuantumReport
73# ----------------------------------
74# Local non-exported definitions --
75# ----------------------------------
77_LOG = logging.getLogger(__name__)
class SingleQuantumExecutor(QuantumExecutor):
    """Executor class which runs one Quantum at a time.

    Parameters
    ----------
    butler : `~lsst.daf.butler.Butler` or `None`
        Data butler, `None` means that Quantum-backed butler should be used
        instead.
    taskFactory : `~lsst.pipe.base.TaskFactory`
        Instance of a task factory.
    skipExistingIn
        Expressions representing the collections to search for existing
        output datasets. See :ref:`daf_butler_ordered_collection_searches`
        for allowed types. This class only checks for the presence of butler
        output run in the list of collections. If the output run is present
        in the list then the quanta whose complete outputs exist in the output
        run will be skipped. `None` or empty string/sequence disables skipping.
    clobberOutputs : `bool`, optional
        If `True`, then outputs from a quantum that exist in output run
        collection will be removed prior to executing a quantum. If
        ``skipExistingIn`` contains output run, then only partial outputs from
        a quantum will be removed. Only used when ``butler`` is not `None`.
    enableLsstDebug : `bool`, optional
        Enable debugging with ``lsstDebug`` facility for a task.
    exitOnKnownError : `bool`, optional
        If `True`, call `sys.exit` with the appropriate exit code for special
        known exceptions, after printing a traceback, instead of letting the
        exception propagate up to calling. This is always the behavior for
        InvalidQuantumError.
    mock : `bool`, optional
        If `True` then mock task execution.
    mock_configs : `list` [ `_PipelineAction` ], optional
        Optional config overrides for mock tasks.
    limited_butler_factory : `Callable`, optional
        A method that creates a `~lsst.daf.butler.LimitedButler` instance
        for a given Quantum. This parameter must be defined if ``butler`` is
        `None`. If ``butler`` is not `None` then this parameter is ignored.
    """
119 def __init__(
120 self,
121 butler: Butler | None,
122 taskFactory: TaskFactory,
123 skipExistingIn: Any = None,
124 clobberOutputs: bool = False,
125 enableLsstDebug: bool = False,
126 exitOnKnownError: bool = False,
127 mock: bool = False,
128 mock_configs: list[_PipelineAction] | None = None,
129 limited_butler_factory: Callable[[Quantum], LimitedButler] | None = None,
130 ):
131 self.butler = butler
132 self.taskFactory = taskFactory
133 self.enableLsstDebug = enableLsstDebug
134 self.clobberOutputs = clobberOutputs
135 self.exitOnKnownError = exitOnKnownError
136 self.mock = mock
137 self.mock_configs = mock_configs if mock_configs is not None else []
138 self.limited_butler_factory = limited_butler_factory
139 self.report: Optional[QuantumReport] = None
141 if self.butler is None:
142 assert not self.mock, "Mock execution only possible with full butler"
143 assert limited_butler_factory is not None, "limited_butler_factory is needed when butler is None"
145 # Find whether output run is in skipExistingIn.
146 # TODO: This duplicates logic in GraphBuilder, would be nice to have
147 # better abstraction for this some day.
148 self.skipExisting = False
149 if self.butler is not None and skipExistingIn:
150 skip_collections_wildcard = CollectionWildcard.from_expression(skipExistingIn)
151 # As optimization check in the explicit list of names first
152 self.skipExisting = self.butler.run in skip_collections_wildcard.strings
153 if not self.skipExisting:
154 # need to flatten it and check again
155 self.skipExisting = self.butler.run in self.butler.registry.queryCollections(
156 skipExistingIn,
157 collectionTypes=CollectionType.RUN,
158 )
160 def execute(self, taskDef: TaskDef, quantum: Quantum) -> Quantum:
161 # Docstring inherited from QuantumExecutor.execute
162 assert quantum.dataId is not None, "Quantum DataId cannot be None"
164 if self.butler is not None:
165 self.butler.registry.refresh()
167 # Catch any exception and make a report based on that.
168 try:
169 result = self._execute(taskDef, quantum)
170 self.report = QuantumReport(dataId=quantum.dataId, taskLabel=taskDef.label)
171 return result
172 except Exception as exc:
173 self.report = QuantumReport.from_exception(
174 exception=exc,
175 dataId=quantum.dataId,
176 taskLabel=taskDef.label,
177 )
178 raise
180 def _execute(self, taskDef: TaskDef, quantum: Quantum) -> Quantum:
181 """Internal implementation of execute()"""
182 startTime = time.time()
184 # Make a limited butler instance if needed (which should be QBB if full
185 # butler is not defined).
186 limited_butler: LimitedButler
187 if self.butler is not None:
188 limited_butler = self.butler
189 else:
190 # We check this in constructor, but mypy needs this check here.
191 assert self.limited_butler_factory is not None
192 limited_butler = self.limited_butler_factory(quantum)
194 if self.butler is not None:
195 log_capture = LogCapture.from_full(self.butler)
196 else:
197 log_capture = LogCapture.from_limited(limited_butler)
198 with log_capture.capture_logging(taskDef, quantum) as captureLog:
199 # Save detailed resource usage before task start to metadata.
200 quantumMetadata = _TASK_METADATA_TYPE()
201 logInfo(None, "prep", metadata=quantumMetadata) # type: ignore[arg-type]
203 # check whether to skip or delete old outputs, if it returns True
204 # or raises an exception do not try to store logs, as they may be
205 # already in butler.
206 captureLog.store = False
207 if self.checkExistingOutputs(quantum, taskDef, limited_butler):
208 _LOG.info(
209 "Skipping already-successful quantum for label=%s dataId=%s.",
210 taskDef.label,
211 quantum.dataId,
212 )
213 return quantum
214 captureLog.store = True
216 try:
217 quantum = self.updatedQuantumInputs(quantum, taskDef, limited_butler)
218 except NoWorkFound as exc:
219 _LOG.info(
220 "Nothing to do for task '%s' on quantum %s; saving metadata and skipping: %s",
221 taskDef.label,
222 quantum.dataId,
223 str(exc),
224 )
225 # Make empty metadata that looks something like what a
226 # do-nothing task would write (but we don't bother with empty
227 # nested PropertySets for subtasks). This is slightly
228 # duplicative with logic in pipe_base that we can't easily call
229 # from here; we'll fix this on DM-29761.
230 logInfo(None, "end", metadata=quantumMetadata) # type: ignore[arg-type]
231 fullMetadata = _TASK_FULL_METADATA_TYPE()
232 fullMetadata[taskDef.label] = _TASK_METADATA_TYPE()
233 fullMetadata["quantum"] = quantumMetadata
234 self.writeMetadata(quantum, fullMetadata, taskDef, limited_butler)
235 return quantum
237 # enable lsstDebug debugging
238 if self.enableLsstDebug:
239 try:
240 _LOG.debug("Will try to import debug.py")
241 import debug # type: ignore # noqa:F401
242 except ImportError:
243 _LOG.warn("No 'debug' module found.")
245 # initialize global state
246 self.initGlobals(quantum)
248 # Ensure that we are executing a frozen config
249 taskDef.config.freeze()
250 logInfo(None, "init", metadata=quantumMetadata) # type: ignore[arg-type]
251 init_input_refs = list(quantum.initInputs.values())
252 task = self.taskFactory.makeTask(taskDef, limited_butler, init_input_refs)
253 logInfo(None, "start", metadata=quantumMetadata) # type: ignore[arg-type]
254 try:
255 if self.mock:
256 # Use mock task instance to execute method.
257 runTask = self._makeMockTask(taskDef)
258 else:
259 runTask = task
260 self.runQuantum(runTask, quantum, taskDef, limited_butler)
261 except Exception as e:
262 _LOG.error(
263 "Execution of task '%s' on quantum %s failed. Exception %s: %s",
264 taskDef.label,
265 quantum.dataId,
266 e.__class__.__name__,
267 str(e),
268 )
269 raise
270 logInfo(None, "end", metadata=quantumMetadata) # type: ignore[arg-type]
271 fullMetadata = task.getFullMetadata()
272 fullMetadata["quantum"] = quantumMetadata
273 self.writeMetadata(quantum, fullMetadata, taskDef, limited_butler)
274 stopTime = time.time()
275 _LOG.info(
276 "Execution of task '%s' on quantum %s took %.3f seconds",
277 taskDef.label,
278 quantum.dataId,
279 stopTime - startTime,
280 )
281 return quantum
283 def _makeMockTask(self, taskDef: TaskDef) -> PipelineTask:
284 """Make an instance of mock task for given TaskDef."""
285 # Make config instance and apply overrides
286 overrides = ConfigOverrides()
287 for action in self.mock_configs:
288 if action.label == taskDef.label + "-mock":
289 if action.action == "config":
290 key, _, value = action.value.partition("=")
291 overrides.addValueOverride(key, value)
292 elif action.action == "configfile":
293 overrides.addFileOverride(os.path.expandvars(action.value))
294 else:
295 raise ValueError(f"Unexpected action for mock task config overrides: {action}")
296 config = MockPipelineTask.ConfigClass()
297 overrides.applyTo(config)
299 task = MockPipelineTask(config=config, name=taskDef.label)
300 return task
    def checkExistingOutputs(self, quantum: Quantum, taskDef: TaskDef, limited_butler: LimitedButler) -> bool:
        """Decide whether this quantum needs to be executed.

        If only partial outputs exist then they are removed if
        ``clobberOutputs`` is True, otherwise an exception is raised.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Quantum to check for existing outputs
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.
        limited_butler : `~lsst.daf.butler.LimitedButler`
            Butler used to check for the existence of the metadata dataset.

        Returns
        -------
        exist : `bool`
            `True` if ``self.skipExisting`` is defined, and a previous
            execution of this quanta appears to have completed successfully
            (either because metadata was written or all datasets were written).
            `False` otherwise.

        Raises
        ------
        RuntimeError
            Raised if some outputs exist and some not.
        """
        if not self.butler:
            # Skip/prune logic only works for full butler.
            return False

        if self.skipExisting and taskDef.metadataDatasetName is not None:
            # Metadata output exists; this is sufficient to assume the previous
            # run was successful and should be skipped.
            [metadata_ref] = quantum.outputs[taskDef.metadataDatasetName]
            if metadata_ref is not None:
                if limited_butler.datastore.exists(metadata_ref):
                    return True

        # Find and prune (partial) outputs if `self.clobberOutputs` is set.
        # mexists() gives a single datastore round trip for all output refs.
        ref_dict = self.butler.datastore.mexists(chain.from_iterable(quantum.outputs.values()))
        existingRefs = [ref for ref, exists in ref_dict.items() if exists]
        missingRefs = [ref for ref, exists in ref_dict.items() if not exists]
        if existingRefs:
            if not missingRefs:
                # Full outputs exist.
                if self.skipExisting:
                    return True
                elif self.clobberOutputs:
                    _LOG.info("Removing complete outputs for quantum %s: %s", quantum, existingRefs)
                    self.butler.pruneDatasets(existingRefs, disassociate=True, unstore=True, purge=True)
                else:
                    raise RuntimeError(
                        f"Complete outputs exists for a quantum {quantum} "
                        "and neither clobberOutputs nor skipExisting is set: "
                        f"collection={self.butler.run} existingRefs={existingRefs}"
                    )
            else:
                # Partial outputs from a failed quantum.
                _LOG.debug(
                    "Partial outputs exist for quantum %s collection=%s existingRefs=%s missingRefs=%s",
                    quantum,
                    self.butler.run,
                    existingRefs,
                    missingRefs,
                )
                if self.clobberOutputs:
                    # only prune
                    _LOG.info("Removing partial outputs for task %s: %s", taskDef, existingRefs)
                    self.butler.pruneDatasets(existingRefs, disassociate=True, unstore=True, purge=True)
                    return False
                else:
                    raise RuntimeError(
                        "Registry inconsistency while checking for existing quantum outputs:"
                        f" quantum={quantum} collection={self.butler.run} existingRefs={existingRefs}"
                        f" missingRefs={missingRefs}"
                    )

        # By default always execute.
        return False
    def updatedQuantumInputs(
        self, quantum: Quantum, taskDef: TaskDef, limited_butler: LimitedButler
    ) -> Quantum:
        """Update quantum with extra information, returns a new updated
        Quantum.

        Some methods may require input DatasetRefs to have non-None
        ``dataset_id``, but in case of intermediate dataset it may not be
        filled during QuantumGraph construction. This method will retrieve
        missing info from registry.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.
        limited_butler : `~lsst.daf.butler.LimitedButler`
            Butler used to check whether input datasets actually exist.

        Returns
        -------
        update : `~lsst.daf.butler.Quantum`
            Updated Quantum instance.  If any inputs were dropped, the task's
            ``adjust_in_place`` hook is invoked, which may raise
            `~lsst.pipe.base.NoWorkFound` (propagated to the caller).
        """
        anyChanges = False
        updatedInputs: defaultdict[DatasetType, list] = defaultdict(list)
        for key, refsForDatasetType in quantum.inputs.items():
            newRefsForDatasetType = updatedInputs[key]
            for ref in refsForDatasetType:
                # Inputs may already be resolved even if they do not exist, but
                # we have to re-resolve them because IDs are ignored on output.
                # Check datastore for existence first to cover calibration
                # dataset types, as they would need a timespan for findDataset.
                resolvedRef: DatasetRef | None
                checked_datastore = False
                if limited_butler.datastore.exists(ref):
                    resolvedRef = ref
                    checked_datastore = True
                elif self.butler is not None:
                    # This branch is for mock execution only which does not
                    # generate actual outputs, only adds datasets to registry.
                    resolvedRef = self.butler.registry.findDataset(ref.datasetType, ref.dataId)
                    if resolvedRef is None:
                        _LOG.info("No dataset found for %s", ref)
                        continue
                    else:
                        _LOG.debug("Updated dataset ID for %s", ref)
                else:
                    # QBB with missing intermediate
                    _LOG.info("No dataset found for %s", ref)
                    continue

                # In case of mock execution we check that mock dataset exists
                # instead. Mock execution is only possible with full butler.
                if self.mock and self.butler is not None:
                    try:
                        # For a component ref, the mock dataset type is named
                        # after the parent composite type.
                        typeName, component = ref.datasetType.nameAndComponent()
                        if component is not None:
                            mockDatasetTypeName = MockButlerQuantumContext.mockDatasetTypeName(typeName)
                        else:
                            mockDatasetTypeName = MockButlerQuantumContext.mockDatasetTypeName(
                                ref.datasetType.name
                            )

                        mockDatasetType = self.butler.registry.getDatasetType(mockDatasetTypeName)
                    except KeyError:
                        # means that mock dataset type is not there and this
                        # should be a pre-existing dataset
                        _LOG.debug("No mock dataset type for %s", ref)
                        if self.butler.datastore.exists(resolvedRef):
                            newRefsForDatasetType.append(resolvedRef)
                    else:
                        resolvedMockRef = self.butler.registry.findDataset(
                            mockDatasetType, ref.dataId, collections=self.butler.collections
                        )
                        _LOG.debug(
                            "mockRef=(%s, %s) resolvedMockRef=%s",
                            mockDatasetType,
                            ref.dataId,
                            resolvedMockRef,
                        )
                        if resolvedMockRef is not None and self.butler.datastore.exists(resolvedMockRef):
                            _LOG.debug("resolvedMockRef dataset exists")
                            # Keep the real (non-mock) ref in the inputs.
                            newRefsForDatasetType.append(resolvedRef)
                elif checked_datastore or limited_butler.datastore.exists(resolvedRef):
                    # We need to ask datastore if the dataset actually exists
                    # because the Registry of a local "execution butler"
                    # cannot know this (because we prepopulate it with all of
                    # the datasets that might be created).
                    newRefsForDatasetType.append(resolvedRef)

            if len(newRefsForDatasetType) != len(refsForDatasetType):
                anyChanges = True
        # If we removed any input datasets, let the task check if it has enough
        # to proceed and/or prune related datasets that it also doesn't
        # need/produce anymore. It will raise NoWorkFound if it can't run,
        # which we'll let propagate up. This is exactly what we run during QG
        # generation, because a task shouldn't care whether an input is missing
        # because some previous task didn't produce it, or because it just
        # wasn't there during QG generation.
        namedUpdatedInputs = NamedKeyDict[DatasetType, list[DatasetRef]](updatedInputs.items())
        helper = AdjustQuantumHelper(namedUpdatedInputs, quantum.outputs)
        if anyChanges:
            assert quantum.dataId is not None, "Quantum DataId cannot be None"
            helper.adjust_in_place(taskDef.connections, label=taskDef.label, data_id=quantum.dataId)
        return Quantum(
            taskName=quantum.taskName,
            taskClass=quantum.taskClass,
            dataId=quantum.dataId,
            initInputs=quantum.initInputs,
            inputs=helper.inputs,
            outputs=helper.outputs,
        )
495 def runQuantum(
496 self, task: PipelineTask, quantum: Quantum, taskDef: TaskDef, limited_butler: LimitedButler
497 ) -> None:
498 """Execute task on a single quantum.
500 Parameters
501 ----------
502 task : `~lsst.pipe.base.PipelineTask`
503 Task object.
504 quantum : `~lsst.daf.butler.Quantum`
505 Single Quantum instance.
506 taskDef : `~lsst.pipe.base.TaskDef`
507 Task definition structure.
508 """
509 # Create a butler that operates in the context of a quantum
510 if self.butler is None:
511 butlerQC = ButlerQuantumContext.from_limited(limited_butler, quantum)
512 else:
513 if self.mock:
514 butlerQC = MockButlerQuantumContext(self.butler, quantum)
515 else:
516 butlerQC = ButlerQuantumContext.from_full(self.butler, quantum)
518 # Get the input and output references for the task
519 inputRefs, outputRefs = taskDef.connections.buildDatasetRefs(quantum)
521 # Call task runQuantum() method. Catch a few known failure modes and
522 # translate them into specific
523 try:
524 task.runQuantum(butlerQC, inputRefs, outputRefs)
525 except NoWorkFound as err:
526 # Not an error, just an early exit.
527 _LOG.info("Task '%s' on quantum %s exited early: %s", taskDef.label, quantum.dataId, str(err))
528 pass
529 except RepeatableQuantumError as err:
530 if self.exitOnKnownError:
531 _LOG.warning("Caught repeatable quantum error for %s (%s):", taskDef, quantum.dataId)
532 _LOG.warning(err, exc_info=True)
533 sys.exit(err.EXIT_CODE)
534 else:
535 raise
536 except InvalidQuantumError as err:
537 _LOG.fatal("Invalid quantum error for %s (%s): %s", taskDef, quantum.dataId)
538 _LOG.fatal(err, exc_info=True)
539 sys.exit(err.EXIT_CODE)
541 def writeMetadata(
542 self, quantum: Quantum, metadata: Any, taskDef: TaskDef, limited_butler: LimitedButler
543 ) -> None:
544 if taskDef.metadataDatasetName is not None:
545 # DatasetRef has to be in the Quantum outputs, can lookup by name
546 try:
547 [ref] = quantum.outputs[taskDef.metadataDatasetName]
548 except LookupError as exc:
549 raise InvalidQuantumError(
550 f"Quantum outputs is missing metadata dataset type {taskDef.metadataDatasetName};"
551 " this could happen due to inconsistent options between QuantumGraph generation"
552 " and execution"
553 ) from exc
554 limited_butler.put(metadata, ref)
556 def initGlobals(self, quantum: Quantum) -> None:
557 """Initialize global state needed for task execution.
559 Parameters
560 ----------
561 quantum : `~lsst.daf.butler.Quantum`
562 Single Quantum instance.
564 Notes
565 -----
566 There is an issue with initializing filters singleton which is done
567 by instrument, to avoid requiring tasks to do it in runQuantum()
568 we do it here when any dataId has an instrument dimension. Also for
569 now we only allow single instrument, verify that all instrument
570 names in all dataIds are identical.
572 This will need revision when filter singleton disappears.
573 """
574 # can only work for full butler
575 if self.butler is None:
576 return
577 oneInstrument = None
578 for datasetRefs in chain(quantum.inputs.values(), quantum.outputs.values()):
579 for datasetRef in datasetRefs:
580 dataId = datasetRef.dataId
581 instrument = dataId.get("instrument")
582 if instrument is not None:
583 if oneInstrument is not None:
584 assert ( # type: ignore
585 instrument == oneInstrument
586 ), "Currently require that only one instrument is used per graph"
587 else:
588 oneInstrument = instrument
589 Instrument.fromName(instrument, self.butler.registry)
591 def getReport(self) -> Optional[QuantumReport]:
592 # Docstring inherited from base class
593 if self.report is None:
594 raise RuntimeError("getReport() called before execute()")
595 return self.report