Coverage for python/lsst/ctrl/mpexec/singleQuantumExecutor.py: 10%
189 statements
« prev ^ index » next coverage.py v7.4.1, created at 2024-02-13 11:01 +0000
1# This file is part of ctrl_mpexec.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28__all__ = ["SingleQuantumExecutor"]
30# -------------------------------
31# Imports of standard modules --
32# -------------------------------
33import logging
34import sys
35import time
36from collections import defaultdict
37from collections.abc import Callable
38from itertools import chain
39from typing import Any, cast
41from lsst.daf.butler import (
42 Butler,
43 CollectionType,
44 DatasetRef,
45 DatasetType,
46 LimitedButler,
47 NamedKeyDict,
48 Quantum,
49)
50from lsst.daf.butler.registry.wildcards import CollectionWildcard
51from lsst.pipe.base import (
52 AdjustQuantumHelper,
53 ExecutionResources,
54 Instrument,
55 InvalidQuantumError,
56 NoWorkFound,
57 PipelineTask,
58 QuantumContext,
59 RepeatableQuantumError,
60 TaskDef,
61 TaskFactory,
62)
64# During metadata transition phase, determine metadata class by
65# asking pipe_base
66from lsst.pipe.base.task import _TASK_FULL_METADATA_TYPE, _TASK_METADATA_TYPE
67from lsst.utils.timer import logInfo
69# -----------------------------
70# Imports for other modules --
71# -----------------------------
72from .log_capture import LogCapture
73from .quantumGraphExecutor import QuantumExecutor
74from .reports import QuantumReport
76# ----------------------------------
77# Local non-exported definitions --
78# ----------------------------------
80_LOG = logging.getLogger(__name__)
class SingleQuantumExecutor(QuantumExecutor):
    """Executor class which runs one Quantum at a time.

    Parameters
    ----------
    butler : `~lsst.daf.butler.Butler` or `None`
        Data butler, `None` means that Quantum-backed butler should be used
        instead.
    taskFactory : `~lsst.pipe.base.TaskFactory`
        Instance of a task factory.
    skipExistingIn : `~typing.Any`
        Expressions representing the collections to search for existing
        output datasets. See :ref:`daf_butler_ordered_collection_searches`
        for allowed types. This class only checks for the presence of butler
        output run in the list of collections. If the output run is present
        in the list then the quanta whose complete outputs exist in the output
        run will be skipped. `None` or empty string/sequence disables skipping.
    clobberOutputs : `bool`, optional
        If `True`, then outputs from a quantum that exist in output run
        collection will be removed prior to executing a quantum. If
        ``skipExistingIn`` contains output run, then only partial outputs from
        a quantum will be removed. Only used when ``butler`` is not `None`.
    enableLsstDebug : `bool`, optional
        Enable debugging with ``lsstDebug`` facility for a task.
    exitOnKnownError : `bool`, optional
        If `True`, call `sys.exit` with the appropriate exit code for special
        known exceptions, after printing a traceback, instead of letting the
        exception propagate up to calling. This is always the behavior for
        InvalidQuantumError.
    limited_butler_factory : `Callable`, optional
        A method that creates a `~lsst.daf.butler.LimitedButler` instance
        for a given Quantum. This parameter must be defined if ``butler`` is
        `None`. If ``butler`` is not `None` then this parameter is ignored.
    resources : `~lsst.pipe.base.ExecutionResources`, optional
        The resources available to this quantum when executing.
    """

    def __init__(
        self,
        butler: Butler | None,
        taskFactory: TaskFactory,
        skipExistingIn: Any = None,
        clobberOutputs: bool = False,
        enableLsstDebug: bool = False,
        exitOnKnownError: bool = False,
        limited_butler_factory: Callable[[Quantum], LimitedButler] | None = None,
        resources: ExecutionResources | None = None,
    ):
        self.butler = butler
        self.taskFactory = taskFactory
        self.enableLsstDebug = enableLsstDebug
        self.clobberOutputs = clobberOutputs
        self.exitOnKnownError = exitOnKnownError
        self.limited_butler_factory = limited_butler_factory
        self.report: QuantumReport | None = None
        self.resources = resources

        if self.butler is None:
            assert limited_butler_factory is not None, "limited_butler_factory is needed when butler is None"

        # Find whether output run is in skipExistingIn.
        # TODO: This duplicates logic in GraphBuilder, would be nice to have
        # better abstraction for this some day.
        self.skipExisting = False
        if self.butler is not None and skipExistingIn:
            skip_collections_wildcard = CollectionWildcard.from_expression(skipExistingIn)
            # As optimization check in the explicit list of names first
            self.skipExisting = self.butler.run in skip_collections_wildcard.strings
            if not self.skipExisting:
                # need to flatten it and check again
                self.skipExisting = self.butler.run in self.butler.registry.queryCollections(
                    skipExistingIn,
                    collectionTypes=CollectionType.RUN,
                )

    def execute(self, taskDef: TaskDef, quantum: Quantum) -> Quantum:
        # Docstring inherited from QuantumExecutor.execute
        assert quantum.dataId is not None, "Quantum DataId cannot be None"

        if self.butler is not None:
            self.butler.registry.refresh()

        # Catch any exception and make a report based on that.
        try:
            result = self._execute(taskDef, quantum)
            self.report = QuantumReport(dataId=quantum.dataId, taskLabel=taskDef.label)
            return result
        except Exception as exc:
            self.report = QuantumReport.from_exception(
                exception=exc,
                dataId=quantum.dataId,
                taskLabel=taskDef.label,
            )
            raise

    def _execute(self, taskDef: TaskDef, quantum: Quantum) -> Quantum:
        """Execute the quantum.

        Internal implementation of `execute()`.
        """
        startTime = time.time()

        # Make a limited butler instance if needed (which should be QBB if full
        # butler is not defined).
        limited_butler: LimitedButler
        if self.butler is not None:
            limited_butler = self.butler
        else:
            # We check this in constructor, but mypy needs this check here.
            assert self.limited_butler_factory is not None
            limited_butler = self.limited_butler_factory(quantum)

        if self.butler is not None:
            log_capture = LogCapture.from_full(self.butler)
        else:
            log_capture = LogCapture.from_limited(limited_butler)
        with log_capture.capture_logging(taskDef, quantum) as captureLog:
            # Save detailed resource usage before task start to metadata.
            quantumMetadata = _TASK_METADATA_TYPE()
            logInfo(None, "prep", metadata=quantumMetadata)  # type: ignore[arg-type]

            _LOG.info("Preparing execution of quantum for label=%s dataId=%s.", taskDef.label, quantum.dataId)

            # check whether to skip or delete old outputs, if it returns True
            # or raises an exception do not try to store logs, as they may be
            # already in butler.
            captureLog.store = False
            if self.checkExistingOutputs(quantum, taskDef, limited_butler):
                _LOG.info(
                    "Skipping already-successful quantum for label=%s dataId=%s.",
                    taskDef.label,
                    quantum.dataId,
                )
                return quantum
            captureLog.store = True

            try:
                quantum = self.updatedQuantumInputs(quantum, taskDef, limited_butler)
            except NoWorkFound as exc:
                _LOG.info(
                    "Nothing to do for task '%s' on quantum %s; saving metadata and skipping: %s",
                    taskDef.label,
                    quantum.dataId,
                    str(exc),
                )
                # Make empty metadata that looks something like what a
                # do-nothing task would write (but we don't bother with empty
                # nested PropertySets for subtasks).  This is slightly
                # duplicative with logic in pipe_base that we can't easily call
                # from here; we'll fix this on DM-29761.
                logInfo(None, "end", metadata=quantumMetadata)  # type: ignore[arg-type]
                fullMetadata = _TASK_FULL_METADATA_TYPE()
                fullMetadata[taskDef.label] = _TASK_METADATA_TYPE()
                fullMetadata["quantum"] = quantumMetadata
                self.writeMetadata(quantum, fullMetadata, taskDef, limited_butler)
                return quantum

            # enable lsstDebug debugging
            if self.enableLsstDebug:
                try:
                    _LOG.debug("Will try to import debug.py")
                    import debug  # type: ignore # noqa:F401
                except ImportError:
                    # Logger.warn is a deprecated alias (removed in Python
                    # 3.13); use warning().
                    _LOG.warning("No 'debug' module found.")

            # initialize global state
            self.initGlobals(quantum)

            # Ensure that we are executing a frozen config
            taskDef.config.freeze()
            logInfo(None, "init", metadata=quantumMetadata)  # type: ignore[arg-type]
            init_input_refs = list(quantum.initInputs.values())

            _LOG.info(
                "Constructing task and executing quantum for label=%s dataId=%s.",
                taskDef.label,
                quantum.dataId,
            )
            task = self.taskFactory.makeTask(taskDef, limited_butler, init_input_refs)
            logInfo(None, "start", metadata=quantumMetadata)  # type: ignore[arg-type]
            try:
                self.runQuantum(task, quantum, taskDef, limited_butler)
            except Exception as e:
                _LOG.error(
                    "Execution of task '%s' on quantum %s failed. Exception %s: %s",
                    taskDef.label,
                    quantum.dataId,
                    e.__class__.__name__,
                    str(e),
                )
                raise
            logInfo(None, "end", metadata=quantumMetadata)  # type: ignore[arg-type]
            fullMetadata = task.getFullMetadata()
            fullMetadata["quantum"] = quantumMetadata
            self.writeMetadata(quantum, fullMetadata, taskDef, limited_butler)
            stopTime = time.time()
            _LOG.info(
                "Execution of task '%s' on quantum %s took %.3f seconds",
                taskDef.label,
                quantum.dataId,
                stopTime - startTime,
            )
        return quantum

    def checkExistingOutputs(self, quantum: Quantum, taskDef: TaskDef, limited_butler: LimitedButler) -> bool:
        """Decide whether this quantum needs to be executed.

        If only partial outputs exist then they are removed if
        ``clobberOutputs`` is True, otherwise an exception is raised.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Quantum to check for existing outputs.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.
        limited_butler : `~lsst.daf.butler.LimitedButler`
            Butler to use for querying.

        Returns
        -------
        exist : `bool`
            `True` if ``self.skipExisting`` is defined, and a previous
            execution of this quanta appears to have completed successfully
            (either because metadata was written or all datasets were written).
            `False` otherwise.

        Raises
        ------
        RuntimeError
            Raised if some outputs exist and some not.
        """
        if not self.butler:
            # Skip/prune logic only works for full butler.
            return False

        if self.skipExisting:
            _LOG.debug(
                "Checking existence of metadata from previous execution of label=%s dataId=%s.",
                taskDef.label,
                quantum.dataId,
            )
            # Metadata output exists; this is sufficient to assume the previous
            # run was successful and should be skipped.
            [metadata_ref] = quantum.outputs[taskDef.metadataDatasetName]
            if metadata_ref is not None:
                if limited_butler.stored(metadata_ref):
                    return True

        # Find and prune (partial) outputs if `self.clobberOutputs` is set.
        _LOG.debug(
            "Looking for existing outputs in the way for label=%s dataId=%s.", taskDef.label, quantum.dataId
        )
        ref_dict = self.butler.stored_many(chain.from_iterable(quantum.outputs.values()))
        existingRefs = [ref for ref, exists in ref_dict.items() if exists]
        missingRefs = [ref for ref, exists in ref_dict.items() if not exists]
        if existingRefs:
            if not missingRefs:
                # Full outputs exist.
                if self.skipExisting:
                    return True
                elif self.clobberOutputs:
                    _LOG.info("Removing complete outputs for quantum %s: %s", quantum, existingRefs)
                    self.butler.pruneDatasets(existingRefs, disassociate=True, unstore=True, purge=True)
                else:
                    raise RuntimeError(
                        f"Complete outputs exists for a quantum {quantum} "
                        "and neither clobberOutputs nor skipExisting is set: "
                        f"collection={self.butler.run} existingRefs={existingRefs}"
                    )
            else:
                # Partial outputs from a failed quantum.
                _LOG.debug(
                    "Partial outputs exist for quantum %s collection=%s existingRefs=%s missingRefs=%s",
                    quantum,
                    self.butler.run,
                    existingRefs,
                    missingRefs,
                )
                if self.clobberOutputs:
                    # only prune
                    _LOG.info("Removing partial outputs for task %s: %s", taskDef, existingRefs)
                    self.butler.pruneDatasets(existingRefs, disassociate=True, unstore=True, purge=True)
                    return False
                else:
                    raise RuntimeError(
                        "Registry inconsistency while checking for existing quantum outputs:"
                        f" quantum={quantum} collection={self.butler.run} existingRefs={existingRefs}"
                        f" missingRefs={missingRefs}"
                    )

        # By default always execute.
        return False

    def updatedQuantumInputs(
        self, quantum: Quantum, taskDef: TaskDef, limited_butler: LimitedButler
    ) -> Quantum:
        """Update quantum with extra information, returns a new updated
        Quantum.

        Some methods may require input DatasetRefs to have non-None
        ``dataset_id``, but in case of intermediate dataset it may not be
        filled during QuantumGraph construction. This method will retrieve
        missing info from registry.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.
        limited_butler : `~lsst.daf.butler.LimitedButler`
            Butler to use for querying.

        Returns
        -------
        update : `~lsst.daf.butler.Quantum`
            Updated Quantum instance.
        """
        anyChanges = False
        updatedInputs: defaultdict[DatasetType, list] = defaultdict(list)
        for key, refsForDatasetType in quantum.inputs.items():
            _LOG.debug(
                "Checking existence of input '%s' for label=%s dataId=%s.",
                key.name,
                taskDef.label,
                quantum.dataId,
            )
            newRefsForDatasetType = updatedInputs[key]
            stored = limited_butler.stored_many(refsForDatasetType)
            for ref in refsForDatasetType:
                if stored[ref]:
                    newRefsForDatasetType.append(ref)
                else:
                    # This should only happen if a predicted intermediate was
                    # not actually produced upstream, but
                    # datastore misconfigurations can unfortunately also land
                    # us here.
                    _LOG.info("No dataset artifact found for %s", ref)
            if len(newRefsForDatasetType) != len(refsForDatasetType):
                anyChanges = True
        # If we removed any input datasets, let the task check if it has enough
        # to proceed and/or prune related datasets that it also doesn't
        # need/produce anymore.  It will raise NoWorkFound if it can't run,
        # which we'll let propagate up.  This is exactly what we run during QG
        # generation, because a task shouldn't care whether an input is missing
        # because some previous task didn't produce it, or because it just
        # wasn't there during QG generation.
        namedUpdatedInputs = NamedKeyDict[DatasetType, list[DatasetRef]](updatedInputs.items())
        helper = AdjustQuantumHelper(namedUpdatedInputs, quantum.outputs)
        if anyChanges:
            _LOG.debug("Running adjustQuantum for label=%s dataId=%s.", taskDef.label, quantum.dataId)
            assert quantum.dataId is not None, "Quantum DataId cannot be None"
            helper.adjust_in_place(taskDef.connections, label=taskDef.label, data_id=quantum.dataId)
        return Quantum(
            taskName=quantum.taskName,
            taskClass=quantum.taskClass,
            dataId=quantum.dataId,
            initInputs=quantum.initInputs,
            inputs=helper.inputs,
            outputs=helper.outputs,
        )

    def runQuantum(
        self, task: PipelineTask, quantum: Quantum, taskDef: TaskDef, limited_butler: LimitedButler
    ) -> None:
        """Execute task on a single quantum.

        Parameters
        ----------
        task : `~lsst.pipe.base.PipelineTask`
            Task object.
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.
        limited_butler : `~lsst.daf.butler.LimitedButler`
            Butler to use for dataset I/O.
        """
        # Create a butler that operates in the context of a quantum
        butlerQC = QuantumContext(limited_butler, quantum, resources=self.resources)

        # Get the input and output references for the task
        inputRefs, outputRefs = taskDef.connections.buildDatasetRefs(quantum)

        # Call task runQuantum() method.  Catch a few known failure modes and
        # translate them into specific
        try:
            task.runQuantum(butlerQC, inputRefs, outputRefs)
        except NoWorkFound as err:
            # Not an error, just an early exit.
            _LOG.info("Task '%s' on quantum %s exited early: %s", taskDef.label, quantum.dataId, str(err))
        except RepeatableQuantumError as err:
            if self.exitOnKnownError:
                _LOG.warning("Caught repeatable quantum error for %s (%s):", taskDef, quantum.dataId)
                _LOG.warning(err, exc_info=True)
                sys.exit(err.EXIT_CODE)
            else:
                raise
        except InvalidQuantumError as err:
            # The format string has three placeholders; pass the exception
            # text as the third argument so the log record renders correctly.
            _LOG.fatal("Invalid quantum error for %s (%s): %s", taskDef, quantum.dataId, str(err))
            _LOG.fatal(err, exc_info=True)
            sys.exit(err.EXIT_CODE)

    def writeMetadata(
        self, quantum: Quantum, metadata: Any, taskDef: TaskDef, limited_butler: LimitedButler
    ) -> None:
        """Store task metadata for this quantum in butler.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance; its outputs must include the metadata
            dataset for this task.
        metadata : `~typing.Any`
            Metadata object to store.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.
        limited_butler : `~lsst.daf.butler.LimitedButler`
            Butler to use for dataset I/O.

        Raises
        ------
        InvalidQuantumError
            Raised if quantum outputs do not contain the metadata dataset.
        """
        # DatasetRef has to be in the Quantum outputs, can lookup by name
        try:
            [ref] = quantum.outputs[taskDef.metadataDatasetName]
        except LookupError as exc:
            raise InvalidQuantumError(
                f"Quantum outputs is missing metadata dataset type {taskDef.metadataDatasetName};"
                " this could happen due to inconsistent options between QuantumGraph generation"
                " and execution"
            ) from exc
        limited_butler.put(metadata, ref)

    def initGlobals(self, quantum: Quantum) -> None:
        """Initialize global state needed for task execution.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.

        Notes
        -----
        There is an issue with initializing filters singleton which is done
        by instrument, to avoid requiring tasks to do it in runQuantum()
        we do it here when any dataId has an instrument dimension. Also for
        now we only allow single instrument, verify that all instrument
        names in all dataIds are identical.

        This will need revision when filter singleton disappears.
        """
        # can only work for full butler
        if self.butler is None:
            return
        oneInstrument = None
        for datasetRefs in chain(quantum.inputs.values(), quantum.outputs.values()):
            for datasetRef in datasetRefs:
                dataId = datasetRef.dataId
                instrument = cast(str, dataId.get("instrument"))
                if instrument is not None:
                    if oneInstrument is not None:
                        assert (  # type: ignore
                            instrument == oneInstrument
                        ), "Currently require that only one instrument is used per graph"
                    else:
                        oneInstrument = instrument
                        Instrument.fromName(instrument, self.butler.registry)

    def getReport(self) -> QuantumReport | None:
        """Return the execution report from the last call to `execute`.

        Raises
        ------
        RuntimeError
            Raised if called before `execute`.
        """
        # Docstring inherited from base class
        if self.report is None:
            raise RuntimeError("getReport() called before execute()")
        return self.report