Coverage for python/lsst/ctrl/mpexec/singleQuantumExecutor.py : 11%

# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

__all__ = ['SingleQuantumExecutor']

# -------------------------------
# Imports of standard modules --
# -------------------------------
import logging
import sys
import tempfile
import time
from contextlib import contextmanager
from collections import defaultdict
from itertools import chain
from logging import FileHandler
from typing import List

# -----------------------------
# Imports for other modules --
# -----------------------------
from .quantumGraphExecutor import QuantumExecutor
from lsst.daf.base import PropertyList, PropertySet
from lsst.obs.base import Instrument
from lsst.pipe.base import (
    AdjustQuantumHelper,
    ButlerQuantumContext,
    InvalidQuantumError,
    NoWorkFound,
    RepeatableQuantumError,
    logInfo,
)
from lsst.daf.butler import (
    DatasetRef,
    DatasetType,
    FileDataset,
    NamedKeyDict,
    Quantum,
)
from lsst.daf.butler.core.logging import (
    ButlerLogRecordHandler,
    ButlerMDC,
    JsonLogFormatter,
)

# ----------------------------------
# Local non-exported definitions --
# ----------------------------------

_LOG = logging.getLogger(__name__.partition(".")[2])


class SingleQuantumExecutor(QuantumExecutor):
    """Executor class which runs one Quantum at a time.

    Parameters
    ----------
    butler : `~lsst.daf.butler.Butler`
        Data butler.
    taskFactory : `~lsst.pipe.base.TaskFactory`
        Instance of a task factory.
    skipExisting : `bool`, optional
        If `True`, quanta that succeeded previously will not be rerun.
    clobberOutputs : `bool`, optional
        If `True`, existing outputs will be overwritten.  If ``skipExisting``
        is also `True`, only outputs from failed quanta will be overwritten.
    enableLsstDebug : `bool`, optional
        Enable debugging with the ``lsstDebug`` facility for a task.
    exitOnKnownError : `bool`, optional
        If `True`, call `sys.exit` with the appropriate exit code for special
        known exceptions, after printing a traceback, instead of letting the
        exception propagate up to the calling code.  This is always the
        behavior for `InvalidQuantumError`.
    """

    stream_json_logs = True
    """If `True`, each log record is written to a temporary file and ingested
    when the quantum completes.  If `False`, the records are accumulated in
    memory and stored in the butler on quantum completion."""

    def __init__(self, taskFactory, skipExisting=False, clobberOutputs=False, enableLsstDebug=False,
                 exitOnKnownError=False):
        self.taskFactory = taskFactory
        self.skipExisting = skipExisting
        self.enableLsstDebug = enableLsstDebug
        self.clobberOutputs = clobberOutputs
        self.exitOnKnownError = exitOnKnownError
        self.log_handler = None

    def execute(self, taskDef, quantum, butler):
        # Docstring inherited from QuantumExecutor.execute
        startTime = time.time()

        with self.captureLogging(taskDef, quantum, butler):
            # Save detailed resource usage before task start to metadata.
            quantumMetadata = PropertyList()
            logInfo(None, "prep", metadata=quantumMetadata)

            taskClass, label, config = taskDef.taskClass, taskDef.label, taskDef.config

            # check whether to skip or delete old outputs
            if self.checkExistingOutputs(quantum, butler, taskDef):
                _LOG.info("Skipping already-successful quantum for label=%s dataId=%s.", label,
                          quantum.dataId)
                return

            try:
                quantum = self.updatedQuantumInputs(quantum, butler, taskDef)
            except NoWorkFound as exc:
                _LOG.info("Nothing to do for task '%s' on quantum %s; saving metadata and skipping: %s",
                          taskDef.label, quantum.dataId, str(exc))
                # Make empty metadata that looks something like what a
                # do-nothing task would write (but we don't bother with empty
                # nested PropertySets for subtasks).  This is slightly
                # duplicative with logic in pipe_base that we can't easily
                # call from here; we'll fix this on DM-29761.
                logInfo(None, "end", metadata=quantumMetadata)
                fullMetadata = PropertySet()
                fullMetadata[taskDef.label] = PropertyList()
                fullMetadata["quantum"] = quantumMetadata
                self.writeMetadata(quantum, fullMetadata, taskDef, butler)
                return

            # enable lsstDebug debugging
            if self.enableLsstDebug:
                try:
                    _LOG.debug("Will try to import debug.py")
                    import debug  # noqa:F401
                except ImportError:
                    _LOG.warning("No 'debug' module found.")

            # initialize global state
            self.initGlobals(quantum, butler)

            # Ensure that we are executing a frozen config
            config.freeze()
            logInfo(None, "init", metadata=quantumMetadata)
            task = self.makeTask(taskClass, label, config, butler)
            logInfo(None, "start", metadata=quantumMetadata)
            try:
                self.runQuantum(task, quantum, taskDef, butler)
            except Exception:
                _LOG.exception("Execution of task '%s' on quantum %s failed",
                               taskDef.label, quantum.dataId)
                raise
            logInfo(None, "end", metadata=quantumMetadata)
            fullMetadata = task.getFullMetadata()
            fullMetadata["quantum"] = quantumMetadata
            self.writeMetadata(quantum, fullMetadata, taskDef, butler)
            stopTime = time.time()
            _LOG.info("Execution of task '%s' on quantum %s took %.3f seconds",
                      taskDef.label, quantum.dataId, stopTime - startTime)

    @contextmanager
    def captureLogging(self, taskDef, quantum, butler):
        """Configure the logging system to capture logs for execution of
        this task.

        Parameters
        ----------
        taskDef : `lsst.pipe.base.TaskDef`
            The task definition.
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        butler : `~lsst.daf.butler.Butler`
            Butler to write logs to.

        Notes
        -----
        Expected to be used as a context manager to ensure that logging
        records are inserted into the butler once the quantum has been
        executed:

        .. code-block:: py

           with self.captureLogging(taskDef, quantum, butler):
               # Run quantum and capture logs.

        This method can also set up logging to attach task- or
        quantum-specific information to log messages.  Potentially this can
        take into account some info from task configuration as well.
        """
        # Add a handler to the root logger to capture execution log output.
        # How does it get removed reliably?
        if taskDef.logOutputDatasetName is not None:
            # Either accumulate into ButlerLogRecords or stream
            # JSON records to file and ingest that.
            if self.stream_json_logs:
                tmp = tempfile.NamedTemporaryFile(mode="w",
                                                  suffix=".json",
                                                  prefix=f"butler-log-{taskDef.label}-",
                                                  delete=False)
                self.log_handler = FileHandler(tmp.name)
                tmp.close()
                self.log_handler.setFormatter(JsonLogFormatter())
            else:
                self.log_handler = ButlerLogRecordHandler()

            logging.getLogger().addHandler(self.log_handler)

        # include quantum dataId and task label into MDC
        label = taskDef.label
        if quantum.dataId:
            label += f":{quantum.dataId}"

        try:
            with ButlerMDC.set_mdc({"LABEL": label}):
                yield
        finally:
            # Ensure that the logs are stored in butler.
            self.writeLogRecords(quantum, taskDef, butler)

    def checkExistingOutputs(self, quantum, butler, taskDef):
        """Decide whether this quantum needs to be executed.

        If only partial outputs exist, they are removed if ``clobberOutputs``
        is `True`; otherwise an exception is raised.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Quantum to check for existing outputs.
        butler : `~lsst.daf.butler.Butler`
            Data butler.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.

        Returns
        -------
        exist : `bool`
            `True` if ``self.skipExisting`` is `True` and a previous execution
            of this quantum appears to have completed successfully (either
            because metadata was written or because all datasets were
            written).  `False` otherwise.

        Raises
        ------
        RuntimeError
            Raised if some outputs exist and some do not.
        """
        collection = butler.run
        registry = butler.registry

        if self.skipExisting and taskDef.metadataDatasetName is not None:
            # Metadata output exists; this is sufficient to assume the
            # previous run was successful and should be skipped.
            if (ref := butler.registry.findDataset(taskDef.metadataDatasetName, quantum.dataId)) is not None:
                if butler.datastore.exists(ref):
                    return True

        existingRefs = []
        missingRefs = []
        for datasetRefs in quantum.outputs.values():
            for datasetRef in datasetRefs:
                ref = registry.findDataset(datasetRef.datasetType, datasetRef.dataId,
                                           collections=butler.run)
                if ref is None:
                    missingRefs.append(datasetRef)
                else:
                    if butler.datastore.exists(ref):
                        existingRefs.append(ref)
                    else:
                        missingRefs.append(datasetRef)
        if existingRefs and missingRefs:
            # Some outputs exist and some don't, either delete existing ones
            # or complain.
            _LOG.debug("Partial outputs exist for task %s dataId=%s collection=%s "
                       "existingRefs=%s missingRefs=%s",
                       taskDef, quantum.dataId, collection, existingRefs, missingRefs)
            if self.clobberOutputs:
                _LOG.info("Removing partial outputs for task %s: %s", taskDef, existingRefs)
                butler.pruneDatasets(existingRefs, disassociate=True, unstore=True, purge=True)
                return False
            else:
                raise RuntimeError(f"Registry inconsistency while checking for existing outputs:"
                                   f" collection={collection} existingRefs={existingRefs}"
                                   f" missingRefs={missingRefs}")
        elif existingRefs:
            # complete outputs exist, this is fine only if skipExisting is set
            return self.skipExisting
        else:
            # no outputs exist
            return False

    def makeTask(self, taskClass, name, config, butler):
        """Make new task instance.

        Parameters
        ----------
        taskClass : `type`
            Sub-class of `~lsst.pipe.base.PipelineTask`.
        name : `str`
            Name for this task.
        config : `~lsst.pipe.base.PipelineTaskConfig`
            Configuration object for this task.
        butler : `~lsst.daf.butler.Butler`
            Data butler.

        Returns
        -------
        task : `~lsst.pipe.base.PipelineTask`
            Instance of ``taskClass`` type.
        """
        # call task factory for that
        return self.taskFactory.makeTask(taskClass, name, config, None, butler)

    def updatedQuantumInputs(self, quantum, butler, taskDef):
        """Update quantum with extra information; returns a new, updated
        Quantum.

        Some methods may require input DatasetRefs to have a non-None
        ``dataset_id``, but for intermediate datasets it may not be filled
        in during QuantumGraph construction.  This method retrieves the
        missing info from the registry.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        butler : `~lsst.daf.butler.Butler`
            Data butler.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.

        Returns
        -------
        update : `~lsst.daf.butler.Quantum`
            Updated Quantum instance.
        """
        anyChanges = False
        updatedInputs = defaultdict(list)
        for key, refsForDatasetType in quantum.inputs.items():
            newRefsForDatasetType = updatedInputs[key]
            for ref in refsForDatasetType:
                if ref.id is None:
                    resolvedRef = butler.registry.findDataset(ref.datasetType, ref.dataId,
                                                              collections=butler.collections)
                    if resolvedRef is None:
                        _LOG.info("No dataset found for %s", ref)
                        continue
                    else:
                        _LOG.debug("Updated dataset ID for %s", ref)
                else:
                    resolvedRef = ref
                # We need to ask datastore if the dataset actually exists
                # because the Registry of a local "execution butler" cannot
                # know this (because we prepopulate it with all of the
                # datasets that might be created).
                if butler.datastore.exists(resolvedRef):
                    newRefsForDatasetType.append(resolvedRef)
            if len(newRefsForDatasetType) != len(refsForDatasetType):
                anyChanges = True
        # If we removed any input datasets, let the task check if it has
        # enough to proceed and/or prune related datasets that it also
        # doesn't need/produce anymore.  It will raise NoWorkFound if it
        # can't run, which we'll let propagate up.  This is exactly what we
        # run during QG generation, because a task shouldn't care whether an
        # input is missing because some previous task didn't produce it, or
        # because it just wasn't there during QG generation.
        updatedInputs = NamedKeyDict[DatasetType, List[DatasetRef]](updatedInputs.items())
        helper = AdjustQuantumHelper(updatedInputs, quantum.outputs)
        if anyChanges:
            helper.adjust_in_place(taskDef.connections, label=taskDef.label, data_id=quantum.dataId)
        return Quantum(taskName=quantum.taskName,
                       taskClass=quantum.taskClass,
                       dataId=quantum.dataId,
                       initInputs=quantum.initInputs,
                       inputs=helper.inputs,
                       outputs=helper.outputs
                       )

    def runQuantum(self, task, quantum, taskDef, butler):
        """Execute task on a single quantum.

        Parameters
        ----------
        task : `~lsst.pipe.base.PipelineTask`
            Task object.
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.
        butler : `~lsst.daf.butler.Butler`
            Data butler.
        """
        # Create a butler that operates in the context of a quantum
        butlerQC = ButlerQuantumContext(butler, quantum)

        # Get the input and output references for the task
        inputRefs, outputRefs = taskDef.connections.buildDatasetRefs(quantum)

        # Call task runQuantum() method.  Catch a few known failure modes
        # and translate them into specific exit codes.
        try:
            task.runQuantum(butlerQC, inputRefs, outputRefs)
        except NoWorkFound as err:
            # Not an error, just an early exit.
            _LOG.info("Task '%s' on quantum %s exited early: %s",
                      taskDef.label, quantum.dataId, str(err))
            pass
        except RepeatableQuantumError as err:
            if self.exitOnKnownError:
                _LOG.warning("Caught repeatable quantum error for %s (%s):", taskDef, quantum.dataId)
                _LOG.warning(err, exc_info=True)
                sys.exit(err.EXIT_CODE)
            else:
                raise
        except InvalidQuantumError as err:
            _LOG.fatal("Invalid quantum error for %s (%s):", taskDef, quantum.dataId)
            _LOG.fatal(err, exc_info=True)
            sys.exit(err.EXIT_CODE)

    def writeMetadata(self, quantum, metadata, taskDef, butler):
        if taskDef.metadataDatasetName is not None:
            # The DatasetRef has to be in the Quantum outputs; it can be
            # looked up by name.
            try:
                ref = quantum.outputs[taskDef.metadataDatasetName]
            except LookupError as exc:
                raise InvalidQuantumError(
                    f"Quantum outputs is missing metadata dataset type {taskDef.metadataDatasetName};"
                    f" this could happen due to inconsistent options between QuantumGraph generation"
                    f" and execution") from exc
            butler.put(metadata, ref[0])

    def writeLogRecords(self, quantum, taskDef, butler):
        # If we are logging to an external file we must always try to
        # close it.
        filename = None
        if isinstance(self.log_handler, FileHandler):
            filename = self.log_handler.stream.name
            self.log_handler.close()

        if self.log_handler is not None:
            # Remove the handler so we stop accumulating log messages.
            logging.getLogger().removeHandler(self.log_handler)

        if taskDef.logOutputDatasetName is not None and self.log_handler is not None:
            # The DatasetRef has to be in the Quantum outputs; it can be
            # looked up by name.
            try:
                ref = quantum.outputs[taskDef.logOutputDatasetName]
            except LookupError as exc:
                raise InvalidQuantumError(
                    f"Quantum outputs is missing log output dataset type {taskDef.logOutputDatasetName};"
                    f" this could happen due to inconsistent options between QuantumGraph generation"
                    f" and execution") from exc

            if isinstance(self.log_handler, ButlerLogRecordHandler):
                butler.put(self.log_handler.records, ref[0])

                # Clear the records in case the handler is reused.
                self.log_handler.records.clear()
            else:
                assert filename is not None, "Somehow unable to extract filename from file handler"

                # Need to ingest this file directly into butler.
                dataset = FileDataset(path=filename, refs=ref[0])
                try:
                    butler.ingest(dataset, transfer="move")
                except NotImplementedError:
                    # Some datastores can't receive files (e.g. the in-memory
                    # datastore used in testing), so skip log storage for
                    # those.  An alternative is to read the file as a
                    # ButlerLogRecords object and put it.
                    _LOG.info("Log records could not be stored in this butler because the"
                              " datastore cannot ingest files.")
                    pass

    def initGlobals(self, quantum, butler):
        """Initialize global state needed for task execution.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        butler : `~lsst.daf.butler.Butler`
            Data butler.

        Notes
        -----
        There is an issue with initializing the filters singleton, which is
        done by the instrument.  To avoid requiring tasks to do it in
        ``runQuantum()``, we do it here when any dataId has an instrument
        dimension.  Also, for now we only allow a single instrument, so we
        verify that the instrument names in all dataIds are identical.

        This will need revision when the filter singleton disappears.
        """
        oneInstrument = None
        for datasetRefs in chain(quantum.inputs.values(), quantum.outputs.values()):
            for datasetRef in datasetRefs:
                dataId = datasetRef.dataId
                instrument = dataId.get("instrument")
                if instrument is not None:
                    if oneInstrument is not None:
                        assert instrument == oneInstrument, \
                            "Currently require that only one instrument is used per graph"
                    else:
                        oneInstrument = instrument
                        Instrument.fromName(instrument, butler.registry)
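

# ----------------------------------------------------------------------
# Illustrative usage sketch (not part of the original module).  In normal
# operation this executor is driven by ``pipetask``/``MPGraphExecutor``;
# the guarded block below only sketches how quanta could be run one at a
# time by hand.  The repository path, output run name, and graph file are
# placeholders, and the QuantumGraph loading/iteration details are
# assumptions about this release's API.
if __name__ == "__main__":
    from lsst.ctrl.mpexec import TaskFactory
    from lsst.daf.butler import Butler
    from lsst.pipe.base import QuantumGraph

    # Placeholder repository and output run collection.
    butler = Butler("/path/to/repo", run="u/example/demo_run")
    # Load a previously generated QuantumGraph (placeholder file name).
    qgraph = QuantumGraph.loadUri("pipeline.qgraph", butler.registry.dimensions)

    executor = SingleQuantumExecutor(TaskFactory(), skipExisting=True)
    # Iterating a QuantumGraph is assumed to yield nodes in topological order,
    # each carrying the task definition and the quantum to execute.
    for node in qgraph:
        executor.execute(node.taskDef, node.quantum, butler)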