Coverage for python/lsst/ctrl/mpexec/singleQuantumExecutor.py : 12%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of ctrl_mpexec.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
# Public API of this module: only the executor class is exported.
__all__ = ['SingleQuantumExecutor']
24# -------------------------------
25# Imports of standard modules --
26# -------------------------------
27import logging
28import os
29import sys
30import tempfile
31import time
32from contextlib import contextmanager
33from collections import defaultdict
34from itertools import chain
35from logging import FileHandler
36from typing import List
38# -----------------------------
39# Imports for other modules --
40# -----------------------------
41from .quantumGraphExecutor import QuantumExecutor
42from lsst.daf.base import PropertyList, PropertySet
43from lsst.obs.base import Instrument
44from lsst.pipe.base import (
45 AdjustQuantumHelper,
46 ButlerQuantumContext,
47 InvalidQuantumError,
48 NoWorkFound,
49 RepeatableQuantumError,
50 logInfo,
51)
52from lsst.daf.butler import (
53 DatasetRef,
54 DatasetType,
55 FileDataset,
56 NamedKeyDict,
57 Quantum,
58)
59from lsst.daf.butler.core.logging import (
60 ButlerLogRecordHandler,
61 ButlerLogRecords,
62 ButlerMDC,
63 JsonLogFormatter,
64)
65# ----------------------------------
66# Local non-exported definitions --
67# ----------------------------------
# Module logger.  The name drops everything up to and including the first
# "." of __name__ (e.g. "lsst.ctrl.mpexec.singleQuantumExecutor" ->
# "ctrl.mpexec.singleQuantumExecutor") -- presumably to match the project's
# logger-naming convention; TODO confirm this is intentional.
_LOG = logging.getLogger(__name__.partition(".")[2])
class _LogCaptureFlag:
    """Simple flag to enable/disable log-to-butler saving.

    An instance is yielded by `SingleQuantumExecutor.captureLogging`; the
    caller may toggle ``store`` while the context is active to control
    whether captured records are written to butler on exit.
    """
    # When True, the captured log records are saved to butler when the
    # capturing context exits.
    store: bool = True
class SingleQuantumExecutor(QuantumExecutor):
    """Executor class which runs one Quantum at a time.

    Parameters
    ----------
    taskFactory : `~lsst.pipe.base.TaskFactory`
        Instance of a task factory.
    skipExisting : `bool`, optional
        If `True`, then quanta that succeeded will not be rerun.
    clobberOutputs : `bool`, optional
        If `True`, then existing outputs will be overwritten. If
        `skipExisting` is also `True`, only outputs from failed quanta will
        be overwritten.
    enableLsstDebug : `bool`, optional
        Enable debugging with ``lsstDebug`` facility for a task.
    exitOnKnownError : `bool`, optional
        If `True`, call `sys.exit` with the appropriate exit code for special
        known exceptions, after printing a traceback, instead of letting the
        exception propagate up to calling. This is always the behavior for
        InvalidQuantumError.

    Notes
    -----
    The data butler is not a constructor argument; it is supplied to
    `execute` for each quantum.
    """

    stream_json_logs = True
    """If True each log record is written to a temporary file and ingested
    when quantum completes. If False the records are accumulated in memory
    and stored in butler on quantum completion."""
107 def __init__(self, taskFactory, skipExisting=False, clobberOutputs=False, enableLsstDebug=False,
108 exitOnKnownError=False):
109 self.taskFactory = taskFactory
110 self.skipExisting = skipExisting
111 self.enableLsstDebug = enableLsstDebug
112 self.clobberOutputs = clobberOutputs
113 self.exitOnKnownError = exitOnKnownError
114 self.log_handler = None
116 def execute(self, taskDef, quantum, butler):
117 # Docstring inherited from QuantumExecutor.execute
118 startTime = time.time()
120 with self.captureLogging(taskDef, quantum, butler) as captureLog:
122 # Save detailed resource usage before task start to metadata.
123 quantumMetadata = PropertyList()
124 logInfo(None, "prep", metadata=quantumMetadata)
126 taskClass, label, config = taskDef.taskClass, taskDef.label, taskDef.config
128 # check whether to skip or delete old outputs, if it returns True
129 # or raises an exception do not try to store logs, as they may be
130 # already in butler.
131 captureLog.store = False
132 if self.checkExistingOutputs(quantum, butler, taskDef):
133 _LOG.info("Skipping already-successful quantum for label=%s dataId=%s.", label,
134 quantum.dataId)
135 return
136 captureLog.store = True
138 try:
139 quantum = self.updatedQuantumInputs(quantum, butler, taskDef)
140 except NoWorkFound as exc:
141 _LOG.info("Nothing to do for task '%s' on quantum %s; saving metadata and skipping: %s",
142 taskDef.label, quantum.dataId, str(exc))
143 # Make empty metadata that looks something like what a
144 # do-nothing task would write (but we don't bother with empty
145 # nested PropertySets for subtasks). This is slightly
146 # duplicative with logic in pipe_base that we can't easily call
147 # from here; we'll fix this on DM-29761.
148 logInfo(None, "end", metadata=quantumMetadata)
149 fullMetadata = PropertySet()
150 fullMetadata[taskDef.label] = PropertyList()
151 fullMetadata["quantum"] = quantumMetadata
152 self.writeMetadata(quantum, fullMetadata, taskDef, butler)
153 return
155 # enable lsstDebug debugging
156 if self.enableLsstDebug:
157 try:
158 _LOG.debug("Will try to import debug.py")
159 import debug # noqa:F401
160 except ImportError:
161 _LOG.warn("No 'debug' module found.")
163 # initialize global state
164 self.initGlobals(quantum, butler)
166 # Ensure that we are executing a frozen config
167 config.freeze()
168 logInfo(None, "init", metadata=quantumMetadata)
169 task = self.makeTask(taskClass, label, config, butler)
170 logInfo(None, "start", metadata=quantumMetadata)
171 try:
172 self.runQuantum(task, quantum, taskDef, butler)
173 except Exception:
174 _LOG.exception("Execution of task '%s' on quantum %s failed",
175 taskDef.label, quantum.dataId)
176 raise
177 logInfo(None, "end", metadata=quantumMetadata)
178 fullMetadata = task.getFullMetadata()
179 fullMetadata["quantum"] = quantumMetadata
180 self.writeMetadata(quantum, fullMetadata, taskDef, butler)
181 stopTime = time.time()
182 _LOG.info("Execution of task '%s' on quantum %s took %.3f seconds",
183 taskDef.label, quantum.dataId, stopTime - startTime)
    @contextmanager
    def captureLogging(self, taskDef, quantum, butler):
        """Configure logging system to capture logs for execution of this task.

        Parameters
        ----------
        taskDef : `lsst.pipe.base.TaskDef`
            The task definition.
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        butler : `~lsst.daf.butler.Butler`
            Butler to write logs to.

        Notes
        -----
        Expected to be used as a context manager to ensure that logging
        records are inserted into the butler once the quantum has been
        executed:

        .. code-block:: py

            with self.captureLogging(taskDef, quantum, butler):
                # Run quantum and capture logs.

        This method can also setup logging to attach task- or
        quantum-specific information to log messages. Potentially this can
        take into account some info from task configuration as well.
        """
        # Add a handler to the root logger to capture execution log output.
        # How does it get removed reliably?
        if taskDef.logOutputDatasetName is not None:
            # Either accumulate into ButlerLogRecords or stream
            # JSON records to file and ingest that.
            if self.stream_json_logs:
                # delete=False: the FileHandler keeps writing to this path
                # after the NamedTemporaryFile object is closed; the file is
                # ingested or removed later by writeLogRecords().
                tmp = tempfile.NamedTemporaryFile(mode="w",
                                                  suffix=".json",
                                                  prefix=f"butler-log-{taskDef.label}-",
                                                  delete=False)
                self.log_handler = FileHandler(tmp.name)
                tmp.close()
                self.log_handler.setFormatter(JsonLogFormatter())
            else:
                self.log_handler = ButlerLogRecordHandler()

            logging.getLogger().addHandler(self.log_handler)

        # include quantum dataId and task label into MDC
        label = taskDef.label
        if quantum.dataId:
            label += f":{quantum.dataId}"

        ctx = _LogCaptureFlag()
        try:
            with ButlerMDC.set_mdc({"LABEL": label}):
                yield ctx
        finally:
            # Ensure that the logs are stored in butler.
            self.writeLogRecords(quantum, taskDef, butler, ctx.store)
244 def checkExistingOutputs(self, quantum, butler, taskDef):
245 """Decide whether this quantum needs to be executed.
247 If only partial outputs exist then they are removed if
248 ``clobberOutputs`` is True, otherwise an exception is raised.
250 Parameters
251 ----------
252 quantum : `~lsst.daf.butler.Quantum`
253 Quantum to check for existing outputs
254 butler : `~lsst.daf.butler.Butler`
255 Data butler.
256 taskDef : `~lsst.pipe.base.TaskDef`
257 Task definition structure.
259 Returns
260 -------
261 exist : `bool`
262 `True` if ``self.skipExisting`` is `True`, and a previous execution
263 of this quanta appears to have completed successfully (either
264 because metadata was written or all datasets were written).
265 `False` otherwise.
267 Raises
268 ------
269 RuntimeError
270 Raised if some outputs exist and some not.
271 """
272 collection = butler.run
273 registry = butler.registry
275 if self.skipExisting and taskDef.metadataDatasetName is not None:
276 # Metadata output exists; this is sufficient to assume the previous
277 # run was successful and should be skipped.
278 if (ref := butler.registry.findDataset(taskDef.metadataDatasetName, quantum.dataId)) is not None:
279 if butler.datastore.exists(ref):
280 return True
282 existingRefs = []
283 missingRefs = []
284 for datasetRefs in quantum.outputs.values():
285 for datasetRef in datasetRefs:
286 ref = registry.findDataset(datasetRef.datasetType, datasetRef.dataId,
287 collections=butler.run)
288 if ref is None:
289 missingRefs.append(datasetRef)
290 else:
291 if butler.datastore.exists(ref):
292 existingRefs.append(ref)
293 else:
294 missingRefs.append(datasetRef)
295 if existingRefs and missingRefs:
296 # Some outputs exist and some don't, either delete existing ones
297 # or complain.
298 _LOG.debug("Partial outputs exist for task %s dataId=%s collection=%s "
299 "existingRefs=%s missingRefs=%s",
300 taskDef, quantum.dataId, collection, existingRefs, missingRefs)
301 if self.clobberOutputs:
302 _LOG.info("Removing partial outputs for task %s: %s", taskDef, existingRefs)
303 butler.pruneDatasets(existingRefs, disassociate=True, unstore=True, purge=True)
304 return False
305 else:
306 raise RuntimeError(f"Registry inconsistency while checking for existing outputs:"
307 f" collection={collection} existingRefs={existingRefs}"
308 f" missingRefs={missingRefs}")
309 elif existingRefs:
310 # complete outputs exist, this is fine only if skipExisting is set
311 return self.skipExisting
312 else:
313 # no outputs exist
314 return False
    def makeTask(self, taskClass, name, config, butler):
        """Make new task instance.

        Parameters
        ----------
        taskClass : `type`
            Sub-class of `~lsst.pipe.base.PipelineTask`.
        name : `str`
            Name for this task.
        config : `~lsst.pipe.base.PipelineTaskConfig`
            Configuration object for this task
        butler : `~lsst.daf.butler.Butler`
            Data butler.

        Returns
        -------
        task : `~lsst.pipe.base.PipelineTask`
            Instance of ``taskClass`` type.
        """
        # call task factory for that
        return self.taskFactory.makeTask(taskClass, name, config, None, butler)
    def updatedQuantumInputs(self, quantum, butler, taskDef):
        """Update quantum with extra information, returns a new updated
        Quantum.

        Some methods may require input DatasetRefs to have non-None
        ``dataset_id``, but in case of intermediate dataset it may not be
        filled during QuantumGraph construction. This method will retrieve
        missing info from registry.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        butler : `~lsst.daf.butler.Butler`
            Data butler.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.

        Returns
        -------
        update : `~lsst.daf.butler.Quantum`
            Updated Quantum instance
        """
        anyChanges = False
        updatedInputs = defaultdict(list)
        for key, refsForDatasetType in quantum.inputs.items():
            newRefsForDatasetType = updatedInputs[key]
            for ref in refsForDatasetType:
                # An unresolved ref (no dataset_id) must be looked up in the
                # registry; if it still cannot be found, it is dropped from
                # the inputs.
                if ref.id is None:
                    resolvedRef = butler.registry.findDataset(ref.datasetType, ref.dataId,
                                                              collections=butler.collections)
                    if resolvedRef is None:
                        _LOG.info("No dataset found for %s", ref)
                        continue
                    else:
                        _LOG.debug("Updated dataset ID for %s", ref)
                else:
                    resolvedRef = ref
                # We need to ask datastore if the dataset actually exists
                # because the Registry of a local "execution butler" cannot
                # know this (because we prepopulate it with all of the datasets
                # that might be created).
                if butler.datastore.exists(resolvedRef):
                    newRefsForDatasetType.append(resolvedRef)
            # Any shrinkage of this dataset type's ref list means the task
            # must be given a chance to adjust the quantum below.
            if len(newRefsForDatasetType) != len(refsForDatasetType):
                anyChanges = True
        # If we removed any input datasets, let the task check if it has enough
        # to proceed and/or prune related datasets that it also doesn't
        # need/produce anymore.  It will raise NoWorkFound if it can't run,
        # which we'll let propagate up.  This is exactly what we run during QG
        # generation, because a task shouldn't care whether an input is missing
        # because some previous task didn't produce it, or because it just
        # wasn't there during QG generation.
        updatedInputs = NamedKeyDict[DatasetType, List[DatasetRef]](updatedInputs.items())
        helper = AdjustQuantumHelper(updatedInputs, quantum.outputs)
        if anyChanges:
            helper.adjust_in_place(taskDef.connections, label=taskDef.label, data_id=quantum.dataId)
        return Quantum(taskName=quantum.taskName,
                       taskClass=quantum.taskClass,
                       dataId=quantum.dataId,
                       initInputs=quantum.initInputs,
                       inputs=helper.inputs,
                       outputs=helper.outputs
                       )
403 def runQuantum(self, task, quantum, taskDef, butler):
404 """Execute task on a single quantum.
406 Parameters
407 ----------
408 task : `~lsst.pipe.base.PipelineTask`
409 Task object.
410 quantum : `~lsst.daf.butler.Quantum`
411 Single Quantum instance.
412 taskDef : `~lsst.pipe.base.TaskDef`
413 Task definition structure.
414 butler : `~lsst.daf.butler.Butler`
415 Data butler.
416 """
417 # Create a butler that operates in the context of a quantum
418 butlerQC = ButlerQuantumContext(butler, quantum)
420 # Get the input and output references for the task
421 inputRefs, outputRefs = taskDef.connections.buildDatasetRefs(quantum)
423 # Call task runQuantum() method. Catch a few known failure modes and
424 # translate them into specific
425 try:
426 task.runQuantum(butlerQC, inputRefs, outputRefs)
427 except NoWorkFound as err:
428 # Not an error, just an early exit.
429 _LOG.info("Task '%s' on quantum %s exited early: %s",
430 taskDef.label, quantum.dataId, str(err))
431 pass
432 except RepeatableQuantumError as err:
433 if self.exitOnKnownError:
434 _LOG.warning("Caught repeatable quantum error for %s (%s):", taskDef, quantum.dataId)
435 _LOG.warning(err, exc_info=True)
436 sys.exit(err.EXIT_CODE)
437 else:
438 raise
439 except InvalidQuantumError as err:
440 _LOG.fatal("Invalid quantum error for %s (%s): %s", taskDef, quantum.dataId)
441 _LOG.fatal(err, exc_info=True)
442 sys.exit(err.EXIT_CODE)
    def writeMetadata(self, quantum, metadata, taskDef, butler):
        """Store task metadata in butler.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Executed quantum; its outputs must contain a ref for the
            metadata dataset type.
        metadata : `lsst.daf.base.PropertySet`
            Full task metadata to store.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.  If its ``metadataDatasetName`` is
            `None` this method does nothing.
        butler : `~lsst.daf.butler.Butler`
            Data butler.

        Raises
        ------
        InvalidQuantumError
            Raised if the metadata dataset type is missing from the quantum
            outputs.
        """
        if taskDef.metadataDatasetName is not None:
            # DatasetRef has to be in the Quantum outputs, can lookup by name
            try:
                ref = quantum.outputs[taskDef.metadataDatasetName]
            except LookupError as exc:
                raise InvalidQuantumError(
                    f"Quantum outputs is missing metadata dataset type {taskDef.metadataDatasetName};"
                    f" this could happen due to inconsistent options between QuantumGraph generation"
                    f" and execution") from exc
            butler.put(metadata, ref[0])
    def writeLogRecords(self, quantum, taskDef, butler, store):
        """Store captured log records in butler and detach the log handler.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Executed quantum; its outputs must contain a ref for the log
            dataset type when records are to be stored.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.
        butler : `~lsst.daf.butler.Butler`
            Data butler.
        store : `bool`
            If `False` the captured records are discarded; the handler is
            still closed and removed from the root logger.

        Raises
        ------
        InvalidQuantumError
            Raised if records should be stored but the log dataset type is
            missing from the quantum outputs.
        """
        # If we are logging to an external file we must always try to
        # close it.
        filename = None
        if isinstance(self.log_handler, FileHandler):
            filename = self.log_handler.stream.name
            self.log_handler.close()

        if self.log_handler is not None:
            # Remove the handler so we stop accumulating log messages.
            logging.getLogger().removeHandler(self.log_handler)

        try:
            if store and taskDef.logOutputDatasetName is not None and self.log_handler is not None:
                # DatasetRef has to be in the Quantum outputs, can lookup by
                # name
                try:
                    ref = quantum.outputs[taskDef.logOutputDatasetName]
                except LookupError as exc:
                    raise InvalidQuantumError(
                        f"Quantum outputs is missing log output dataset type {taskDef.logOutputDatasetName};"
                        f" this could happen due to inconsistent options between QuantumGraph generation"
                        f" and execution") from exc

                if isinstance(self.log_handler, ButlerLogRecordHandler):
                    butler.put(self.log_handler.records, ref[0])

                    # Clear the records in case the handler is reused.
                    self.log_handler.records.clear()
                else:
                    assert filename is not None, "Somehow unable to extract filename from file handler"

                    # Need to ingest this file directly into butler.
                    dataset = FileDataset(path=filename, refs=ref[0])
                    try:
                        butler.ingest(dataset, transfer="move")
                        # A successful "move" transfer consumed the file, so
                        # the finally block must not try to remove it.
                        filename = None
                    except NotImplementedError:
                        # Some datastores can't receive files (e.g. in-memory
                        # datastore when testing), we store empty list for
                        # those just to have a dataset. Alternative is to read
                        # the file as a ButlerLogRecords object and put it.
                        _LOG.info("Log records could not be stored in this butler because the"
                                  " datastore can not ingest files, empty record list is stored instead.")
                        records = ButlerLogRecords.from_records([])
                        butler.put(records, ref[0])
        finally:
            # remove file if it is not ingested
            if filename is not None:
                try:
                    os.remove(filename)
                except OSError:
                    pass
    def initGlobals(self, quantum, butler):
        """Initialize global state needed for task execution.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        butler : `~lsst.daf.butler.Butler`
            Data butler.

        Notes
        -----
        There is an issue with initializing filters singleton which is done
        by instrument, to avoid requiring tasks to do it in runQuantum()
        we do it here when any dataId has an instrument dimension. Also for
        now we only allow single instrument, verify that all instrument
        names in all dataIds are identical.

        This will need revision when filter singleton disappears.
        """
        oneInstrument = None
        # Scan every input and output dataId for an "instrument" value.
        for datasetRefs in chain(quantum.inputs.values(), quantum.outputs.values()):
            for datasetRef in datasetRefs:
                dataId = datasetRef.dataId
                instrument = dataId.get("instrument")
                if instrument is not None:
                    if oneInstrument is not None:
                        # NOTE: assert is stripped under python -O; the
                        # single-instrument check would silently disappear.
                        assert instrument == oneInstrument, \
                            "Currently require that only one instrument is used per graph"
                    else:
                        oneInstrument = instrument
                        # Instantiating the Instrument initializes its global
                        # state (per the Notes above, the filters singleton)
                        # as a side effect.
                        Instrument.fromName(instrument, butler.registry)