Coverage for python/lsst/ctrl/mpexec/singleQuantumExecutor.py: 13%
# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__all__ = ["SingleQuantumExecutor"]

# -------------------------------
#  Imports of standard modules --
# -------------------------------
import logging
import os
import shutil
import sys
import tempfile
import time
from collections import defaultdict
from contextlib import contextmanager
from itertools import chain
from logging import FileHandler
from typing import List

from lsst.daf.butler import DatasetRef, DatasetType, FileDataset, NamedKeyDict, Quantum
from lsst.daf.butler.core.logging import ButlerLogRecordHandler, ButlerLogRecords, ButlerMDC, JsonLogFormatter
from lsst.obs.base import Instrument
from lsst.pipe.base import (
    AdjustQuantumHelper,
    ButlerQuantumContext,
    InvalidQuantumError,
    NoWorkFound,
    RepeatableQuantumError,
)

# During the metadata transition phase, determine the metadata class by
# asking pipe_base.
from lsst.pipe.base.task import _TASK_FULL_METADATA_TYPE, _TASK_METADATA_TYPE
from lsst.utils.timer import logInfo

# -----------------------------
#  Imports for other modules --
# -----------------------------
from .quantumGraphExecutor import QuantumExecutor

# ----------------------------------
#  Local non-exported definitions --
# ----------------------------------

_LOG = logging.getLogger(__name__)


class _LogCaptureFlag:
    """Simple flag to enable/disable log-to-butler saving."""

    store: bool = True


class SingleQuantumExecutor(QuantumExecutor):
    """Executor class which runs one Quantum at a time.

    Parameters
    ----------
    taskFactory : `~lsst.pipe.base.TaskFactory`
        Instance of a task factory.
    skipExistingIn : `list` [ `str` ], optional
        A list of collections; if all of a Quantum's outputs already exist in
        the specified collections, then that Quantum will not be rerun.
    clobberOutputs : `bool`, optional
        If `True`, existing outputs in the output run collection will be
        overwritten. If ``skipExistingIn`` is defined, only outputs from
        failed quanta will be overwritten.
    enableLsstDebug : `bool`, optional
        Enable debugging with the ``lsstDebug`` facility for a task.
    exitOnKnownError : `bool`, optional
        If `True`, call `sys.exit` with the appropriate exit code for special
        known exceptions, after printing a traceback, instead of letting the
        exception propagate up to the caller. This is always the behavior for
        `~lsst.pipe.base.InvalidQuantumError`.
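
    Notes
    -----
    A minimal usage sketch; ``taskFactory``, ``taskDef``, ``quantum``, and
    ``butler`` are assumed to have been constructed elsewhere (e.g. by the
    quantum-graph execution machinery):

    .. code-block:: py

        executor = SingleQuantumExecutor(taskFactory, clobberOutputs=True)
        executor.execute(taskDef, quantum, butler)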
96 """
98 stream_json_logs = True
99 """If True each log record is written to a temporary file and ingested
100 when quantum completes. If False the records are accumulated in memory
101 and stored in butler on quantum completion."""

    def __init__(
        self,
        taskFactory,
        skipExistingIn=None,
        clobberOutputs=False,
        enableLsstDebug=False,
        exitOnKnownError=False,
    ):
        self.taskFactory = taskFactory
        self.skipExistingIn = skipExistingIn
        self.enableLsstDebug = enableLsstDebug
        self.clobberOutputs = clobberOutputs
        self.exitOnKnownError = exitOnKnownError
        self.log_handler = None

    def execute(self, taskDef, quantum, butler):
        # Docstring inherited from QuantumExecutor.execute
        startTime = time.time()

        with self.captureLogging(taskDef, quantum, butler) as captureLog:

            # Save detailed resource usage before task start to metadata.
            quantumMetadata = _TASK_METADATA_TYPE()
            logInfo(None, "prep", metadata=quantumMetadata)

            taskClass, label, config = taskDef.taskClass, taskDef.label, taskDef.config

            # Check whether to skip or delete old outputs. If the check
            # returns True or raises an exception, do not try to store logs,
            # as they may already be in the butler.
            captureLog.store = False
            if self.checkExistingOutputs(quantum, butler, taskDef):
                _LOG.info(
                    "Skipping already-successful quantum for label=%s dataId=%s.", label, quantum.dataId
                )
                return
            captureLog.store = True

            try:
                quantum = self.updatedQuantumInputs(quantum, butler, taskDef)
            except NoWorkFound as exc:
                _LOG.info(
                    "Nothing to do for task '%s' on quantum %s; saving metadata and skipping: %s",
                    taskDef.label,
                    quantum.dataId,
                    str(exc),
                )
                # Make empty metadata that looks something like what a
                # do-nothing task would write (but we don't bother with empty
                # nested PropertySets for subtasks). This is slightly
                # duplicative with logic in pipe_base that we can't easily
                # call from here; we'll fix this on DM-29761.
                logInfo(None, "end", metadata=quantumMetadata)
                fullMetadata = _TASK_FULL_METADATA_TYPE()
                fullMetadata[taskDef.label] = _TASK_METADATA_TYPE()
                fullMetadata["quantum"] = quantumMetadata
                self.writeMetadata(quantum, fullMetadata, taskDef, butler)
                return

            # Enable lsstDebug debugging.
            if self.enableLsstDebug:
                try:
                    _LOG.debug("Will try to import debug.py")
                    import debug  # noqa:F401
                except ImportError:
                    _LOG.warning("No 'debug' module found.")

            # Initialize global state.
            self.initGlobals(quantum, butler)

            # Ensure that we are executing a frozen config.
            config.freeze()
            logInfo(None, "init", metadata=quantumMetadata)
            task = self.makeTask(taskClass, label, config, butler)
            logInfo(None, "start", metadata=quantumMetadata)
            try:
                self.runQuantum(task, quantum, taskDef, butler)
            except Exception as e:
                _LOG.error(
                    "Execution of task '%s' on quantum %s failed. Exception %s: %s",
                    taskDef.label,
                    quantum.dataId,
                    e.__class__.__name__,
                    str(e),
                )
                raise
            logInfo(None, "end", metadata=quantumMetadata)
            fullMetadata = task.getFullMetadata()
            fullMetadata["quantum"] = quantumMetadata
            self.writeMetadata(quantum, fullMetadata, taskDef, butler)
            stopTime = time.time()
            _LOG.info(
                "Execution of task '%s' on quantum %s took %.3f seconds",
                taskDef.label,
                quantum.dataId,
                stopTime - startTime,
            )
        return quantum

    @contextmanager
    def captureLogging(self, taskDef, quantum, butler):
        """Configure the logging system to capture logs for execution of this task.

        Parameters
        ----------
        taskDef : `lsst.pipe.base.TaskDef`
            The task definition.
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        butler : `~lsst.daf.butler.Butler`
            Butler to write logs to.

        Notes
        -----
        Expected to be used as a context manager to ensure that logging
        records are inserted into the butler once the quantum has been
        executed:

        .. code-block:: py

            with self.captureLogging(taskDef, quantum, butler):
                # Run quantum and capture logs.

        This method can also set up logging to attach task- or
        quantum-specific information to log messages. Potentially this can
        take into account some info from the task configuration as well.
        """
        # Add a handler to the root logger to capture execution log output.
        # How does it get removed reliably?
        # Initialize tmpdir here so the finally clause below can always
        # reference it, even when no log dataset is configured.
        tmpdir = None
        if taskDef.logOutputDatasetName is not None:
            # Either accumulate into ButlerLogRecords or stream
            # JSON records to file and ingest that.
            if self.stream_json_logs:
                # Create the log file in a temporary directory rather than
                # creating a temporary file. This is necessary because
                # temporary files are created with restrictive permissions
                # and during file ingest these permissions persist in the
                # datastore. Using a temp directory allows us to create
                # a file with umask default permissions.
                tmpdir = tempfile.mkdtemp(prefix="butler-temp-logs-")

                # Construct a file to receive the log records and "touch" it.
                log_file = os.path.join(tmpdir, f"butler-log-{taskDef.label}.json")
                with open(log_file, "w"):
                    pass
                self.log_handler = FileHandler(log_file)
                self.log_handler.setFormatter(JsonLogFormatter())
            else:
                self.log_handler = ButlerLogRecordHandler()

            logging.getLogger().addHandler(self.log_handler)

        # Include the quantum dataId and task label in the MDC.
        label = taskDef.label
        if quantum.dataId:
            label += f":{quantum.dataId}"

        ctx = _LogCaptureFlag()
        try:
            with ButlerMDC.set_mdc({"LABEL": label, "RUN": butler.run}):
                yield ctx
        finally:
            # Ensure that the logs are stored in butler.
            self.writeLogRecords(quantum, taskDef, butler, ctx.store)
            if tmpdir:
                shutil.rmtree(tmpdir, ignore_errors=True)

    def checkExistingOutputs(self, quantum, butler, taskDef):
        """Decide whether this quantum needs to be executed.

        If only partial outputs exist, then they are removed if
        ``clobberOutputs`` is `True`; otherwise an exception is raised.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Quantum to check for existing outputs.
        butler : `~lsst.daf.butler.Butler`
            Data butler.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.

        Returns
        -------
        exist : `bool`
            `True` if ``self.skipExistingIn`` is defined, and a previous
            execution of this quantum appears to have completed successfully
            (either because metadata was written or all datasets were
            written). `False` otherwise.

        Raises
        ------
        RuntimeError
            Raised if some outputs exist and some do not.
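
        Notes
        -----
        A minimal sketch of the calling pattern, as used by `execute` above
        (``quantum``, ``butler``, and ``taskDef`` are assumed to be in hand):

        .. code-block:: py

            if self.checkExistingOutputs(quantum, butler, taskDef):
                # All outputs already exist; skip this quantum.
                return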
298 """
299 if self.skipExistingIn and taskDef.metadataDatasetName is not None:
300 # Metadata output exists; this is sufficient to assume the previous
301 # run was successful and should be skipped.
302 ref = butler.registry.findDataset(
303 taskDef.metadataDatasetName, quantum.dataId, collections=self.skipExistingIn
304 )
305 if ref is not None:
306 if butler.datastore.exists(ref):
307 return True
309 # Previously we always checked for existing outputs in `butler.run`,
310 # now logic gets more complicated as we only want to skip quantum
311 # whose outputs exist in `self.skipExistingIn` but pruning should only
312 # be done for outputs existing in `butler.run`.
314 def findOutputs(collections):
315 """Find quantum outputs in specified collections."""
316 existingRefs = []
317 missingRefs = []
318 for datasetRefs in quantum.outputs.values():
319 for datasetRef in datasetRefs:
320 ref = butler.registry.findDataset(
321 datasetRef.datasetType, datasetRef.dataId, collections=collections
322 )
323 if ref is not None and butler.datastore.exists(ref):
324 existingRefs.append(ref)
325 else:
326 missingRefs.append(datasetRef)
327 return existingRefs, missingRefs
329 existingRefs, missingRefs = findOutputs(self.skipExistingIn)
330 if self.skipExistingIn:
331 if existingRefs and not missingRefs:
332 # everything is already there
333 return True
335 # If we are to re-run quantum then prune datasets that exists in
336 # output run collection, only if `self.clobberOutputs` is set.
337 if existingRefs:
338 existingRefs, missingRefs = findOutputs(butler.run)
339 if existingRefs and missingRefs:
340 _LOG.debug(
341 "Partial outputs exist for task %s dataId=%s collection=%s "
342 "existingRefs=%s missingRefs=%s",
343 taskDef,
344 quantum.dataId,
345 butler.run,
346 existingRefs,
347 missingRefs,
348 )
349 if self.clobberOutputs:
350 # only prune
351 _LOG.info("Removing partial outputs for task %s: %s", taskDef, existingRefs)
352 # Do not purge registry records if this looks like
353 # an execution butler. This ensures that the UUID
354 # of the dataset doesn't change.
355 if butler._allow_put_of_predefined_dataset:
356 purge = False
357 disassociate = False
358 else:
359 purge = True
360 disassociate = True
361 butler.pruneDatasets(existingRefs, disassociate=disassociate, unstore=True, purge=purge)
362 return False
363 else:
364 raise RuntimeError(
365 f"Registry inconsistency while checking for existing outputs:"
366 f" collection={butler.run} existingRefs={existingRefs}"
367 f" missingRefs={missingRefs}"
368 )
370 # need to re-run
371 return False

    def makeTask(self, taskClass, name, config, butler):
        """Make a new task instance.

        Parameters
        ----------
        taskClass : `type`
            Sub-class of `~lsst.pipe.base.PipelineTask`.
        name : `str`
            Name for this task.
        config : `~lsst.pipe.base.PipelineTaskConfig`
            Configuration object for this task.
        butler : `~lsst.daf.butler.Butler`
            Data butler.

        Returns
        -------
        task : `~lsst.pipe.base.PipelineTask`
            Instance of ``taskClass`` type.
        """
        # Call the task factory for that.
        return self.taskFactory.makeTask(taskClass, name, config, None, butler)

    def updatedQuantumInputs(self, quantum, butler, taskDef):
        """Update the quantum with extra information; returns a new, updated
        Quantum.

        Some methods may require input DatasetRefs to have a non-None
        ``dataset_id``, but in the case of intermediate datasets it may not
        be filled in during QuantumGraph construction. This method retrieves
        the missing info from the registry.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        butler : `~lsst.daf.butler.Butler`
            Data butler.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.

        Returns
        -------
        update : `~lsst.daf.butler.Quantum`
            Updated Quantum instance.
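
        Notes
        -----
        A minimal sketch of the intended call pattern, mirroring its use in
        `execute` above (``quantum``, ``butler``, and ``taskDef`` are assumed
        to be in hand):

        .. code-block:: py

            try:
                quantum = self.updatedQuantumInputs(quantum, butler, taskDef)
            except NoWorkFound:
                # Not enough inputs remain; write metadata and skip.
                ...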
417 """
418 anyChanges = False
419 updatedInputs = defaultdict(list)
420 for key, refsForDatasetType in quantum.inputs.items():
421 newRefsForDatasetType = updatedInputs[key]
422 for ref in refsForDatasetType:
423 if ref.id is None:
424 resolvedRef = butler.registry.findDataset(
425 ref.datasetType, ref.dataId, collections=butler.collections
426 )
427 if resolvedRef is None:
428 _LOG.info("No dataset found for %s", ref)
429 continue
430 else:
431 _LOG.debug("Updated dataset ID for %s", ref)
432 else:
433 resolvedRef = ref
434 # We need to ask datastore if the dataset actually exists
435 # because the Registry of a local "execution butler" cannot
436 # know this (because we prepopulate it with all of the datasets
437 # that might be created).
438 if butler.datastore.exists(resolvedRef):
439 newRefsForDatasetType.append(resolvedRef)
440 if len(newRefsForDatasetType) != len(refsForDatasetType):
441 anyChanges = True
442 # If we removed any input datasets, let the task check if it has enough
443 # to proceed and/or prune related datasets that it also doesn't
444 # need/produce anymore. It will raise NoWorkFound if it can't run,
445 # which we'll let propagate up. This is exactly what we run during QG
446 # generation, because a task shouldn't care whether an input is missing
447 # because some previous task didn't produce it, or because it just
448 # wasn't there during QG generation.
449 updatedInputs = NamedKeyDict[DatasetType, List[DatasetRef]](updatedInputs.items())
450 helper = AdjustQuantumHelper(updatedInputs, quantum.outputs)
451 if anyChanges:
452 helper.adjust_in_place(taskDef.connections, label=taskDef.label, data_id=quantum.dataId)
453 return Quantum(
454 taskName=quantum.taskName,
455 taskClass=quantum.taskClass,
456 dataId=quantum.dataId,
457 initInputs=quantum.initInputs,
458 inputs=helper.inputs,
459 outputs=helper.outputs,
460 )

    def runQuantum(self, task, quantum, taskDef, butler):
        """Execute a task on a single quantum.

        Parameters
        ----------
        task : `~lsst.pipe.base.PipelineTask`
            Task object.
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.
        butler : `~lsst.daf.butler.Butler`
            Data butler.
        """
        # Create a butler that operates in the context of a quantum.
        butlerQC = ButlerQuantumContext(butler, quantum)

        # Get the input and output references for the task.
        inputRefs, outputRefs = taskDef.connections.buildDatasetRefs(quantum)

        # Call the task runQuantum() method. Catch a few known failure modes
        # and translate them into specific exit codes, or re-raise.
        try:
            task.runQuantum(butlerQC, inputRefs, outputRefs)
        except NoWorkFound as err:
            # Not an error, just an early exit.
            _LOG.info("Task '%s' on quantum %s exited early: %s", taskDef.label, quantum.dataId, str(err))
        except RepeatableQuantumError as err:
            if self.exitOnKnownError:
                _LOG.warning("Caught repeatable quantum error for %s (%s):", taskDef, quantum.dataId)
                _LOG.warning(err, exc_info=True)
                sys.exit(err.EXIT_CODE)
            else:
                raise
        except InvalidQuantumError as err:
            _LOG.fatal("Invalid quantum error for %s (%s):", taskDef, quantum.dataId)
            _LOG.fatal(err, exc_info=True)
            sys.exit(err.EXIT_CODE)

    def writeMetadata(self, quantum, metadata, taskDef, butler):
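        """Store the full task metadata produced by a quantum in the butler.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Executed Quantum instance.
        metadata : ``_TASK_FULL_METADATA_TYPE``
            Full task metadata produced by the quantum's execution.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.
        butler : `~lsst.daf.butler.Butler`
            Data butler.
        """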
        if taskDef.metadataDatasetName is not None:
            # The DatasetRef has to be in the Quantum outputs; we can look it
            # up by name.
            try:
                ref = quantum.outputs[taskDef.metadataDatasetName]
            except LookupError as exc:
                raise InvalidQuantumError(
                    f"Quantum outputs is missing metadata dataset type {taskDef.metadataDatasetName};"
                    f" this could happen due to inconsistent options between QuantumGraph generation"
                    f" and execution"
                ) from exc
            butler.put(metadata, ref[0])

    def writeLogRecords(self, quantum, taskDef, butler, store):
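        """Store captured log records in the butler, if configured to do so.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Executed Quantum instance.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.
        butler : `~lsst.daf.butler.Butler`
            Data butler.
        store : `bool`
            If `False`, the log handler is closed and removed but no records
            are written to the butler.
        """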
        # If we are logging to an external file, we must always try to
        # close it.
        filename = None
        if isinstance(self.log_handler, FileHandler):
            filename = self.log_handler.stream.name
            self.log_handler.close()

        if self.log_handler is not None:
            # Remove the handler so we stop accumulating log messages.
            logging.getLogger().removeHandler(self.log_handler)

        try:
            if store and taskDef.logOutputDatasetName is not None and self.log_handler is not None:
                # The DatasetRef has to be in the Quantum outputs; we can
                # look it up by name.
                try:
                    ref = quantum.outputs[taskDef.logOutputDatasetName]
                except LookupError as exc:
                    raise InvalidQuantumError(
                        f"Quantum outputs is missing log output dataset type"
                        f" {taskDef.logOutputDatasetName}; this could happen due to inconsistent"
                        f" options between QuantumGraph generation and execution"
                    ) from exc

                if isinstance(self.log_handler, ButlerLogRecordHandler):
                    butler.put(self.log_handler.records, ref[0])

                    # Clear the records in case the handler is reused.
                    self.log_handler.records.clear()
                else:
                    assert filename is not None, "Somehow unable to extract filename from file handler"

                    # Need to ingest this file directly into butler.
                    dataset = FileDataset(path=filename, refs=ref[0])
                    try:
                        butler.ingest(dataset, transfer="move")
                        filename = None
                    except NotImplementedError:
                        # Some datastores can't receive files (e.g. an
                        # in-memory datastore when testing); we store an
                        # empty list for those just to have a dataset.
                        # An alternative is to read the file as a
                        # ButlerLogRecords object and put it.
                        _LOG.info(
                            "Log records could not be stored in this butler because the"
                            " datastore cannot ingest files; an empty record list is stored instead."
                        )
                        records = ButlerLogRecords.from_records([])
                        butler.put(records, ref[0])
        finally:
            # Remove the file if it was not ingested.
            if filename is not None:
                try:
                    os.remove(filename)
                except OSError:
                    pass

    def initGlobals(self, quantum, butler):
        """Initialize global state needed for task execution.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        butler : `~lsst.daf.butler.Butler`
            Data butler.

        Notes
        -----
        There is an issue with initializing the filters singleton, which is
        done by instrument; to avoid requiring tasks to do it in
        ``runQuantum()`` we do it here when any dataId has an instrument
        dimension. Also, for now we only allow a single instrument: we verify
        that the instrument names in all dataIds are identical.

        This will need revision when the filter singleton disappears.
        """
        oneInstrument = None
        for datasetRefs in chain(quantum.inputs.values(), quantum.outputs.values()):
            for datasetRef in datasetRefs:
                dataId = datasetRef.dataId
                instrument = dataId.get("instrument")
                if instrument is not None:
                    if oneInstrument is not None:
                        assert (
                            instrument == oneInstrument
                        ), "Currently require that only one instrument is used per graph"
                    else:
                        oneInstrument = instrument
                        Instrument.fromName(instrument, butler.registry)