Coverage for python/lsst/ctrl/mpexec/singleQuantumExecutor.py : 12%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of ctrl_mpexec.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22__all__ = ['SingleQuantumExecutor']
24# -------------------------------
25# Imports of standard modules --
26# -------------------------------
27import logging
28from itertools import chain
30# -----------------------------
31# Imports for other modules --
32# -----------------------------
33from lsst.log import Log
34from lsst.pipe.base import ButlerQuantumContext
36# ----------------------------------
37# Local non-exported definitions --
38# ----------------------------------
40_LOG = logging.getLogger(__name__.partition(".")[2])
class SingleQuantumExecutor:
    """Executor class which runs one Quantum at a time.

    Parameters
    ----------
    butler : `~lsst.daf.butler.Butler`
        Data butler.
    taskFactory : `~lsst.pipe.base.TaskFactory`
        Instance of a task factory.
    skipExisting : `bool`, optional
        If `True` then quanta with all existing outputs are not executed.
    clobberOutput : `bool`, optional
        If `True` then override all existing output datasets in an output
        collection.
    enableLsstDebug : `bool`, optional
        Enable debugging with ``lsstDebug`` facility for a task.
    """
    def __init__(self, butler, taskFactory, skipExisting=False, clobberOutput=False, enableLsstDebug=False):
        self.butler = butler
        self.taskFactory = taskFactory
        self.skipExisting = skipExisting
        self.clobberOutput = clobberOutput
        self.enableLsstDebug = enableLsstDebug

    def execute(self, taskDef, quantum):
        """Execute PipelineTask on a single Quantum.

        Parameters
        ----------
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        """
        taskClass, config = taskDef.taskClass, taskDef.config
        self.setupLogging(taskClass, config, quantum)
        if self.clobberOutput:
            self.doClobberOutputs(quantum)
        if self.skipExisting and self.quantumOutputsExist(quantum):
            # Lazy %-formatting: the message is only built when INFO is
            # actually enabled.
            _LOG.info("Quantum execution skipped due to existing outputs, task=%s dataId=%s.",
                      taskClass.__name__, quantum.dataId)
            return
        self.updateQuantumInputs(quantum)

        # enable lsstDebug debugging
        if self.enableLsstDebug:
            try:
                _LOG.debug("Will try to import debug.py")
                import debug  # noqa:F401
            except ImportError:
                # Best-effort: a missing user-supplied debug module is not
                # an error.  Use warning(); Logger.warn() is deprecated.
                _LOG.warning("No 'debug' module found.")

        task = self.makeTask(taskClass, config)
        self.runQuantum(task, quantum, taskDef)

    def setupLogging(self, taskClass, config, quantum):
        """Configure logging system for execution of this task.

        This method can setup logging to attach task- or
        quantum-specific information to log messages. Potentially this can
        take into account some info from task configuration as well.

        Parameters
        ----------
        taskClass : `type`
            Sub-class of `~lsst.pipe.base.PipelineTask`.
        config : `~lsst.pipe.base.PipelineTaskConfig`
            Configuration object for this task
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        """
        # include input dataIds into MDC
        dataIds = set(ref.dataId for ref in chain.from_iterable(quantum.predictedInputs.values()))
        if dataIds:
            if len(dataIds) == 1:
                Log.MDC("LABEL", str(dataIds.pop()))
            else:
                Log.MDC("LABEL", '[' + ', '.join([str(dataId) for dataId in dataIds]) + ']')

    def doClobberOutputs(self, quantum):
        """Delete any outputs that already exist for a Quantum.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Quantum to check for existing outputs.
        """
        collection = self.butler.run
        registry = self.butler.registry

        # Collect every predicted output dataset already present in the
        # output collection, then remove them.
        existingRefs = []
        for datasetRefs in quantum.outputs.values():
            for datasetRef in datasetRefs:
                ref = registry.find(collection, datasetRef.datasetType, datasetRef.dataId)
                if ref is not None:
                    existingRefs.append(ref)
        for ref in existingRefs:
            _LOG.debug("Removing existing dataset: %s", ref)
            self.butler.remove(ref)

    def quantumOutputsExist(self, quantum):
        """Decide whether this quantum needs to be executed.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Quantum to check for existing outputs

        Returns
        -------
        exist : `bool`
            True if all quantum's outputs exist in a collection, False
            otherwise.

        Raises
        ------
        RuntimeError
            Raised if some outputs exist and some not.
        """
        collection = self.butler.run
        registry = self.butler.registry

        existingRefs = []
        missingRefs = []
        for datasetRefs in quantum.outputs.values():
            for datasetRef in datasetRefs:
                ref = registry.find(collection, datasetRef.datasetType, datasetRef.dataId)
                # Record the individual DatasetRef (not the whole per-type
                # list) so the error message below names exactly which
                # datasets exist or are missing.
                if ref is None:
                    missingRefs.append(datasetRef)
                else:
                    existingRefs.append(datasetRef)
        if existingRefs and missingRefs:
            # some outputs exist and some not, can't do a thing with that
            raise RuntimeError(f"Registry inconsistency while checking for existing outputs:"
                               f" collection={collection} existingRefs={existingRefs}"
                               f" missingRefs={missingRefs}")
        else:
            return bool(existingRefs)

    def makeTask(self, taskClass, config):
        """Make new task instance.

        Parameters
        ----------
        taskClass : `type`
            Sub-class of `~lsst.pipe.base.PipelineTask`.
        config : `~lsst.pipe.base.PipelineTaskConfig`
            Configuration object for this task

        Returns
        -------
        task : `~lsst.pipe.base.PipelineTask`
            Instance of ``taskClass`` type.
        """
        # call task factory for that
        return self.taskFactory.makeTask(taskClass, config, None, self.butler)

    def updateQuantumInputs(self, quantum):
        """Update quantum with extra information.

        Some methods may require input DatasetRefs to have non-None
        ``dataset_id``, but in case of intermediate dataset it may not be
        filled during QuantumGraph construction. This method will retrieve
        missing info from registry.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.

        Raises
        ------
        ValueError
            Raised if an unresolved input cannot be found in the registry.
        """
        butler = self.butler
        for refsForDatasetType in quantum.predictedInputs.values():
            newRefsForDatasetType = []
            for ref in refsForDatasetType:
                if ref.id is None:
                    resolvedRef = butler.registry.find(butler.collection, ref.datasetType, ref.dataId)
                    if resolvedRef is None:
                        raise ValueError(
                            f"Cannot find {ref.datasetType.name} with id {ref.dataId} "
                            f"in collection {butler.collection}."
                        )
                    newRefsForDatasetType.append(resolvedRef)
                    _LOG.debug("Updating dataset ID for %s", ref)
                else:
                    newRefsForDatasetType.append(ref)
            # Replace contents in place so the Quantum keeps the same list
            # object.
            refsForDatasetType[:] = newRefsForDatasetType

    def runQuantum(self, task, quantum, taskDef):
        """Execute task on a single quantum.

        Parameters
        ----------
        task : `~lsst.pipe.base.PipelineTask`
            Task object.
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.
        """
        # Create a butler that operates in the context of a quantum
        butlerQC = ButlerQuantumContext(self.butler, quantum)

        # Get the input and output references for the task
        connectionInstance = task.config.connections.ConnectionsClass(config=task.config)
        inputRefs, outputRefs = connectionInstance.buildDatasetRefs(quantum)
        # Call task runQuantum() method. Any exception thrown by the task
        # propagates to caller.
        task.runQuantum(butlerQC, inputRefs, outputRefs)

        if taskDef.metadataDatasetName is not None:
            # DatasetRef has to be in the Quantum outputs, can lookup by name
            try:
                ref = quantum.outputs[taskDef.metadataDatasetName]
            except LookupError as exc:
                raise LookupError(
                    f"Quantum outputs is missing metadata dataset type {taskDef.metadataDatasetName},"
                    f" it could happen due to inconsistent options between Quantum generation"
                    f" and execution") from exc
            butlerQC.put(task.metadata, ref[0])