Coverage for python/lsst/ctrl/mpexec/singleQuantumExecutor.py : 15%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of ctrl_mpexec.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
# Names exported by ``from ... import *``.
__all__ = ["SingleQuantumExecutor"]
24# -------------------------------
25# Imports of standard modules --
26# -------------------------------
27from collections import defaultdict
28import logging
29from itertools import chain
31# -----------------------------
32# Imports for other modules --
33# -----------------------------
34from .quantumGraphExecutor import QuantumExecutor
35from lsst.log import Log
36from lsst.obs.base import Instrument
37from lsst.pipe.base import ButlerQuantumContext
38from lsst.daf.butler import Quantum
40# ----------------------------------
41# Local non-exported definitions --
42# ----------------------------------
# Module logger.  ``__name__.partition(".")[2]`` drops everything up to and
# including the first dot of the dotted module path (e.g. "lsst.ctrl..." ->
# "ctrl..."); for a module name with no dot this yields "" (the root logger).
_LOG = logging.getLogger(__name__.partition(".")[2])
class SingleQuantumExecutor(QuantumExecutor):
    """Executor class which runs one Quantum at a time.

    Parameters
    ----------
    taskFactory : `~lsst.pipe.base.TaskFactory`
        Instance of a task factory.
    skipExisting : `bool`, optional
        If True then quanta with all existing outputs are not executed.
    clobberPartialOutputs : `bool`, optional
        If True then delete any partial outputs from quantum execution. If
        complete outputs exist then an exception is raised if
        ``skipExisting`` is False.
    enableLsstDebug : `bool`, optional
        Enable debugging with ``lsstDebug`` facility for a task.
    """
    def __init__(self, taskFactory, skipExisting=False, clobberPartialOutputs=False,
                 enableLsstDebug=False):
        self.taskFactory = taskFactory
        self.skipExisting = skipExisting
        self.enableLsstDebug = enableLsstDebug
        self.clobberPartialOutputs = clobberPartialOutputs

    def execute(self, taskDef, quantum, butler):
        # Docstring inherited from QuantumExecutor.execute
        self.setupLogging(taskDef, quantum)
        taskClass, config = taskDef.taskClass, taskDef.config

        # Check whether to skip or delete old outputs.
        if self.checkExistingOutputs(quantum, butler, taskDef):
            # Lazy %-args so the message is only rendered when INFO is enabled.
            _LOG.info("Quantum execution skipped due to existing outputs, "
                      "task=%s dataId=%s.", taskClass.__name__, quantum.dataId)
            return

        quantum = self.updatedQuantumInputs(quantum, butler)

        # Enable lsstDebug debugging.
        if self.enableLsstDebug:
            try:
                _LOG.debug("Will try to import debug.py")
                import debug  # noqa:F401
            except ImportError:
                # ``Logger.warn`` is a deprecated alias; use ``warning``.
                _LOG.warning("No 'debug' module found.")

        # Initialize global state.
        self.initGlobals(quantum, butler)

        # Ensure that we are executing a frozen config.
        config.freeze()

        task = self.makeTask(taskClass, config, butler)
        self.runQuantum(task, quantum, taskDef, butler)

    def setupLogging(self, taskDef, quantum):
        """Configure logging system for execution of this task.

        This method can setup logging to attach task- or
        quantum-specific information to log messages. Potentially this can
        take into account some info from task configuration as well.

        Parameters
        ----------
        taskDef : `lsst.pipe.base.TaskDef`
            The task definition.
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        """
        # Include quantum dataId and task label into MDC so every log record
        # emitted while this quantum runs carries its context.
        label = taskDef.label
        if quantum.dataId:
            label += f":{quantum.dataId}"
        Log.MDC("LABEL", label)

    def checkExistingOutputs(self, quantum, butler, taskDef):
        """Decide whether this quantum needs to be executed.

        If only partial outputs exist then they are removed if
        ``clobberPartialOutputs`` is True, otherwise an exception is raised.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Quantum to check for existing outputs.
        butler : `~lsst.daf.butler.Butler`
            Data butler.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.

        Returns
        -------
        exist : `bool`
            True if all quantum's outputs exist in a collection and
            ``skipExisting`` is True, False otherwise.

        Raises
        ------
        RuntimeError
            Raised if some outputs exist and some not.
        """
        collection = butler.run
        registry = butler.registry

        existingRefs = []
        missingRefs = []
        for datasetRefs in quantum.outputs.values():
            for datasetRef in datasetRefs:
                # Look each predicted output up in the butler's run collection.
                ref = registry.findDataset(datasetRef.datasetType, datasetRef.dataId,
                                           collections=collection)
                if ref is None:
                    missingRefs.append(datasetRef)
                else:
                    existingRefs.append(ref)
        if existingRefs and missingRefs:
            # Some outputs exist and some don't; either delete the existing
            # ones or complain.
            _LOG.debug("Partial outputs exist for task %s dataId=%s collection=%s "
                       "existingRefs=%s missingRefs=%s",
                       taskDef, quantum.dataId, collection, existingRefs, missingRefs)
            if self.clobberPartialOutputs:
                _LOG.info("Removing partial outputs for task %s: %s", taskDef, existingRefs)
                butler.pruneDatasets(existingRefs, disassociate=True, unstore=True, purge=True)
                return False
            else:
                raise RuntimeError(f"Registry inconsistency while checking for existing outputs:"
                                   f" collection={collection} existingRefs={existingRefs}"
                                   f" missingRefs={missingRefs}")
        elif existingRefs:
            # Complete outputs exist; this is fine only if skipExisting is set.
            return self.skipExisting
        else:
            # No outputs exist.
            return False

    def makeTask(self, taskClass, config, butler):
        """Make new task instance.

        Parameters
        ----------
        taskClass : `type`
            Sub-class of `~lsst.pipe.base.PipelineTask`.
        config : `~lsst.pipe.base.PipelineTaskConfig`
            Configuration object for this task.
        butler : `~lsst.daf.butler.Butler`
            Data butler.

        Returns
        -------
        task : `~lsst.pipe.base.PipelineTask`
            Instance of ``taskClass`` type.
        """
        # Delegate construction to the task factory.  The third argument is
        # intentionally None (presumably config overrides — confirm against
        # TaskFactory.makeTask).
        return self.taskFactory.makeTask(taskClass, config, None, butler)

    def updatedQuantumInputs(self, quantum, butler):
        """Update quantum with extra information, returns a new updated Quantum.

        Some methods may require input DatasetRefs to have non-None
        ``dataset_id``, but in case of intermediate dataset it may not be
        filled during QuantumGraph construction. This method will retrieve
        missing info from registry.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        butler : `~lsst.daf.butler.Butler`
            Data butler.

        Returns
        -------
        update : `~lsst.daf.butler.Quantum`
            Updated Quantum instance.

        Raises
        ------
        ValueError
            Raised if an unresolved input ref cannot be found in the butler's
            search collections.
        """
        updatedInputs = defaultdict(list)
        for key, refsForDatasetType in quantum.inputs.items():
            newRefsForDatasetType = updatedInputs[key]
            for ref in refsForDatasetType:
                if ref.id is None:
                    # Unresolved ref (likely an intermediate dataset); resolve
                    # it against the registry now that upstream tasks ran.
                    resolvedRef = butler.registry.findDataset(ref.datasetType, ref.dataId,
                                                              collections=butler.collections)
                    if resolvedRef is None:
                        raise ValueError(
                            f"Cannot find {ref.datasetType.name} with id {ref.dataId} "
                            f"in collections {butler.collections}."
                        )
                    newRefsForDatasetType.append(resolvedRef)
                    _LOG.debug("Updating dataset ID for %s", ref)
                else:
                    newRefsForDatasetType.append(ref)
        # Quantum is immutable; build a new one with the resolved inputs.
        return Quantum(taskName=quantum.taskName,
                       taskClass=quantum.taskClass,
                       dataId=quantum.dataId,
                       initInputs=quantum.initInputs,
                       inputs=updatedInputs,
                       outputs=quantum.outputs
                       )

    def runQuantum(self, task, quantum, taskDef, butler):
        """Execute task on a single quantum.

        Parameters
        ----------
        task : `~lsst.pipe.base.PipelineTask`
            Task object.
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.
        butler : `~lsst.daf.butler.Butler`
            Data butler.
        """
        # Create a butler that operates in the context of a quantum.
        butlerQC = ButlerQuantumContext(butler, quantum)

        # Get the input and output references for the task.
        inputRefs, outputRefs = taskDef.connections.buildDatasetRefs(quantum)

        # Call task runQuantum() method. Any exception thrown by the task
        # propagates to caller.
        task.runQuantum(butlerQC, inputRefs, outputRefs)

        if taskDef.metadataDatasetName is not None:
            # DatasetRef has to be in the Quantum outputs, can lookup by name.
            try:
                ref = quantum.outputs[taskDef.metadataDatasetName]
            except LookupError as exc:
                raise LookupError(
                    f"Quantum outputs is missing metadata dataset type {taskDef.metadataDatasetName},"
                    f" it could happen due to inconsistent options between Quantum generation"
                    f" and execution") from exc
            butlerQC.put(task.getFullMetadata(), ref[0])

    def initGlobals(self, quantum, butler):
        """Initialize global state needed for task execution.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        butler : `~lsst.daf.butler.Butler`
            Data butler.

        Notes
        -----
        There is an issue with initializing filters singleton which is done
        by instrument, to avoid requiring tasks to do it in runQuantum()
        we do it here when any dataId has an instrument dimension. Also for
        now we only allow single instrument, verify that all instrument
        names in all dataIds are identical.

        This will need revision when filter singleton disappears.
        """
        oneInstrument = None
        for datasetRefs in chain(quantum.inputs.values(), quantum.outputs.values()):
            for datasetRef in datasetRefs:
                dataId = datasetRef.dataId
                instrument = dataId.get("instrument")
                if instrument is not None:
                    if oneInstrument is not None:
                        # NOTE(review): this check is stripped under ``-O``;
                        # consider an explicit raise if it must always hold.
                        assert instrument == oneInstrument, \
                            "Currently require that only one instrument is used per graph"
                    else:
                        oneInstrument = instrument
                        Instrument.fromName(instrument, butler.registry)