Coverage for python/lsst/ctrl/mpexec/singleQuantumExecutor.py : 15%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of ctrl_mpexec.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
# Public API of this module.
__all__ = ['SingleQuantumExecutor']
24# -------------------------------
25# Imports of standard modules --
26# -------------------------------
27from collections import defaultdict
28import logging
29from itertools import chain
30import time
32# -----------------------------
33# Imports for other modules --
34# -----------------------------
35from .quantumGraphExecutor import QuantumExecutor
36from lsst.log import Log
37from lsst.obs.base import Instrument
38from lsst.pipe.base import ButlerQuantumContext
39from lsst.daf.butler import Quantum
41# ----------------------------------
42# Local non-exported definitions --
43# ----------------------------------
# Module logger. ``partition(".")[2]`` drops everything up to and including
# the first dot of ``__name__`` (e.g. strips a leading "lsst." package
# component) — presumably to match the project-wide logger naming convention;
# verify against other modules in this package.
_LOG = logging.getLogger(__name__.partition(".")[2])
class SingleQuantumExecutor(QuantumExecutor):
    """Executor class which runs one Quantum at a time.

    Parameters
    ----------
    taskFactory : `~lsst.pipe.base.TaskFactory`
        Instance of a task factory.
    skipExisting : `bool`, optional
        If `True` then quanta with all existing outputs are not executed.
    clobberPartialOutputs : `bool`, optional
        If `True` then delete any partial outputs from quantum execution. If
        complete outputs exist then an exception is raised if
        ``skipExisting`` is `False`.
    enableLsstDebug : `bool`, optional
        Enable debugging with ``lsstDebug`` facility for a task.
    """
    def __init__(self, taskFactory, skipExisting=False, clobberPartialOutputs=False, enableLsstDebug=False):
        self.taskFactory = taskFactory
        self.skipExisting = skipExisting
        self.enableLsstDebug = enableLsstDebug
        self.clobberPartialOutputs = clobberPartialOutputs

    def execute(self, taskDef, quantum, butler):
        # Docstring inherited from QuantumExecutor.execute
        startTime = time.time()

        self.setupLogging(taskDef, quantum)
        taskClass, label, config = taskDef.taskClass, taskDef.label, taskDef.config

        # Check whether to skip execution or delete stale partial outputs.
        if self.checkExistingOutputs(quantum, butler, taskDef):
            # Lazy %-formatting so the message is only built when emitted.
            _LOG.info("Quantum execution skipped due to existing outputs, "
                      "task=%s dataId=%s.", taskClass.__name__, quantum.dataId)
            return

        quantum = self.updatedQuantumInputs(quantum, butler)

        # Enable lsstDebug debugging; ``debug`` is an optional user-supplied
        # module picked up from the runtime path, hence the local import.
        if self.enableLsstDebug:
            try:
                _LOG.debug("Will try to import debug.py")
                import debug  # noqa:F401
            except ImportError:
                # warning() instead of the deprecated warn() alias.
                _LOG.warning("No 'debug' module found.")

        # Initialize global state (e.g. per-instrument filter singleton).
        self.initGlobals(quantum, butler)

        # Ensure that we are executing a frozen config.
        config.freeze()

        task = self.makeTask(taskClass, label, config, butler)
        self.runQuantum(task, quantum, taskDef, butler)

        stopTime = time.time()
        _LOG.info("Execution of task '%s' on quantum %s took %.3f seconds",
                  taskDef.label, quantum.dataId, stopTime - startTime)

    def setupLogging(self, taskDef, quantum):
        """Configure logging system for execution of this task.

        This method can setup logging to attach task- or
        quantum-specific information to log messages. Potentially this can
        take into account some info from task configuration as well.

        Parameters
        ----------
        taskDef : `lsst.pipe.base.TaskDef`
            The task definition.
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        """
        # Include quantum dataId and task label in the MDC so every log
        # record emitted during this quantum carries them.
        label = taskDef.label
        if quantum.dataId:
            label += f":{quantum.dataId}"
        Log.MDC("LABEL", label)

    def checkExistingOutputs(self, quantum, butler, taskDef):
        """Decide whether this quantum needs to be executed.

        If only partial outputs exist then they are removed if
        ``clobberPartialOutputs`` is True, otherwise an exception is raised.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Quantum to check for existing outputs.
        butler : `~lsst.daf.butler.Butler`
            Data butler.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.

        Returns
        -------
        exist : `bool`
            True if all quantum's outputs exist in a collection and
            ``skipExisting`` is True, False otherwise.

        Raises
        ------
        RuntimeError
            Raised if some outputs exist and some not.
        """
        collection = butler.run
        registry = butler.registry

        existingRefs = []
        missingRefs = []
        for datasetRefs in quantum.outputs.values():
            for datasetRef in datasetRefs:
                # Use the hoisted ``collection`` (== butler.run) consistently.
                ref = registry.findDataset(datasetRef.datasetType, datasetRef.dataId,
                                           collections=collection)
                if ref is None:
                    missingRefs.append(datasetRef)
                else:
                    existingRefs.append(ref)
        if existingRefs and missingRefs:
            # Some outputs exist and some don't: either delete the existing
            # partial outputs or complain.
            _LOG.debug("Partial outputs exist for task %s dataId=%s collection=%s "
                       "existingRefs=%s missingRefs=%s",
                       taskDef, quantum.dataId, collection, existingRefs, missingRefs)
            if self.clobberPartialOutputs:
                _LOG.info("Removing partial outputs for task %s: %s", taskDef, existingRefs)
                butler.pruneDatasets(existingRefs, disassociate=True, unstore=True, purge=True)
                return False
            else:
                raise RuntimeError(f"Registry inconsistency while checking for existing outputs:"
                                   f" collection={collection} existingRefs={existingRefs}"
                                   f" missingRefs={missingRefs}")
        elif existingRefs:
            # Complete outputs exist; this is fine only if skipExisting is set.
            return self.skipExisting
        else:
            # No outputs exist.
            return False

    def makeTask(self, taskClass, name, config, butler):
        """Make new task instance.

        Parameters
        ----------
        taskClass : `type`
            Sub-class of `~lsst.pipe.base.PipelineTask`.
        name : `str`
            Name for this task.
        config : `~lsst.pipe.base.PipelineTaskConfig`
            Configuration object for this task.
        butler : `~lsst.daf.butler.Butler`
            Data butler.

        Returns
        -------
        task : `~lsst.pipe.base.PipelineTask`
            Instance of ``taskClass`` type.
        """
        # Delegate construction to the task factory; the ``None`` argument is
        # whatever the factory's fourth parameter expects — TODO confirm
        # against TaskFactory.makeTask signature.
        return self.taskFactory.makeTask(taskClass, name, config, None, butler)

    def updatedQuantumInputs(self, quantum, butler):
        """Update quantum with extra information, returns a new updated
        Quantum.

        Some methods may require input DatasetRefs to have non-None
        ``dataset_id``, but in case of intermediate dataset it may not be
        filled during QuantumGraph construction. This method will retrieve
        missing info from registry.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        butler : `~lsst.daf.butler.Butler`
            Data butler.

        Returns
        -------
        update : `~lsst.daf.butler.Quantum`
            Updated Quantum instance.

        Raises
        ------
        ValueError
            Raised if an unresolved input ref cannot be found in the
            registry.
        """
        updatedInputs = defaultdict(list)
        for key, refsForDatasetType in quantum.inputs.items():
            newRefsForDatasetType = updatedInputs[key]
            for ref in refsForDatasetType:
                if ref.id is None:
                    # Unresolved ref (e.g. an intermediate dataset); look it
                    # up in the registry to obtain the dataset ID.
                    resolvedRef = butler.registry.findDataset(ref.datasetType, ref.dataId,
                                                              collections=butler.collections)
                    if resolvedRef is None:
                        raise ValueError(
                            f"Cannot find {ref.datasetType.name} with id {ref.dataId} "
                            f"in collections {butler.collections}."
                        )
                    newRefsForDatasetType.append(resolvedRef)
                    _LOG.debug("Updating dataset ID for %s", ref)
                else:
                    newRefsForDatasetType.append(ref)
        # Quantum is immutable for our purposes; build a new one with the
        # resolved inputs, preserving everything else.
        return Quantum(taskName=quantum.taskName,
                       taskClass=quantum.taskClass,
                       dataId=quantum.dataId,
                       initInputs=quantum.initInputs,
                       inputs=updatedInputs,
                       outputs=quantum.outputs
                       )

    def runQuantum(self, task, quantum, taskDef, butler):
        """Execute task on a single quantum.

        Parameters
        ----------
        task : `~lsst.pipe.base.PipelineTask`
            Task object.
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.
        butler : `~lsst.daf.butler.Butler`
            Data butler.
        """
        # Create a butler that operates in the context of a quantum.
        butlerQC = ButlerQuantumContext(butler, quantum)

        # Get the input and output references for the task.
        inputRefs, outputRefs = taskDef.connections.buildDatasetRefs(quantum)

        # Call task runQuantum() method. Any exception thrown by the task
        # propagates to caller.
        task.runQuantum(butlerQC, inputRefs, outputRefs)

        if taskDef.metadataDatasetName is not None:
            # DatasetRef has to be in the Quantum outputs, can lookup by name.
            try:
                ref = quantum.outputs[taskDef.metadataDatasetName]
            except LookupError as exc:
                raise LookupError(
                    f"Quantum outputs is missing metadata dataset type {taskDef.metadataDatasetName},"
                    f" it could happen due to inconsistent options between Quantum generation"
                    f" and execution") from exc
            butlerQC.put(task.getFullMetadata(), ref[0])

    def initGlobals(self, quantum, butler):
        """Initialize global state needed for task execution.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        butler : `~lsst.daf.butler.Butler`
            Data butler.

        Notes
        -----
        There is an issue with initializing filters singleton which is done
        by instrument, to avoid requiring tasks to do it in runQuantum()
        we do it here when any dataId has an instrument dimension. Also for
        now we only allow single instrument, verify that all instrument
        names in all dataIds are identical.

        This will need revision when filter singleton disappears.
        """
        oneInstrument = None
        for datasetRefs in chain(quantum.inputs.values(), quantum.outputs.values()):
            for datasetRef in datasetRefs:
                dataId = datasetRef.dataId
                instrument = dataId.get("instrument")
                if instrument is not None:
                    if oneInstrument is not None:
                        # NOTE: assert (not raise) kept to preserve the
                        # original exception type; stripped under ``-O``.
                        assert instrument == oneInstrument, \
                            "Currently require that only one instrument is used per graph"
                    else:
                        oneInstrument = instrument
                        Instrument.fromName(instrument, butler.registry)