Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

# Public API of this module: only the executor class is exported.
__all__ = ['SingleQuantumExecutor']

23 

24# ------------------------------- 

25# Imports of standard modules -- 

26# ------------------------------- 

27from collections import defaultdict 

28import logging 

29from itertools import chain 

30import time 

31 

32# ----------------------------- 

33# Imports for other modules -- 

34# ----------------------------- 

35from .quantumGraphExecutor import QuantumExecutor 

36from lsst.log import Log 

37from lsst.obs.base import Instrument 

38from lsst.pipe.base import ButlerQuantumContext 

39from lsst.daf.butler import Quantum 

40 

41# ---------------------------------- 

42# Local non-exported definitions -- 

43# ---------------------------------- 

44 

# Module logger.  ``__name__.partition(".")[2]`` keeps everything after the
# first dot, i.e. it strips the top-level package component from the logger
# name (presumably to drop the "lsst." prefix — confirm against the logging
# configuration used by this package).
_LOG = logging.getLogger(__name__.partition(".")[2])

46 

47 

class SingleQuantumExecutor(QuantumExecutor):
    """Executor class which runs one Quantum at a time.

    Parameters
    ----------
    butler : `~lsst.daf.butler.Butler`
        Data butler.
    taskFactory : `~lsst.pipe.base.TaskFactory`
        Instance of a task factory.
    skipExisting : `bool`, optional
        If True then quanta with all existing outputs are not executed.
    clobberPartialOutputs : `bool`, optional
        If True then delete any partial outputs from quantum execution. If
        complete outputs exist then an exception is raised if ``skipExisting``
        is False.
    enableLsstDebug : `bool`, optional
        Enable debugging with ``lsstDebug`` facility for a task.
    """

    def __init__(self, taskFactory, skipExisting=False, clobberPartialOutputs=False, enableLsstDebug=False):
        # Factory used by makeTask() to construct the PipelineTask instance.
        self.taskFactory = taskFactory
        # Consulted by checkExistingOutputs(): skip quanta whose outputs exist.
        self.skipExisting = skipExisting
        # Consulted by execute(): import a user-provided ``debug`` module.
        self.enableLsstDebug = enableLsstDebug
        # Consulted by checkExistingOutputs(): prune partial outputs instead
        # of raising.
        self.clobberPartialOutputs = clobberPartialOutputs

71 

72 def execute(self, taskDef, quantum, butler): 

73 

74 startTime = time.time() 

75 

76 # Docstring inherited from QuantumExecutor.execute 

77 self.setupLogging(taskDef, quantum) 

78 taskClass, label, config = taskDef.taskClass, taskDef.label, taskDef.config 

79 

80 # check whether to skip or delete old outputs 

81 if self.checkExistingOutputs(quantum, butler, taskDef): 

82 _LOG.info("Quantum execution skipped due to existing outputs, " 

83 f"task={taskClass.__name__} dataId={quantum.dataId}.") 

84 return 

85 

86 quantum = self.updatedQuantumInputs(quantum, butler) 

87 

88 # enable lsstDebug debugging 

89 if self.enableLsstDebug: 

90 try: 

91 _LOG.debug("Will try to import debug.py") 

92 import debug # noqa:F401 

93 except ImportError: 

94 _LOG.warn("No 'debug' module found.") 

95 

96 # initialize global state 

97 self.initGlobals(quantum, butler) 

98 

99 # Ensure that we are executing a frozen config 

100 config.freeze() 

101 

102 task = self.makeTask(taskClass, label, config, butler) 

103 self.runQuantum(task, quantum, taskDef, butler) 

104 

105 stopTime = time.time() 

106 _LOG.info("Execution of task '%s' on quantum %s took %.3f seconds", 

107 taskDef.label, quantum.dataId, stopTime - startTime) 

108 

109 def setupLogging(self, taskDef, quantum): 

110 """Configure logging system for execution of this task. 

111 

112 Ths method can setup logging to attach task- or 

113 quantum-specific information to log messages. Potentially this can 

114 take into account some info from task configuration as well. 

115 

116 Parameters 

117 ---------- 

118 taskDef : `lsst.pipe.base.TaskDef` 

119 The task definition. 

120 quantum : `~lsst.daf.butler.Quantum` 

121 Single Quantum instance. 

122 """ 

123 # include quantum dataId and task label into MDC 

124 label = taskDef.label 

125 if quantum.dataId: 

126 label += f":{quantum.dataId}" 

127 Log.MDC("LABEL", label) 

128 

129 def checkExistingOutputs(self, quantum, butler, taskDef): 

130 """Decide whether this quantum needs to be executed. 

131 

132 If only partial outputs exist then they are removed if 

133 ``clobberPartialOutputs`` is True, otherwise an exception is raised. 

134 

135 Parameters 

136 ---------- 

137 quantum : `~lsst.daf.butler.Quantum` 

138 Quantum to check for existing outputs 

139 butler : `~lsst.daf.butler.Butler` 

140 Data butler. 

141 taskDef : `~lsst.pipe.base.TaskDef` 

142 Task definition structure. 

143 

144 Returns 

145 ------- 

146 exist : `bool` 

147 True if all quantum's outputs exist in a collection and 

148 ``skipExisting`` is True, False otherwise. 

149 

150 Raises 

151 ------ 

152 RuntimeError 

153 Raised if some outputs exist and some not. 

154 """ 

155 collection = butler.run 

156 registry = butler.registry 

157 

158 existingRefs = [] 

159 missingRefs = [] 

160 for datasetRefs in quantum.outputs.values(): 

161 for datasetRef in datasetRefs: 

162 ref = registry.findDataset(datasetRef.datasetType, datasetRef.dataId, 

163 collections=butler.run) 

164 if ref is None: 

165 missingRefs.append(datasetRef) 

166 else: 

167 existingRefs.append(ref) 

168 if existingRefs and missingRefs: 

169 # some outputs exist and some don't, either delete existing ones or complain 

170 _LOG.debug("Partial outputs exist for task %s dataId=%s collection=%s " 

171 "existingRefs=%s missingRefs=%s", 

172 taskDef, quantum.dataId, collection, existingRefs, missingRefs) 

173 if self.clobberPartialOutputs: 

174 _LOG.info("Removing partial outputs for task %s: %s", taskDef, existingRefs) 

175 butler.pruneDatasets(existingRefs, disassociate=True, unstore=True, purge=True) 

176 return False 

177 else: 

178 raise RuntimeError(f"Registry inconsistency while checking for existing outputs:" 

179 f" collection={collection} existingRefs={existingRefs}" 

180 f" missingRefs={missingRefs}") 

181 elif existingRefs: 

182 # complete outputs exist, this is fine only if skipExisting is set 

183 return self.skipExisting 

184 else: 

185 # no outputs exist 

186 return False 

187 

    def makeTask(self, taskClass, name, config, butler):
        """Make new task instance.

        Parameters
        ----------
        taskClass : `type`
            Sub-class of `~lsst.pipe.base.PipelineTask`.
        name : `str`
            Name for this task.
        config : `~lsst.pipe.base.PipelineTaskConfig`
            Configuration object for this task.
        butler : `~lsst.daf.butler.Butler`
            Data butler.

        Returns
        -------
        task : `~lsst.pipe.base.PipelineTask`
            Instance of ``taskClass`` type.
        """
        # call task factory for that; the fourth positional argument is
        # intentionally None here (presumably config overrides — confirm
        # against TaskFactory.makeTask's signature).
        return self.taskFactory.makeTask(taskClass, name, config, None, butler)

209 

210 def updatedQuantumInputs(self, quantum, butler): 

211 """Update quantum with extra information, returns a new updated Quantum. 

212 

213 Some methods may require input DatasetRefs to have non-None 

214 ``dataset_id``, but in case of intermediate dataset it may not be 

215 filled during QuantumGraph construction. This method will retrieve 

216 missing info from registry. 

217 

218 Parameters 

219 ---------- 

220 quantum : `~lsst.daf.butler.Quantum` 

221 Single Quantum instance. 

222 butler : `~lsst.daf.butler.Butler` 

223 Data butler. 

224 

225 Returns 

226 ------- 

227 update : `~lsst.daf.butler.Quantum` 

228 Updated Quantum instance 

229 """ 

230 updatedInputs = defaultdict(list) 

231 for key, refsForDatasetType in quantum.inputs.items(): 

232 newRefsForDatasetType = updatedInputs[key] 

233 for ref in refsForDatasetType: 

234 if ref.id is None: 

235 resolvedRef = butler.registry.findDataset(ref.datasetType, ref.dataId, 

236 collections=butler.collections) 

237 if resolvedRef is None: 

238 raise ValueError( 

239 f"Cannot find {ref.datasetType.name} with id {ref.dataId} " 

240 f"in collections {butler.collections}." 

241 ) 

242 newRefsForDatasetType.append(resolvedRef) 

243 _LOG.debug("Updating dataset ID for %s", ref) 

244 else: 

245 newRefsForDatasetType.append(ref) 

246 return Quantum(taskName=quantum.taskName, 

247 taskClass=quantum.taskClass, 

248 dataId=quantum.dataId, 

249 initInputs=quantum.initInputs, 

250 inputs=updatedInputs, 

251 outputs=quantum.outputs 

252 ) 

253 

254 def runQuantum(self, task, quantum, taskDef, butler): 

255 """Execute task on a single quantum. 

256 

257 Parameters 

258 ---------- 

259 task : `~lsst.pipe.base.PipelineTask` 

260 Task object. 

261 quantum : `~lsst.daf.butler.Quantum` 

262 Single Quantum instance. 

263 taskDef : `~lsst.pipe.base.TaskDef` 

264 Task definition structure. 

265 butler : `~lsst.daf.butler.Butler` 

266 Data butler. 

267 """ 

268 # Create a butler that operates in the context of a quantum 

269 butlerQC = ButlerQuantumContext(butler, quantum) 

270 

271 # Get the input and output references for the task 

272 inputRefs, outputRefs = taskDef.connections.buildDatasetRefs(quantum) 

273 

274 # Call task runQuantum() method. Any exception thrown by the task 

275 # propagates to caller. 

276 task.runQuantum(butlerQC, inputRefs, outputRefs) 

277 

278 if taskDef.metadataDatasetName is not None: 

279 # DatasetRef has to be in the Quantum outputs, can lookup by name 

280 try: 

281 ref = quantum.outputs[taskDef.metadataDatasetName] 

282 except LookupError as exc: 

283 raise LookupError( 

284 f"Quantum outputs is missing metadata dataset type {taskDef.metadataDatasetName}," 

285 f" it could happen due to inconsistent options between Quantum generation" 

286 f" and execution") from exc 

287 butlerQC.put(task.getFullMetadata(), ref[0]) 

288 

    def initGlobals(self, quantum, butler):
        """Initialize global state needed for task execution.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        butler : `~lsst.daf.butler.Butler`
            Data butler.

        Notes
        -----
        There is an issue with initializing filters singleton which is done
        by instrument, to avoid requiring tasks to do it in runQuantum()
        we do it here when any dataId has an instrument dimension. Also for
        now we only allow single instrument, verify that all instrument
        names in all dataIds are identical.

        This will need revision when filter singleton disappears.
        """
        oneInstrument = None
        # Scan every input and output dataId of the quantum for an
        # "instrument" dimension value.
        for datasetRefs in chain(quantum.inputs.values(), quantum.outputs.values()):
            for datasetRef in datasetRefs:
                dataId = datasetRef.dataId
                instrument = dataId.get("instrument")
                if instrument is not None:
                    if oneInstrument is not None:
                        # NOTE: assert is stripped under ``python -O``; this
                        # check relies on the single-instrument invariant
                        # established at graph generation.
                        assert instrument == oneInstrument, \
                            "Currently require that only one instrument is used per graph"
                    else:
                        # First instrument seen: remember it and initialize
                        # instrument-specific global state exactly once.
                        oneInstrument = instrument
                        Instrument.fromName(instrument, butler.registry)