Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22__all__ = ['SingleQuantumExecutor'] 

23 

24# ------------------------------- 

25# Imports of standard modules -- 

26# ------------------------------- 

27from collections import defaultdict 

28import logging 

29from itertools import chain 

30import time 

31 

32# ----------------------------- 

33# Imports for other modules -- 

34# ----------------------------- 

35from .quantumGraphExecutor import QuantumExecutor 

36from lsst.log import Log 

37from lsst.obs.base import Instrument 

38from lsst.pipe.base import ButlerQuantumContext 

39from lsst.daf.butler import Quantum 

40 

41# ---------------------------------- 

42# Local non-exported definitions -- 

43# ---------------------------------- 

44 

45_LOG = logging.getLogger(__name__.partition(".")[2]) 

46 

47 

class SingleQuantumExecutor(QuantumExecutor):
    """Executor class which runs one Quantum at a time.

    Parameters
    ----------
    taskFactory : `~lsst.pipe.base.TaskFactory`
        Instance of a task factory.
    skipExisting : `bool`, optional
        If True then quanta with all existing outputs are not executed.
    clobberPartialOutputs : `bool`, optional
        If True then delete any partial outputs from quantum execution. If
        complete outputs exist then an exception is raised if
        ``skipExisting`` is False.
    enableLsstDebug : `bool`, optional
        Enable debugging with ``lsstDebug`` facility for a task.
    """
    def __init__(self, taskFactory, skipExisting=False, clobberPartialOutputs=False, enableLsstDebug=False):
        self.taskFactory = taskFactory
        self.skipExisting = skipExisting
        self.enableLsstDebug = enableLsstDebug
        self.clobberPartialOutputs = clobberPartialOutputs

    def execute(self, taskDef, quantum, butler):
        # Docstring inherited from QuantumExecutor.execute
        startTime = time.time()

        self.setupLogging(taskDef, quantum)
        taskClass, config = taskDef.taskClass, taskDef.config

        # check whether to skip execution or delete stale partial outputs
        if self.checkExistingOutputs(quantum, butler, taskDef):
            # Lazy %-style args so the message is only rendered when the
            # INFO level is enabled (consistent with the other log calls).
            _LOG.info("Quantum execution skipped due to existing outputs, task=%s dataId=%s.",
                      taskClass.__name__, quantum.dataId)
            return

        # resolve input refs that are missing a dataset_id
        quantum = self.updatedQuantumInputs(quantum, butler)

        # enable lsstDebug debugging; missing module is best-effort, not fatal
        if self.enableLsstDebug:
            try:
                _LOG.debug("Will try to import debug.py")
                import debug  # noqa:F401
            except ImportError:
                # Logger.warn is deprecated; use warning().
                _LOG.warning("No 'debug' module found.")

        # initialize global state
        self.initGlobals(quantum, butler)

        # Ensure that we are executing a frozen config
        config.freeze()

        task = self.makeTask(taskClass, config, butler)
        self.runQuantum(task, quantum, taskDef, butler)

        stopTime = time.time()
        _LOG.info("Execution of task '%s' on quantum %s took %.3f seconds",
                  taskDef.label, quantum.dataId, stopTime - startTime)

    def setupLogging(self, taskDef, quantum):
        """Configure logging system for execution of this task.

        This method can setup logging to attach task- or
        quantum-specific information to log messages. Potentially this can
        take into account some info from task configuration as well.

        Parameters
        ----------
        taskDef : `lsst.pipe.base.TaskDef`
            The task definition.
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        """
        # include quantum dataId and task label into MDC
        label = taskDef.label
        if quantum.dataId:
            label += f":{quantum.dataId}"
        Log.MDC("LABEL", label)

    def checkExistingOutputs(self, quantum, butler, taskDef):
        """Decide whether this quantum needs to be executed.

        If only partial outputs exist then they are removed if
        ``clobberPartialOutputs`` is True, otherwise an exception is raised.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Quantum to check for existing outputs.
        butler : `~lsst.daf.butler.Butler`
            Data butler.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.

        Returns
        -------
        exist : `bool`
            True if all quantum's outputs exist in a collection and
            ``skipExisting`` is True, False otherwise.

        Raises
        ------
        RuntimeError
            Raised if some outputs exist and some do not, and
            ``clobberPartialOutputs`` is False.
        """
        collection = butler.run
        registry = butler.registry

        # classify every expected output as existing or missing in the run
        # collection
        existingRefs = []
        missingRefs = []
        for datasetRefs in quantum.outputs.values():
            for datasetRef in datasetRefs:
                ref = registry.findDataset(datasetRef.datasetType, datasetRef.dataId,
                                           collections=collection)
                if ref is None:
                    missingRefs.append(datasetRef)
                else:
                    existingRefs.append(ref)
        if existingRefs and missingRefs:
            # some outputs exist and some don't, either delete existing ones or complain
            _LOG.debug("Partial outputs exist for task %s dataId=%s collection=%s "
                       "existingRefs=%s missingRefs=%s",
                       taskDef, quantum.dataId, collection, existingRefs, missingRefs)
            if self.clobberPartialOutputs:
                _LOG.info("Removing partial outputs for task %s: %s", taskDef, existingRefs)
                butler.pruneDatasets(existingRefs, disassociate=True, unstore=True, purge=True)
                return False
            else:
                raise RuntimeError("Registry inconsistency while checking for existing outputs:"
                                   f" collection={collection} existingRefs={existingRefs}"
                                   f" missingRefs={missingRefs}")
        elif existingRefs:
            # complete outputs exist, this is fine only if skipExisting is set
            return self.skipExisting
        else:
            # no outputs exist
            return False

    def makeTask(self, taskClass, config, butler):
        """Make new task instance.

        Parameters
        ----------
        taskClass : `type`
            Sub-class of `~lsst.pipe.base.PipelineTask`.
        config : `~lsst.pipe.base.PipelineTaskConfig`
            Configuration object for this task.
        butler : `~lsst.daf.butler.Butler`
            Data butler.

        Returns
        -------
        task : `~lsst.pipe.base.PipelineTask`
            Instance of ``taskClass`` type.
        """
        # call task factory for that
        return self.taskFactory.makeTask(taskClass, config, None, butler)

    def updatedQuantumInputs(self, quantum, butler):
        """Update quantum with extra information, returns a new updated Quantum.

        Some methods may require input DatasetRefs to have non-None
        ``dataset_id``, but in case of intermediate dataset it may not be
        filled during QuantumGraph construction. This method will retrieve
        missing info from registry.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        butler : `~lsst.daf.butler.Butler`
            Data butler.

        Returns
        -------
        update : `~lsst.daf.butler.Quantum`
            Updated Quantum instance.

        Raises
        ------
        ValueError
            Raised if an unresolved input ref cannot be found in the
            butler's collections.
        """
        updatedInputs = defaultdict(list)
        for key, refsForDatasetType in quantum.inputs.items():
            newRefsForDatasetType = updatedInputs[key]
            for ref in refsForDatasetType:
                if ref.id is None:
                    # Unresolved ref; look it up in the registry so the
                    # returned Quantum only carries resolved refs.
                    resolvedRef = butler.registry.findDataset(ref.datasetType, ref.dataId,
                                                              collections=butler.collections)
                    if resolvedRef is None:
                        raise ValueError(
                            f"Cannot find {ref.datasetType.name} with id {ref.dataId} "
                            f"in collections {butler.collections}."
                        )
                    newRefsForDatasetType.append(resolvedRef)
                    _LOG.debug("Updating dataset ID for %s", ref)
                else:
                    newRefsForDatasetType.append(ref)
        return Quantum(taskName=quantum.taskName,
                       taskClass=quantum.taskClass,
                       dataId=quantum.dataId,
                       initInputs=quantum.initInputs,
                       inputs=updatedInputs,
                       outputs=quantum.outputs
                       )

    def runQuantum(self, task, quantum, taskDef, butler):
        """Execute task on a single quantum.

        Parameters
        ----------
        task : `~lsst.pipe.base.PipelineTask`
            Task object.
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.
        butler : `~lsst.daf.butler.Butler`
            Data butler.

        Raises
        ------
        LookupError
            Raised if the metadata dataset type is not found among the
            quantum outputs.
        """
        # Create a butler that operates in the context of a quantum
        butlerQC = ButlerQuantumContext(butler, quantum)

        # Get the input and output references for the task
        inputRefs, outputRefs = taskDef.connections.buildDatasetRefs(quantum)

        # Call task runQuantum() method. Any exception thrown by the task
        # propagates to caller.
        task.runQuantum(butlerQC, inputRefs, outputRefs)

        if taskDef.metadataDatasetName is not None:
            # DatasetRef has to be in the Quantum outputs, can lookup by name
            try:
                ref = quantum.outputs[taskDef.metadataDatasetName]
            except LookupError as exc:
                raise LookupError(
                    f"Quantum outputs is missing metadata dataset type {taskDef.metadataDatasetName},"
                    f" it could happen due to inconsistent options between Quantum generation"
                    f" and execution") from exc
            butlerQC.put(task.getFullMetadata(), ref[0])

    def initGlobals(self, quantum, butler):
        """Initialize global state needed for task execution.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        butler : `~lsst.daf.butler.Butler`
            Data butler.

        Notes
        -----
        There is an issue with initializing filters singleton which is done
        by instrument, to avoid requiring tasks to do it in runQuantum()
        we do it here when any dataId has an instrument dimension. Also for
        now we only allow single instrument, verify that all instrument
        names in all dataIds are identical.

        This will need revision when filter singleton disappears.
        """
        oneInstrument = None
        for datasetRefs in chain(quantum.inputs.values(), quantum.outputs.values()):
            for datasetRef in datasetRefs:
                dataId = datasetRef.dataId
                instrument = dataId.get("instrument")
                if instrument is not None:
                    if oneInstrument is not None:
                        assert instrument == oneInstrument, \
                            "Currently require that only one instrument is used per graph"
                    else:
                        oneInstrument = instrument
                        Instrument.fromName(instrument, butler.registry)