Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

21 

# Public API of this module.
__all__ = ["SingleQuantumExecutor"]

23 

24# ------------------------------- 

25# Imports of standard modules -- 

26# ------------------------------- 

27from collections import defaultdict 

28import logging 

29from itertools import chain 

30 

31# ----------------------------- 

32# Imports for other modules -- 

33# ----------------------------- 

34from .quantumGraphExecutor import QuantumExecutor 

35from lsst.log import Log 

36from lsst.obs.base import Instrument 

37from lsst.pipe.base import ButlerQuantumContext 

38from lsst.daf.butler import Quantum 

39 

40# ---------------------------------- 

41# Local non-exported definitions -- 

42# ---------------------------------- 

43 

# Module logger, named after the module path with the top-level package
# name stripped off (``str.partition`` returns everything after the first
# dot in element 2).
_subLoggerName = __name__.partition(".")[2]
_LOG = logging.getLogger(_subLoggerName)

45 

46 

class SingleQuantumExecutor(QuantumExecutor):
    """Executor class which runs one Quantum at a time.

    Parameters
    ----------
    taskFactory : `~lsst.pipe.base.TaskFactory`
        Instance of a task factory.
    skipExisting : `bool`, optional
        If `True` then quanta with all existing outputs are not executed.
    clobberPartialOutputs : `bool`, optional
        If `True` then delete any partial outputs from quantum execution. If
        complete outputs exist then an exception is raised if ``skipExisting``
        is `False`.
    enableLsstDebug : `bool`, optional
        Enable debugging with ``lsstDebug`` facility for a task.
    """
    def __init__(self, taskFactory, skipExisting=False, clobberPartialOutputs=False, enableLsstDebug=False):
        self.taskFactory = taskFactory
        self.skipExisting = skipExisting
        self.enableLsstDebug = enableLsstDebug
        self.clobberPartialOutputs = clobberPartialOutputs

    def execute(self, taskDef, quantum, butler):
        # Docstring inherited from QuantumExecutor.execute
        self.setupLogging(taskDef, quantum)
        taskClass, config = taskDef.taskClass, taskDef.config

        # check whether to skip or delete old outputs
        if self.checkExistingOutputs(quantum, butler, taskDef):
            # Use lazy %-formatting (consistent with the rest of this module)
            # so the message is only built when the record is emitted.
            _LOG.info("Quantum execution skipped due to existing outputs, task=%s dataId=%s.",
                      taskClass.__name__, quantum.dataId)
            return

        # Resolve dataset IDs for intermediate inputs that were unknown at
        # QuantumGraph construction time.
        quantum = self.updatedQuantumInputs(quantum, butler)

        # enable lsstDebug debugging
        if self.enableLsstDebug:
            try:
                _LOG.debug("Will try to import debug.py")
                import debug  # noqa:F401
            except ImportError:
                # ``Logger.warn`` is a deprecated alias; use ``warning``.
                _LOG.warning("No 'debug' module found.")

        # initialize global state
        self.initGlobals(quantum, butler)

        # Ensure that we are executing a frozen config
        config.freeze()

        task = self.makeTask(taskClass, config, butler)
        self.runQuantum(task, quantum, taskDef, butler)

    def setupLogging(self, taskDef, quantum):
        """Configure logging system for execution of this task.

        This method can setup logging to attach task- or
        quantum-specific information to log messages. Potentially this can
        take into account some info from task configuration as well.

        Parameters
        ----------
        taskDef : `lsst.pipe.base.TaskDef`
            The task definition.
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        """
        # include quantum dataId and task label into MDC
        label = taskDef.label
        if quantum.dataId:
            label += f":{quantum.dataId}"
        Log.MDC("LABEL", label)

    def checkExistingOutputs(self, quantum, butler, taskDef):
        """Decide whether this quantum needs to be executed.

        If only partial outputs exist then they are removed if
        ``clobberPartialOutputs`` is True, otherwise an exception is raised.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Quantum to check for existing outputs.
        butler : `~lsst.daf.butler.Butler`
            Data butler.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.

        Returns
        -------
        exist : `bool`
            True if all quantum's outputs exist in a collection and
            ``skipExisting`` is True, False otherwise.

        Raises
        ------
        RuntimeError
            Raised if some outputs exist and some not.
        """
        collection = butler.run
        registry = butler.registry

        existingRefs = []
        missingRefs = []
        for datasetRefs in quantum.outputs.values():
            for datasetRef in datasetRefs:
                # Resolve the predicted ref against the output run collection
                # (use the local ``collection`` for consistency with the log
                # and error messages below).
                ref = registry.findDataset(datasetRef.datasetType, datasetRef.dataId,
                                           collections=collection)
                if ref is None:
                    missingRefs.append(datasetRef)
                else:
                    existingRefs.append(ref)
        if existingRefs and missingRefs:
            # some outputs exist and some don't, either delete existing ones or complain
            _LOG.debug("Partial outputs exist for task %s dataId=%s collection=%s "
                       "existingRefs=%s missingRefs=%s",
                       taskDef, quantum.dataId, collection, existingRefs, missingRefs)
            if self.clobberPartialOutputs:
                _LOG.info("Removing partial outputs for task %s: %s", taskDef, existingRefs)
                butler.pruneDatasets(existingRefs, disassociate=True, unstore=True, purge=True)
                return False
            else:
                raise RuntimeError("Registry inconsistency while checking for existing outputs:"
                                   f" collection={collection} existingRefs={existingRefs}"
                                   f" missingRefs={missingRefs}")
        elif existingRefs:
            # complete outputs exist, this is fine only if skipExisting is set
            return self.skipExisting
        else:
            # no outputs exist
            return False

    def makeTask(self, taskClass, config, butler):
        """Make new task instance.

        Parameters
        ----------
        taskClass : `type`
            Sub-class of `~lsst.pipe.base.PipelineTask`.
        config : `~lsst.pipe.base.PipelineTaskConfig`
            Configuration object for this task.
        butler : `~lsst.daf.butler.Butler`
            Data butler.

        Returns
        -------
        task : `~lsst.pipe.base.PipelineTask`
            Instance of ``taskClass`` type.
        """
        # call task factory for that
        return self.taskFactory.makeTask(taskClass, config, None, butler)

    def updatedQuantumInputs(self, quantum, butler):
        """Update quantum with extra information, returns a new updated Quantum.

        Some methods may require input DatasetRefs to have non-None
        ``dataset_id``, but in case of intermediate dataset it may not be
        filled during QuantumGraph construction. This method will retrieve
        missing info from registry.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        butler : `~lsst.daf.butler.Butler`
            Data butler.

        Returns
        -------
        update : `~lsst.daf.butler.Quantum`
            Updated Quantum instance.
        """
        updatedInputs = defaultdict(list)
        for key, refsForDatasetType in quantum.inputs.items():
            newRefsForDatasetType = updatedInputs[key]
            for ref in refsForDatasetType:
                if ref.id is None:
                    # Unresolved (intermediate) dataset; look it up in the
                    # input collections known to the butler.
                    resolvedRef = butler.registry.findDataset(ref.datasetType, ref.dataId,
                                                              collections=butler.collections)
                    if resolvedRef is None:
                        raise ValueError(
                            f"Cannot find {ref.datasetType.name} with id {ref.dataId} "
                            f"in collections {butler.collections}."
                        )
                    newRefsForDatasetType.append(resolvedRef)
                    _LOG.debug("Updating dataset ID for %s", ref)
                else:
                    newRefsForDatasetType.append(ref)
        return Quantum(taskName=quantum.taskName,
                       taskClass=quantum.taskClass,
                       dataId=quantum.dataId,
                       initInputs=quantum.initInputs,
                       inputs=updatedInputs,
                       outputs=quantum.outputs
                       )

    def runQuantum(self, task, quantum, taskDef, butler):
        """Execute task on a single quantum.

        Parameters
        ----------
        task : `~lsst.pipe.base.PipelineTask`
            Task object.
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.
        butler : `~lsst.daf.butler.Butler`
            Data butler.
        """
        # Create a butler that operates in the context of a quantum
        butlerQC = ButlerQuantumContext(butler, quantum)

        # Get the input and output references for the task
        inputRefs, outputRefs = taskDef.connections.buildDatasetRefs(quantum)

        # Call task runQuantum() method. Any exception thrown by the task
        # propagates to caller.
        task.runQuantum(butlerQC, inputRefs, outputRefs)

        if taskDef.metadataDatasetName is not None:
            # DatasetRef has to be in the Quantum outputs, can lookup by name
            try:
                ref = quantum.outputs[taskDef.metadataDatasetName]
            except LookupError as exc:
                raise LookupError(
                    f"Quantum outputs is missing metadata dataset type {taskDef.metadataDatasetName},"
                    f" it could happen due to inconsistent options between Quantum generation"
                    f" and execution") from exc
            butlerQC.put(task.getFullMetadata(), ref[0])

    def initGlobals(self, quantum, butler):
        """Initialize global state needed for task execution.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        butler : `~lsst.daf.butler.Butler`
            Data butler.

        Notes
        -----
        There is an issue with initializing filters singleton which is done
        by instrument, to avoid requiring tasks to do it in runQuantum()
        we do it here when any dataId has an instrument dimension. Also for
        now we only allow single instrument, verify that all instrument
        names in all dataIds are identical.

        This will need revision when filter singleton disappears.
        """
        oneInstrument = None
        for datasetRefs in chain(quantum.inputs.values(), quantum.outputs.values()):
            for datasetRef in datasetRefs:
                dataId = datasetRef.dataId
                instrument = dataId.get("instrument")
                if instrument is not None:
                    if oneInstrument is not None:
                        # Internal-consistency check; a mixed-instrument graph
                        # is a programming error, not bad user input.
                        assert instrument == oneInstrument, \
                            "Currently require that only one instrument is used per graph"
                    else:
                        oneInstrument = instrument
                        Instrument.fromName(instrument, butler.registry)