Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

# Public API of this module: only the executor class is exported.
__all__ = ['SingleQuantumExecutor']

23 

24# ------------------------------- 

25# Imports of standard modules -- 

26# ------------------------------- 

27import logging 

28from itertools import chain 

29 

30# ----------------------------- 

31# Imports for other modules -- 

32# ----------------------------- 

33from .quantumGraphExecutor import QuantumExecutor 

34from lsst.log import Log 

35from lsst.obs.base import Instrument 

36from lsst.pipe.base import ButlerQuantumContext 

37 

38# ---------------------------------- 

39# Local non-exported definitions -- 

40# ---------------------------------- 

41 

# Module logger. ``partition(".")[2]`` strips the leading top-level package
# component from ``__name__`` (e.g. "lsst.ctrl.mpexec.x" -> "ctrl.mpexec.x"),
# presumably to match the shorter logger names used elsewhere in the
# pipeline -- NOTE(review): confirm this matches the project's logging
# convention before changing.
_LOG = logging.getLogger(__name__.partition(".")[2])

43 

44 

class SingleQuantumExecutor(QuantumExecutor):
    """Executor class which runs one Quantum at a time.

    Parameters
    ----------
    butler : `~lsst.daf.butler.Butler`
        Data butler.
    taskFactory : `~lsst.pipe.base.TaskFactory`
        Instance of a task factory.
    skipExisting : `bool`, optional
        If True then quanta with all existing outputs are not executed.
    clobberPartialOutputs : `bool`, optional
        If True then delete any partial outputs from quantum execution. If
        complete outputs exist then an exception is raised if ``skipExisting``
        is False.
    enableLsstDebug : `bool`, optional
        Enable debugging with ``lsstDebug`` facility for a task.
    """
    def __init__(self, taskFactory, skipExisting=False, clobberPartialOutputs=False, enableLsstDebug=False):
        self.taskFactory = taskFactory
        self.skipExisting = skipExisting
        self.enableLsstDebug = enableLsstDebug
        self.clobberPartialOutputs = clobberPartialOutputs

    def execute(self, taskDef, quantum, butler):
        # Docstring inherited from QuantumExecutor.execute
        taskClass, config = taskDef.taskClass, taskDef.config
        self.setupLogging(taskClass, config, quantum)

        # check whether to skip or delete old outputs
        if self.checkExistingOutputs(quantum, butler, taskDef):
            # Lazy %-style args instead of an eagerly-built f-string; the
            # message is only formatted when INFO is enabled.
            _LOG.info("Quantum execution skipped due to existing outputs, "
                      "task=%s dataId=%s.", taskClass.__name__, quantum.dataId)
            return

        self.updateQuantumInputs(quantum, butler)

        # enable lsstDebug debugging
        if self.enableLsstDebug:
            try:
                _LOG.debug("Will try to import debug.py")
                import debug  # noqa:F401
            except ImportError:
                # Logger.warn is a deprecated alias of Logger.warning.
                _LOG.warning("No 'debug' module found.")

        # initialize global state
        self.initGlobals(quantum, butler)

        # Ensure that we are executing a frozen config
        config.freeze()

        task = self.makeTask(taskClass, config, butler)
        self.runQuantum(task, quantum, taskDef, butler)

    def setupLogging(self, taskClass, config, quantum):
        """Configure logging system for execution of this task.

        This method can setup logging to attach task- or
        quantum-specific information to log messages. Potentially this can
        take into account some info from task configuration as well.

        Parameters
        ----------
        taskClass : `type`
            Sub-class of `~lsst.pipe.base.PipelineTask`.
        config : `~lsst.pipe.base.PipelineTaskConfig`
            Configuration object for this task
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        """
        # include input dataIds into MDC
        dataIds = set(ref.dataId for ref in chain.from_iterable(quantum.predictedInputs.values()))
        if dataIds:
            if len(dataIds) == 1:
                Log.MDC("LABEL", str(dataIds.pop()))
            else:
                Log.MDC("LABEL", '[' + ', '.join([str(dataId) for dataId in dataIds]) + ']')

    def checkExistingOutputs(self, quantum, butler, taskDef):
        """Decide whether this quantum needs to be executed.

        If only partial outputs exist then they are removed if
        ``clobberPartialOutputs`` is True, otherwise an exception is raised.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Quantum to check for existing outputs
        butler : `~lsst.daf.butler.Butler`
            Data butler.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.

        Returns
        -------
        exist : `bool`
            True if all quantum's outputs exist in a collection and
            ``skipExisting`` is True, False otherwise.

        Raises
        ------
        RuntimeError
            Raised if some outputs exist and some not.
        """
        collection = butler.run
        registry = butler.registry

        # Partition the predicted outputs into those already present in the
        # output run collection and those not yet written.
        existingRefs = []
        missingRefs = []
        for datasetRefs in quantum.outputs.values():
            for datasetRef in datasetRefs:
                ref = registry.findDataset(datasetRef.datasetType, datasetRef.dataId,
                                           collections=collection)
                if ref is None:
                    missingRefs.append(datasetRef)
                else:
                    existingRefs.append(ref)
        if existingRefs and missingRefs:
            # some outputs exist and some don't, either delete existing ones or complain
            _LOG.debug("Partial outputs exist for task %s dataId=%s collection=%s "
                       "existingRefs=%s missingRefs=%s",
                       taskDef, quantum.dataId, collection, existingRefs, missingRefs)
            if self.clobberPartialOutputs:
                _LOG.info("Removing partial outputs for task %s: %s", taskDef, existingRefs)
                butler.pruneDatasets(existingRefs, disassociate=True, unstore=True, purge=True)
                return False
            else:
                raise RuntimeError(f"Registry inconsistency while checking for existing outputs:"
                                   f" collection={collection} existingRefs={existingRefs}"
                                   f" missingRefs={missingRefs}")
        elif existingRefs:
            # complete outputs exist, this is fine only if skipExisting is set
            return self.skipExisting
        else:
            # no outputs exist
            return False

    def makeTask(self, taskClass, config, butler):
        """Make new task instance.

        Parameters
        ----------
        taskClass : `type`
            Sub-class of `~lsst.pipe.base.PipelineTask`.
        config : `~lsst.pipe.base.PipelineTaskConfig`
            Configuration object for this task
        butler : `~lsst.daf.butler.Butler`
            Data butler.

        Returns
        -------
        task : `~lsst.pipe.base.PipelineTask`
            Instance of ``taskClass`` type.
        """
        # call task factory for that
        return self.taskFactory.makeTask(taskClass, config, None, butler)

    def updateQuantumInputs(self, quantum, butler):
        """Update quantum with extra information.

        Some methods may require input DatasetRefs to have non-None
        ``dataset_id``, but in case of intermediate dataset it may not be
        filled during QuantumGraph construction. This method will retrieve
        missing info from registry.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        butler : `~lsst.daf.butler.Butler`
            Data butler.

        Raises
        ------
        ValueError
            Raised if an unresolved input dataset cannot be found in any of
            the butler's input collections.
        """
        for refsForDatasetType in quantum.predictedInputs.values():
            newRefsForDatasetType = []
            for ref in refsForDatasetType:
                if ref.id is None:
                    # Unresolved ref (no dataset_id); look it up in registry.
                    resolvedRef = butler.registry.findDataset(ref.datasetType, ref.dataId,
                                                              collections=butler.collections)
                    if resolvedRef is None:
                        raise ValueError(
                            f"Cannot find {ref.datasetType.name} with id {ref.dataId} "
                            f"in collections {butler.collections}."
                        )
                    newRefsForDatasetType.append(resolvedRef)
                    _LOG.debug("Updating dataset ID for %s", ref)
                else:
                    newRefsForDatasetType.append(ref)
            # Replace contents in place so the quantum's container is updated.
            refsForDatasetType[:] = newRefsForDatasetType

    def runQuantum(self, task, quantum, taskDef, butler):
        """Execute task on a single quantum.

        Parameters
        ----------
        task : `~lsst.pipe.base.PipelineTask`
            Task object.
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.
        butler : `~lsst.daf.butler.Butler`
            Data butler.

        Raises
        ------
        LookupError
            Raised if ``taskDef.metadataDatasetName`` is set but the
            corresponding dataset type is missing from the quantum outputs.
        """
        # Create a butler that operates in the context of a quantum
        butlerQC = ButlerQuantumContext(butler, quantum)

        # Get the input and output references for the task
        inputRefs, outputRefs = taskDef.connections.buildDatasetRefs(quantum)

        # Call task runQuantum() method. Any exception thrown by the task
        # propagates to caller.
        task.runQuantum(butlerQC, inputRefs, outputRefs)

        if taskDef.metadataDatasetName is not None:
            # DatasetRef has to be in the Quantum outputs, can lookup by name
            try:
                ref = quantum.outputs[taskDef.metadataDatasetName]
            except LookupError as exc:
                raise LookupError(
                    f"Quantum outputs is missing metadata dataset type {taskDef.metadataDatasetName},"
                    f" it could happen due to inconsistent options between Quantum generation"
                    f" and execution") from exc
            butlerQC.put(task.getFullMetadata(), ref[0])

    def initGlobals(self, quantum, butler):
        """Initialize global state needed for task execution.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        butler : `~lsst.daf.butler.Butler`
            Data butler.

        Notes
        -----
        There is an issue with initializing filters singleton which is done
        by instrument, to avoid requiring tasks to do it in runQuantum()
        we do it here when any dataId has an instrument dimension. Also for
        now we only allow single instrument, verify that all instrument
        names in all dataIds are identical.

        This will need revision when filter singleton disappears.
        """
        oneInstrument = None
        for datasetRefs in chain(quantum.predictedInputs.values(), quantum.outputs.values()):
            for datasetRef in datasetRefs:
                dataId = datasetRef.dataId
                instrument = dataId.get("instrument")
                if instrument is not None:
                    if oneInstrument is not None:
                        # NOTE: assert is stripped under ``python -O``; kept
                        # as-is to preserve existing behavior for callers.
                        assert instrument == oneInstrument, \
                            "Currently require that only one instrument is used per graph"
                    else:
                        oneInstrument = instrument
                        Instrument.fromName(instrument, butler.registry)