Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

22__all__ = ['SingleQuantumExecutor'] 

23 

24# ------------------------------- 

25# Imports of standard modules -- 

26# ------------------------------- 

27from collections import defaultdict 

28import logging 

29from itertools import chain 

30 

31# ----------------------------- 

32# Imports for other modules -- 

33# ----------------------------- 

34from .quantumGraphExecutor import QuantumExecutor 

35from lsst.log import Log 

36from lsst.obs.base import Instrument 

37from lsst.pipe.base import ButlerQuantumContext 

38from lsst.daf.butler import Quantum 

39 

40# ---------------------------------- 

41# Local non-exported definitions -- 

42# ---------------------------------- 

43 

44_LOG = logging.getLogger(__name__.partition(".")[2]) 

45 

46 

47class SingleQuantumExecutor(QuantumExecutor): 

48 """Executor class which runs one Quantum at a time. 

49 

50 Parameters 

51 ---------- 

52 butler : `~lsst.daf.butler.Butler` 

53 Data butler. 

54 taskFactory : `~lsst.pipe.base.TaskFactory` 

55 Instance of a task factory. 

56 skipExisting : `bool`, optional 

57 If True then quanta with all existing outputs are not executed. 

58 clobberPartialOutputs : `bool`, optional 

59 If True then delete any partial outputs from quantum execution. If 

60 complete outputs exists then exception is raise if ``skipExisting`` is 

61 False. 

62 enableLsstDebug : `bool`, optional 

63 Enable debugging with ``lsstDebug`` facility for a task. 

64 """ 

65 def __init__(self, taskFactory, skipExisting=False, clobberPartialOutputs=False, enableLsstDebug=False): 

66 self.taskFactory = taskFactory 

67 self.skipExisting = skipExisting 

68 self.enableLsstDebug = enableLsstDebug 

69 self.clobberPartialOutputs = clobberPartialOutputs 

70 

71 def execute(self, taskDef, quantum, butler): 

72 # Docstring inherited from QuantumExecutor.execute 

73 taskClass, config = taskDef.taskClass, taskDef.config 

74 self.setupLogging(taskClass, config, quantum) 

75 

76 # check whether to skip or delete old outputs 

77 if self.checkExistingOutputs(quantum, butler, taskDef): 

78 _LOG.info("Quantum execution skipped due to existing outputs, " 

79 f"task={taskClass.__name__} dataId={quantum.dataId}.") 

80 return 

81 

82 quantum = self.updatedQuantumInputs(quantum, butler) 

83 

84 # enable lsstDebug debugging 

85 if self.enableLsstDebug: 

86 try: 

87 _LOG.debug("Will try to import debug.py") 

88 import debug # noqa:F401 

89 except ImportError: 

90 _LOG.warn("No 'debug' module found.") 

91 

92 # initialize global state 

93 self.initGlobals(quantum, butler) 

94 

95 # Ensure that we are executing a frozen config 

96 config.freeze() 

97 

98 task = self.makeTask(taskClass, config, butler) 

99 self.runQuantum(task, quantum, taskDef, butler) 

100 

101 def setupLogging(self, taskClass, config, quantum): 

102 """Configure logging system for execution of this task. 

103 

104 Ths method can setup logging to attach task- or 

105 quantum-specific information to log messages. Potentially this can 

106 take into accout some info from task configuration as well. 

107 

108 Parameters 

109 ---------- 

110 taskClass : `type` 

111 Sub-class of `~lsst.pipe.base.PipelineTask`. 

112 config : `~lsst.pipe.base.PipelineTaskConfig` 

113 Configuration object for this task 

114 quantum : `~lsst.daf.butler.Quantum` 

115 Single Quantum instance. 

116 """ 

117 # include input dataIds into MDC 

118 dataIds = set(ref.dataId for ref in chain.from_iterable(quantum.inputs.values())) 

119 if dataIds: 

120 if len(dataIds) == 1: 

121 Log.MDC("LABEL", str(dataIds.pop())) 

122 else: 

123 Log.MDC("LABEL", '[' + ', '.join([str(dataId) for dataId in dataIds]) + ']') 

124 

125 def checkExistingOutputs(self, quantum, butler, taskDef): 

126 """Decide whether this quantum needs to be executed. 

127 

128 If only partial outputs exist then they are removed if 

129 ``clobberPartialOutputs`` is True, otherwise an exception is raised. 

130 

131 Parameters 

132 ---------- 

133 quantum : `~lsst.daf.butler.Quantum` 

134 Quantum to check for existing outputs 

135 butler : `~lsst.daf.butler.Butler` 

136 Data butler. 

137 taskDef : `~lsst.pipe.base.TaskDef` 

138 Task definition structure. 

139 

140 Returns 

141 ------- 

142 exist : `bool` 

143 True if all quantum's outputs exist in a collection and 

144 ``skipExisting`` is True, False otherwise. 

145 

146 Raises 

147 ------ 

148 RuntimeError 

149 Raised if some outputs exist and some not. 

150 """ 

151 collection = butler.run 

152 registry = butler.registry 

153 

154 existingRefs = [] 

155 missingRefs = [] 

156 for datasetRefs in quantum.outputs.values(): 

157 for datasetRef in datasetRefs: 

158 ref = registry.findDataset(datasetRef.datasetType, datasetRef.dataId, 

159 collections=butler.run) 

160 if ref is None: 

161 missingRefs.append(datasetRef) 

162 else: 

163 existingRefs.append(ref) 

164 if existingRefs and missingRefs: 

165 # some outputs exist and some don't, either delete existing ones or complain 

166 _LOG.debug("Partial outputs exist for task %s dataId=%s collection=%s " 

167 "existingRefs=%s missingRefs=%s", 

168 taskDef, quantum.dataId, collection, existingRefs, missingRefs) 

169 if self.clobberPartialOutputs: 

170 _LOG.info("Removing partial outputs for task %s: %s", taskDef, existingRefs) 

171 butler.pruneDatasets(existingRefs, disassociate=True, unstore=True, purge=True) 

172 return False 

173 else: 

174 raise RuntimeError(f"Registry inconsistency while checking for existing outputs:" 

175 f" collection={collection} existingRefs={existingRefs}" 

176 f" missingRefs={missingRefs}") 

177 elif existingRefs: 

178 # complete outputs exist, this is fine only if skipExisting is set 

179 return self.skipExisting 

180 else: 

181 # no outputs exist 

182 return False 

183 

184 def makeTask(self, taskClass, config, butler): 

185 """Make new task instance. 

186 

187 Parameters 

188 ---------- 

189 taskClass : `type` 

190 Sub-class of `~lsst.pipe.base.PipelineTask`. 

191 config : `~lsst.pipe.base.PipelineTaskConfig` 

192 Configuration object for this task 

193 

194 Returns 

195 ------- 

196 task : `~lsst.pipe.base.PipelineTask` 

197 Instance of ``taskClass`` type. 

198 butler : `~lsst.daf.butler.Butler` 

199 Data butler. 

200 """ 

201 # call task factory for that 

202 return self.taskFactory.makeTask(taskClass, config, None, butler) 

203 

204 def updatedQuantumInputs(self, quantum, butler): 

205 """Update quantum with extra information, returns a new updated Quantum. 

206 

207 Some methods may require input DatasetRefs to have non-None 

208 ``dataset_id``, but in case of intermediate dataset it may not be 

209 filled during QuantumGraph construction. This method will retrieve 

210 missing info from registry. 

211 

212 Parameters 

213 ---------- 

214 quantum : `~lsst.daf.butler.Quantum` 

215 Single Quantum instance. 

216 butler : `~lsst.daf.butler.Butler` 

217 Data butler. 

218 

219 Returns 

220 ------- 

221 update : `~lsst.daf.butler.Quantum` 

222 Updated Quantum instance 

223 """ 

224 updatedInputs = defaultdict(list) 

225 for key, refsForDatasetType in quantum.inputs.items(): 

226 newRefsForDatasetType = updatedInputs[key] 

227 for ref in refsForDatasetType: 

228 if ref.id is None: 

229 resolvedRef = butler.registry.findDataset(ref.datasetType, ref.dataId, 

230 collections=butler.collections) 

231 if resolvedRef is None: 

232 raise ValueError( 

233 f"Cannot find {ref.datasetType.name} with id {ref.dataId} " 

234 f"in collections {butler.collections}." 

235 ) 

236 newRefsForDatasetType.append(resolvedRef) 

237 _LOG.debug("Updating dataset ID for %s", ref) 

238 else: 

239 newRefsForDatasetType.append(ref) 

240 return Quantum(taskName=quantum.taskName, 

241 taskClass=quantum.taskClass, 

242 dataId=quantum.dataId, 

243 initInputs=quantum.initInputs, 

244 inputs=updatedInputs, 

245 outputs=quantum.outputs 

246 ) 

247 

248 def runQuantum(self, task, quantum, taskDef, butler): 

249 """Execute task on a single quantum. 

250 

251 Parameters 

252 ---------- 

253 task : `~lsst.pipe.base.PipelineTask` 

254 Task object. 

255 quantum : `~lsst.daf.butler.Quantum` 

256 Single Quantum instance. 

257 taskDef : `~lsst.pipe.base.TaskDef` 

258 Task definition structure. 

259 butler : `~lsst.daf.butler.Butler` 

260 Data butler. 

261 """ 

262 # Create a butler that operates in the context of a quantum 

263 butlerQC = ButlerQuantumContext(butler, quantum) 

264 

265 # Get the input and output references for the task 

266 inputRefs, outputRefs = taskDef.connections.buildDatasetRefs(quantum) 

267 

268 # Call task runQuantum() method. Any exception thrown by the task 

269 # propagates to caller. 

270 task.runQuantum(butlerQC, inputRefs, outputRefs) 

271 

272 if taskDef.metadataDatasetName is not None: 

273 # DatasetRef has to be in the Quantum outputs, can lookup by name 

274 try: 

275 ref = quantum.outputs[taskDef.metadataDatasetName] 

276 except LookupError as exc: 

277 raise LookupError( 

278 f"Quantum outputs is missing metadata dataset type {taskDef.metadataDatasetName}," 

279 f" it could happen due to inconsistent options between Quantum generation" 

280 f" and execution") from exc 

281 butlerQC.put(task.getFullMetadata(), ref[0]) 

282 

283 def initGlobals(self, quantum, butler): 

284 """Initialize global state needed for task execution. 

285 

286 Parameters 

287 ---------- 

288 quantum : `~lsst.daf.butler.Quantum` 

289 Single Quantum instance. 

290 butler : `~lsst.daf.butler.Butler` 

291 Data butler. 

292 

293 Notes 

294 ----- 

295 There is an issue with initializing filters singleton which is done 

296 by instrument, to avoid requiring tasks to do it in runQuantum() 

297 we do it here when any dataId has an instrument dimension. Also for 

298 now we only allow single instrument, verify that all instrument 

299 names in all dataIds are identical. 

300 

301 This will need revision when filter singleton disappears. 

302 """ 

303 oneInstrument = None 

304 for datasetRefs in chain(quantum.inputs.values(), quantum.outputs.values()): 

305 for datasetRef in datasetRefs: 

306 dataId = datasetRef.dataId 

307 instrument = dataId.get("instrument") 

308 if instrument is not None: 

309 if oneInstrument is not None: 

310 assert instrument == oneInstrument, \ 

311 "Currently require that only one instrument is used per graph" 

312 else: 

313 oneInstrument = instrument 

314 Instrument.fromName(instrument, butler.registry)