Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22__all__ = ['SingleQuantumExecutor'] 

23 

24# ------------------------------- 

25# Imports of standard modules -- 

26# ------------------------------- 

27import logging 

28from itertools import chain 

29 

30# ----------------------------- 

31# Imports for other modules -- 

32# ----------------------------- 

33from .quantumGraphExecutor import QuantumExecutor 

34from lsst.log import Log 

35from lsst.obs.base import Instrument 

36from lsst.pipe.base import ButlerQuantumContext 

37 

38# ---------------------------------- 

39# Local non-exported definitions -- 

40# ---------------------------------- 

41 

# Name the logger after ``__name__`` with its leading (top-level) package
# component stripped: ``partition(".")[2]`` is the text after the first dot
# (empty string — i.e. the root logger — if ``__name__`` has no dot).
_LOG = logging.getLogger(__name__.partition(".")[2])

43 

44 

class SingleQuantumExecutor(QuantumExecutor):
    """Executor class which runs one Quantum at a time.

    Parameters
    ----------
    taskFactory : `~lsst.pipe.base.TaskFactory`
        Instance of a task factory.
    skipExisting : `bool`, optional
        If True then quanta with all existing outputs are not executed.
    clobberPartialOutputs : `bool`, optional
        If True then delete any partial outputs from quantum execution. If
        complete outputs exist then an exception is raised if
        ``skipExisting`` is False.
    enableLsstDebug : `bool`, optional
        Enable debugging with ``lsstDebug`` facility for a task.
    """
    def __init__(self, taskFactory, skipExisting=False, clobberPartialOutputs=False, enableLsstDebug=False):
        self.taskFactory = taskFactory
        self.skipExisting = skipExisting
        self.enableLsstDebug = enableLsstDebug
        self.clobberPartialOutputs = clobberPartialOutputs

    def execute(self, taskDef, quantum, butler):
        # Docstring inherited from QuantumExecutor.execute
        taskClass, config = taskDef.taskClass, taskDef.config
        self.setupLogging(taskClass, config, quantum)

        # Check whether to skip execution or delete stale partial outputs.
        if self.checkExistingOutputs(quantum, butler, taskDef):
            # Lazy %-formatting so the message is only built when emitted.
            _LOG.info("Quantum execution skipped due to existing outputs, task=%s dataId=%s.",
                      taskClass.__name__, quantum.dataId)
            return

        # Resolve dataset IDs for intermediate inputs before running.
        self.updateQuantumInputs(quantum, butler)

        # Enable lsstDebug debugging if requested; a missing ``debug`` module
        # is not an error, only worth a warning.
        if self.enableLsstDebug:
            try:
                _LOG.debug("Will try to import debug.py")
                import debug  # noqa:F401
            except ImportError:
                # Logger.warn is deprecated; use warning().
                _LOG.warning("No 'debug' module found.")

        # Initialize global state (e.g. instrument filter definitions).
        self.initGlobals(quantum, butler)

        task = self.makeTask(taskClass, config, butler)
        self.runQuantum(task, quantum, taskDef, butler)

    def setupLogging(self, taskClass, config, quantum):
        """Configure logging system for execution of this task.

        This method can setup logging to attach task- or
        quantum-specific information to log messages. Potentially this can
        take into account some info from task configuration as well.

        Parameters
        ----------
        taskClass : `type`
            Sub-class of `~lsst.pipe.base.PipelineTask`.
        config : `~lsst.pipe.base.PipelineTaskConfig`
            Configuration object for this task.
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        """
        # Include input dataIds in the MDC so every log record carries them.
        dataIds = set(ref.dataId for ref in chain.from_iterable(quantum.predictedInputs.values()))
        if dataIds:
            if len(dataIds) == 1:
                Log.MDC("LABEL", str(dataIds.pop()))
            else:
                Log.MDC("LABEL", '[' + ', '.join([str(dataId) for dataId in dataIds]) + ']')

    def checkExistingOutputs(self, quantum, butler, taskDef):
        """Decide whether this quantum needs to be executed.

        If only partial outputs exist then they are removed if
        ``clobberPartialOutputs`` is True, otherwise an exception is raised.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Quantum to check for existing outputs
        butler : `~lsst.daf.butler.Butler`
            Data butler.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.

        Returns
        -------
        exist : `bool`
            True if all quantum's outputs exist in a collection and
            ``skipExisting`` is True, False otherwise.

        Raises
        ------
        RuntimeError
            Raised if some outputs exist and some not.
        """
        collection = butler.run
        registry = butler.registry

        # Partition predicted outputs into those already in the registry and
        # those still missing.
        existingRefs = []
        missingRefs = []
        for datasetRefs in quantum.outputs.values():
            for datasetRef in datasetRefs:
                ref = registry.findDataset(datasetRef.datasetType, datasetRef.dataId,
                                           collections=collection)
                if ref is None:
                    missingRefs.append(datasetRef)
                else:
                    existingRefs.append(ref)
        if existingRefs and missingRefs:
            # Some outputs exist and some don't; either delete the existing
            # ones or complain.
            _LOG.debug("Partial outputs exist for task %s dataId=%s collection=%s "
                       "existingRefs=%s missingRefs=%s",
                       taskDef, quantum.dataId, collection, existingRefs, missingRefs)
            if self.clobberPartialOutputs:
                _LOG.info("Removing partial outputs for task %s: %s", taskDef, existingRefs)
                butler.pruneDatasets(existingRefs, disassociate=True, unstore=True, purge=True)
                return False
            else:
                raise RuntimeError(f"Registry inconsistency while checking for existing outputs:"
                                   f" collection={collection} existingRefs={existingRefs}"
                                   f" missingRefs={missingRefs}")
        elif existingRefs:
            # Complete outputs exist; this is fine only if skipExisting is set.
            return self.skipExisting
        else:
            # No outputs exist.
            return False

    def makeTask(self, taskClass, config, butler):
        """Make new task instance.

        Parameters
        ----------
        taskClass : `type`
            Sub-class of `~lsst.pipe.base.PipelineTask`.
        config : `~lsst.pipe.base.PipelineTaskConfig`
            Configuration object for this task.
        butler : `~lsst.daf.butler.Butler`
            Data butler.

        Returns
        -------
        task : `~lsst.pipe.base.PipelineTask`
            Instance of ``taskClass`` type.
        """
        # Delegate construction to the task factory.
        return self.taskFactory.makeTask(taskClass, config, None, butler)

    def updateQuantumInputs(self, quantum, butler):
        """Update quantum with extra information.

        Some methods may require input DatasetRefs to have non-None
        ``dataset_id``, but in case of intermediate dataset it may not be
        filled during QuantumGraph construction. This method will retrieve
        missing info from registry.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        butler : `~lsst.daf.butler.Butler`
            Data butler.
        """
        for refsForDatasetType in quantum.predictedInputs.values():
            newRefsForDatasetType = []
            for ref in refsForDatasetType:
                if ref.id is None:
                    # Unresolved ref (likely an intermediate); look it up.
                    resolvedRef = butler.registry.findDataset(ref.datasetType, ref.dataId,
                                                              collections=butler.collections)
                    if resolvedRef is None:
                        raise ValueError(
                            f"Cannot find {ref.datasetType.name} with id {ref.dataId} "
                            f"in collections {butler.collections}."
                        )
                    newRefsForDatasetType.append(resolvedRef)
                    _LOG.debug("Updating dataset ID for %s", ref)
                else:
                    newRefsForDatasetType.append(ref)
            # In-place slice assignment keeps the container object the quantum
            # holds, only replacing its contents.
            refsForDatasetType[:] = newRefsForDatasetType

    def runQuantum(self, task, quantum, taskDef, butler):
        """Execute task on a single quantum.

        Parameters
        ----------
        task : `~lsst.pipe.base.PipelineTask`
            Task object.
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.
        butler : `~lsst.daf.butler.Butler`
            Data butler.
        """
        # Create a butler that operates in the context of a quantum
        butlerQC = ButlerQuantumContext(butler, quantum)

        # Get the input and output references for the task
        connectionInstance = task.config.connections.ConnectionsClass(config=task.config)
        inputRefs, outputRefs = connectionInstance.buildDatasetRefs(quantum)

        # Call task runQuantum() method. Any exception thrown by the task
        # propagates to caller.
        task.runQuantum(butlerQC, inputRefs, outputRefs)

        if taskDef.metadataDatasetName is not None:
            # DatasetRef has to be in the Quantum outputs, can lookup by name
            try:
                ref = quantum.outputs[taskDef.metadataDatasetName]
            except LookupError as exc:
                raise LookupError(
                    f"Quantum outputs is missing metadata dataset type {taskDef.metadataDatasetName},"
                    f" it could happen due to inconsistent options between Quantum generation"
                    f" and execution") from exc
            butlerQC.put(task.getFullMetadata(), ref[0])

    def initGlobals(self, quantum, butler):
        """Initialize global state needed for task execution.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        butler : `~lsst.daf.butler.Butler`
            Data butler.

        Notes
        -----
        There is an issue with initializing filters singleton which is done
        by instrument, to avoid requiring tasks to do it in runQuantum()
        we do it here when any dataId has an instrument dimension. Also for
        now we only allow single instrument, verify that all instrument
        names in all dataIds are identical.

        This will need revision when filter singleton disappears.
        """
        oneInstrument = None
        for datasetRefs in chain(quantum.predictedInputs.values(), quantum.outputs.values()):
            for datasetRef in datasetRefs:
                dataId = datasetRef.dataId
                instrument = dataId.get("instrument")
                if instrument is not None:
                    if oneInstrument is not None:
                        assert instrument == oneInstrument, \
                            "Currently require that only one instrument is used per graph"
                    else:
                        oneInstrument = instrument
                        Instrument.fromName(instrument, butler.registry)