# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__all__ = ['SingleQuantumExecutor']

# -------------------------------
# Imports of standard modules --
# -------------------------------
import logging
from itertools import chain

# -----------------------------
# Imports for other modules --
# -----------------------------
from lsst.log import Log
from lsst.pipe.base import ButlerQuantumContext

# ----------------------------------
# Local non-exported definitions --
# ----------------------------------

_LOG = logging.getLogger(__name__.partition(".")[2])


class SingleQuantumExecutor:
    """Executor class which runs one Quantum at a time.

    Parameters
    ----------
    butler : `~lsst.daf.butler.Butler`
        Data butler.
    taskFactory : `~lsst.pipe.base.TaskFactory`
        Instance of a task factory.
    skipExisting : `bool`, optional
        If `True` then quanta with all existing outputs are not executed.
    clobberOutput : `bool`, optional
        If `True` then overwrite all existing output datasets in an output
        collection.
    enableLsstDebug : `bool`, optional
        Enable debugging with ``lsstDebug`` facility for a task.
    """
    def __init__(self, butler, taskFactory, skipExisting=False, clobberOutput=False, enableLsstDebug=False):
        self.butler = butler
        self.taskFactory = taskFactory
        self.skipExisting = skipExisting
        self.clobberOutput = clobberOutput
        self.enableLsstDebug = enableLsstDebug

    def execute(self, taskDef, quantum):
        """Execute PipelineTask on a single Quantum.

        Parameters
        ----------
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        """
        taskClass, config = taskDef.taskClass, taskDef.config
        self.setupLogging(taskClass, config, quantum)
        if self.clobberOutput:
            self.doClobberOutputs(quantum)
        if self.skipExisting and self.quantumOutputsExist(quantum):
            _LOG.info("Quantum execution skipped due to existing outputs, "
                      f"task={taskClass.__name__} dataId={quantum.dataId}.")
            return
        self.updateQuantumInputs(quantum)

        # enable lsstDebug debugging
        if self.enableLsstDebug:
            try:
                _LOG.debug("Will try to import debug.py")
                import debug  # noqa:F401
            except ImportError:
                _LOG.warning("No 'debug' module found.")

        task = self.makeTask(taskClass, config)
        self.runQuantum(task, quantum, taskDef)

    def setupLogging(self, taskClass, config, quantum):
        """Configure logging system for execution of this task.

        This method can set up logging to attach task- or
        quantum-specific information to log messages. Potentially this can
        take into account some info from task configuration as well.

        Parameters
        ----------
        taskClass : `type`
            Sub-class of `~lsst.pipe.base.PipelineTask`.
        config : `~lsst.pipe.base.PipelineTaskConfig`
            Configuration object for this task.
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        """
        # include input dataIds into MDC
        dataIds = set(ref.dataId for ref in chain.from_iterable(quantum.predictedInputs.values()))
        if dataIds:
            if len(dataIds) == 1:
                Log.MDC("LABEL", str(dataIds.pop()))
            else:
                Log.MDC("LABEL", '[' + ', '.join([str(dataId) for dataId in dataIds]) + ']')

    def doClobberOutputs(self, quantum):
        """Delete any outputs that already exist for a Quantum.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Quantum to check for existing outputs.
        """
        collection = self.butler.run
        registry = self.butler.registry

        existingRefs = []
        for datasetRefs in quantum.outputs.values():
            for datasetRef in datasetRefs:
                ref = registry.find(collection, datasetRef.datasetType, datasetRef.dataId)
                if ref is not None:
                    existingRefs.append(ref)
        for ref in existingRefs:
            _LOG.debug("Removing existing dataset: %s", ref)
            self.butler.remove(ref)

    def quantumOutputsExist(self, quantum):
        """Decide whether this quantum needs to be executed.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Quantum to check for existing outputs.

        Returns
        -------
        exist : `bool`
            `True` if all quantum's outputs exist in a collection, `False`
            otherwise.

        Raises
        ------
        RuntimeError
            Raised if some outputs exist and some do not.
        """
        collection = self.butler.run
        registry = self.butler.registry

        existingRefs = []
        missingRefs = []
        for datasetRefs in quantum.outputs.values():
            for datasetRef in datasetRefs:
                ref = registry.find(collection, datasetRef.datasetType, datasetRef.dataId)
                if ref is None:
                    missingRefs.append(datasetRef)
                else:
                    existingRefs.append(ref)
        if existingRefs and missingRefs:
            # some outputs exist and some do not, can't do a thing with that
            raise RuntimeError("Registry inconsistency while checking for existing outputs:"
                               f" collection={collection} existingRefs={existingRefs}"
                               f" missingRefs={missingRefs}")
        else:
            return bool(existingRefs)

    def makeTask(self, taskClass, config):
        """Make new task instance.

        Parameters
        ----------
        taskClass : `type`
            Sub-class of `~lsst.pipe.base.PipelineTask`.
        config : `~lsst.pipe.base.PipelineTaskConfig`
            Configuration object for this task.

        Returns
        -------
        task : `~lsst.pipe.base.PipelineTask`
            Instance of ``taskClass`` type.
        """
        # call task factory for that
        return self.taskFactory.makeTask(taskClass, config, None, self.butler)

    def updateQuantumInputs(self, quantum):
        """Update quantum with extra information.

        Some methods may require input DatasetRefs to have a non-None
        ``dataset_id``, but in the case of intermediate datasets it may not
        be filled in during QuantumGraph construction. This method retrieves
        the missing info from registry.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        """
        butler = self.butler
        for refsForDatasetType in quantum.predictedInputs.values():
            newRefsForDatasetType = []
            for ref in refsForDatasetType:
                if ref.id is None:
                    resolvedRef = butler.registry.find(butler.collection, ref.datasetType, ref.dataId)
                    if resolvedRef is None:
                        raise ValueError(
                            f"Cannot find {ref.datasetType.name} with id {ref.dataId} "
                            f"in collection {butler.collection}."
                        )
                    newRefsForDatasetType.append(resolvedRef)
                    _LOG.debug("Updating dataset ID for %s", ref)
                else:
                    newRefsForDatasetType.append(ref)
            refsForDatasetType[:] = newRefsForDatasetType

    def runQuantum(self, task, quantum, taskDef):
        """Execute task on a single quantum.

        Parameters
        ----------
        task : `~lsst.pipe.base.PipelineTask`
            Task object.
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.
        """
        # Create a butler that operates in the context of a quantum
        butlerQC = ButlerQuantumContext(self.butler, quantum)

        # Get the input and output references for the task
        connectionInstance = task.config.connections.ConnectionsClass(config=task.config)
        inputRefs, outputRefs = connectionInstance.buildDatasetRefs(quantum)
        # Call task runQuantum() method. Any exception thrown by the task
        # propagates to caller.
        task.runQuantum(butlerQC, inputRefs, outputRefs)

        if taskDef.metadataDatasetName is not None:
            # DatasetRef has to be in the Quantum outputs, can look it up by name
            try:
                ref = quantum.outputs[taskDef.metadataDatasetName]
            except LookupError as exc:
                raise LookupError(
                    "Quantum outputs are missing metadata dataset type"
                    f" {taskDef.metadataDatasetName}; this can happen due to inconsistent"
                    " options between Quantum generation and execution") from exc
            butlerQC.put(task.metadata, ref[0])
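

# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of this module's API): a minimal,
# hypothetical example of driving SingleQuantumExecutor for a single
# (taskDef, quantum) pair.  The repository path, run collection name, the
# TaskFactory import location, and the way ``taskDef`` and ``quantum`` are
# obtained are all assumptions; in practice they come from a QuantumGraph
# built by the ctrl_mpexec command-line tooling.
#
#     from lsst.daf.butler import Butler
#     from lsst.ctrl.mpexec import TaskFactory  # assumed import path
#
#     butler = Butler("/path/to/repo", run="example-output-run")  # hypothetical repo/run
#     executor = SingleQuantumExecutor(butler, TaskFactory(), skipExisting=True)
#     # ``taskDef`` and ``quantum`` are assumed to come from a QuantumGraph
#     # node built elsewhere; their construction is not shown here.
#     executor.execute(taskDef, quantum)
# ---------------------------------------------------------------------------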