# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__all__ = ['SingleQuantumExecutor']

# -------------------------------
#  Imports of standard modules --
# -------------------------------
import logging
from itertools import chain

# -----------------------------
#  Imports for other modules --
# -----------------------------
from lsst.log import Log
from lsst.pipe.base import ButlerQuantumContext

# ----------------------------------
#  Local non-exported definitions --
# ----------------------------------

_LOG = logging.getLogger(__name__.partition(".")[2])


class SingleQuantumExecutor:
44 """Executor class which runs one Quantum at a time. 

45 

46 Parameters 

47 ---------- 

48 butler : `~lsst.daf.butler.Butler` 

49 Data butler. 

50 taskFactory : `~lsst.pipe.base.TaskFactory` 

51 Instance of a task factory. 

52 skipExisting : `bool`, optional 

53 If True then quanta with all existing outputs are not executed. 

54 enableLsstDebug : `bool`, optional 

55 Enable debugging with ``lsstDebug`` facility for a task. 

56 """ 

    def __init__(self, butler, taskFactory, skipExisting=False, enableLsstDebug=False):
        self.butler = butler
        self.taskFactory = taskFactory
        self.skipExisting = skipExisting
        self.enableLsstDebug = enableLsstDebug

    def execute(self, taskDef, quantum):
        """Execute PipelineTask on a single Quantum.

        Parameters
        ----------
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        """
        taskClass, config = taskDef.taskClass, taskDef.config
        self.setupLogging(taskClass, config, quantum)
        if self.skipExisting and self.quantumOutputsExist(quantum):
            _LOG.info("Quantum execution skipped due to existing outputs, "
                      f"task={taskClass.__name__} dataId={quantum.dataId}.")
            return
        self.updateQuantumInputs(quantum)

        # enable lsstDebug debugging
        if self.enableLsstDebug:
            try:
                _LOG.debug("Will try to import debug.py")
                import debug  # noqa:F401
            except ImportError:
                _LOG.warning("No 'debug' module found.")

        task = self.makeTask(taskClass, config)
        self.runQuantum(task, quantum, taskDef)

    def setupLogging(self, taskClass, config, quantum):
        """Configure logging system for execution of this task.

        This method can set up logging to attach task- or
        quantum-specific information to log messages. Potentially this can
        take into account some info from task configuration as well.

        Parameters
        ----------
        taskClass : `type`
            Sub-class of `~lsst.pipe.base.PipelineTask`.
        config : `~lsst.pipe.base.PipelineTaskConfig`
            Configuration object for this task.
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        """
        # include input dataIds into MDC
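        # (MDC is the logging "mapped diagnostic context": the value stored
        # under the "LABEL" key is made available to every subsequent log
        # record, so task log output can be traced back to its data IDs.)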

        dataIds = set(ref.dataId for ref in chain.from_iterable(quantum.predictedInputs.values()))
        if dataIds:
            if len(dataIds) == 1:
                Log.MDC("LABEL", str(dataIds.pop()))
            else:
                Log.MDC("LABEL", '[' + ', '.join([str(dataId) for dataId in dataIds]) + ']')

    def quantumOutputsExist(self, quantum):
        """Decide whether this quantum needs to be executed.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Quantum to check for existing outputs.

        Returns
        -------
        exist : `bool`
            `True` if all of the quantum's outputs exist in a collection,
            `False` otherwise.

        Raises
        ------
        RuntimeError
            Raised if some outputs exist and some do not.
        """
        collection = self.butler.run
        registry = self.butler.registry

        existingRefs = []
        missingRefs = []
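        # Look up every predicted output in the butler run collection; a mix
        # of existing and missing outputs is treated as an error below.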

        for datasetRefs in quantum.outputs.values():
            for datasetRef in datasetRefs:
                ref = registry.findDataset(datasetRef.datasetType, datasetRef.dataId,
                                           collections=self.butler.run)
                if ref is None:
                    missingRefs.append(datasetRef)
                else:
                    existingRefs.append(datasetRef)
        if existingRefs and missingRefs:
            # some outputs exist and some do not, can't do a thing with that
            raise RuntimeError(f"Registry inconsistency while checking for existing outputs:"
                               f" collection={collection} existingRefs={existingRefs}"
                               f" missingRefs={missingRefs}")
        else:
            return bool(existingRefs)

    def makeTask(self, taskClass, config):
        """Make new task instance.

        Parameters
        ----------
        taskClass : `type`
            Sub-class of `~lsst.pipe.base.PipelineTask`.
        config : `~lsst.pipe.base.PipelineTaskConfig`
            Configuration object for this task.

        Returns
        -------
        task : `~lsst.pipe.base.PipelineTask`
            Instance of ``taskClass`` type.
        """
        # call task factory for that
        return self.taskFactory.makeTask(taskClass, config, None, self.butler)

    def updateQuantumInputs(self, quantum):
        """Update quantum with extra information.

        Some methods may require input DatasetRefs to have a non-None
        ``dataset_id``, but in the case of intermediate datasets it may not be
        filled in during QuantumGraph construction. This method retrieves the
        missing info from the registry.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        """
        butler = self.butler
        for refsForDatasetType in quantum.predictedInputs.values():
            newRefsForDatasetType = []
            for ref in refsForDatasetType:
                if ref.id is None:
                    resolvedRef = butler.registry.findDataset(ref.datasetType, ref.dataId,
                                                              collections=butler.collections)
                    if resolvedRef is None:
                        raise ValueError(
                            f"Cannot find {ref.datasetType.name} with id {ref.dataId} "
                            f"in collections {butler.collections}."
                        )
                    newRefsForDatasetType.append(resolvedRef)
                    _LOG.debug("Updating dataset ID for %s", ref)
                else:
                    newRefsForDatasetType.append(ref)
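            # Slice assignment below updates the list held by the quantum in
            # place rather than rebinding a local name to a new list.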

            refsForDatasetType[:] = newRefsForDatasetType

    def runQuantum(self, task, quantum, taskDef):
        """Execute task on a single quantum.

        Parameters
        ----------
        task : `~lsst.pipe.base.PipelineTask`
            Task object.
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.
        """
        # Create a butler that operates in the context of a quantum
        butlerQC = ButlerQuantumContext(self.butler, quantum)

        # Get the input and output references for the task
        connectionInstance = task.config.connections.ConnectionsClass(config=task.config)
        inputRefs, outputRefs = connectionInstance.buildDatasetRefs(quantum)
        # Call the task's runQuantum() method. Any exception raised by the
        # task propagates to the caller.
        task.runQuantum(butlerQC, inputRefs, outputRefs)

        if taskDef.metadataDatasetName is not None:
            # The DatasetRef has to be in the Quantum outputs; look it up by name.
            try:
                ref = quantum.outputs[taskDef.metadataDatasetName]
            except LookupError as exc:
                raise LookupError(
                    f"Quantum outputs are missing metadata dataset type {taskDef.metadataDatasetName};"
                    f" this can happen due to inconsistent options between Quantum generation"
                    f" and execution") from exc
            butlerQC.put(task.metadata, ref[0])
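
# A sketch of how a driver might use this executor to run an entire graph
# (``quantumGraph`` here is a hypothetical iterable of (taskDef, quantum)
# pairs; it is not provided by this module):
#
#     executor = SingleQuantumExecutor(butler, taskFactory)
#     for taskDef, quantum in quantumGraph:
#         executor.execute(taskDef, quantum)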