Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

# Public API of this module: only the executor class is exported.
__all__ = ['SingleQuantumExecutor']

23 

24# ------------------------------- 

25# Imports of standard modules -- 

26# ------------------------------- 

27import logging 

28from itertools import chain 

29 

30# ----------------------------- 

31# Imports for other modules -- 

32# ----------------------------- 

33from .quantumGraphExecutor import QuantumExecutor 

34from lsst.log import Log 

35from lsst.obs.base import Instrument 

36from lsst.pipe.base import ButlerQuantumContext 

37 

38# ---------------------------------- 

39# Local non-exported definitions -- 

40# ---------------------------------- 

41 

# Module-level logger. ``partition(".")[2]`` drops everything up to and
# including the first "." of ``__name__``, i.e. strips the top-level
# package prefix from the logger name — presumably to match the project's
# logger-naming convention; confirm against other ctrl_mpexec modules.
_LOG = logging.getLogger(__name__.partition(".")[2])

43 

44 

class SingleQuantumExecutor(QuantumExecutor):
    """Executor class which runs one Quantum at a time.

    Parameters
    ----------
    taskFactory : `~lsst.pipe.base.TaskFactory`
        Instance of a task factory.
    skipExisting : `bool`, optional
        If `True` then quanta with all existing outputs are not executed.
    enableLsstDebug : `bool`, optional
        Enable debugging with ``lsstDebug`` facility for a task.
    """
    def __init__(self, taskFactory, skipExisting=False, enableLsstDebug=False):
        self.taskFactory = taskFactory
        self.skipExisting = skipExisting
        self.enableLsstDebug = enableLsstDebug

    def execute(self, taskDef, quantum, butler):
        # Docstring inherited from QuantumExecutor.execute
        taskClass, config = taskDef.taskClass, taskDef.config
        self.setupLogging(taskClass, config, quantum)

        # Optionally skip quanta whose outputs all exist already.
        if self.skipExisting and self.quantumOutputsExist(quantum, butler):
            # Lazy %-style arguments: message is only formatted when INFO
            # logging is actually enabled (renders identically to before).
            _LOG.info("Quantum execution skipped due to existing outputs, "
                      "task=%s dataId=%s.", taskClass.__name__, quantum.dataId)
            return

        self.updateQuantumInputs(quantum, butler)

        # Enable lsstDebug debugging if requested; absence of a ``debug``
        # module is a best-effort situation, not an error.
        if self.enableLsstDebug:
            try:
                _LOG.debug("Will try to import debug.py")
                import debug  # noqa:F401
            except ImportError:
                # ``Logger.warn`` is a deprecated alias; use ``warning``.
                _LOG.warning("No 'debug' module found.")

        # Initialize global state (e.g. instrument-driven filter setup).
        self.initGlobals(quantum, butler)

        task = self.makeTask(taskClass, config, butler)
        self.runQuantum(task, quantum, taskDef, butler)

    def setupLogging(self, taskClass, config, quantum):
        """Configure logging system for execution of this task.

        This method can setup logging to attach task- or
        quantum-specific information to log messages. Potentially this can
        take into account some info from task configuration as well.

        Parameters
        ----------
        taskClass : `type`
            Sub-class of `~lsst.pipe.base.PipelineTask`.
        config : `~lsst.pipe.base.PipelineTaskConfig`
            Configuration object for this task.
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        """
        # Include input dataIds in the MDC so every log message emitted
        # while this quantum runs carries a "LABEL" identifying it.
        dataIds = set(ref.dataId for ref in chain.from_iterable(quantum.predictedInputs.values()))
        if dataIds:
            if len(dataIds) == 1:
                Log.MDC("LABEL", str(dataIds.pop()))
            else:
                Log.MDC("LABEL", '[' + ', '.join([str(dataId) for dataId in dataIds]) + ']')

    def quantumOutputsExist(self, quantum, butler):
        """Decide whether this quantum needs to be executed.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Quantum to check for existing outputs.
        butler : `~lsst.daf.butler.Butler`
            Data butler.

        Returns
        -------
        exist : `bool`
            `True` if all quantum's outputs exist in a collection, `False`
            otherwise.

        Raises
        ------
        RuntimeError
            Raised if some outputs exist and some not.
        """
        collection = butler.run
        registry = butler.registry

        existingRefs = []
        missingRefs = []
        for datasetRefs in quantum.outputs.values():
            for datasetRef in datasetRefs:
                ref = registry.findDataset(datasetRef.datasetType, datasetRef.dataId,
                                           collections=collection)
                # Bug fix: record the individual ``datasetRef``, not the
                # whole per-type list ``datasetRefs`` — the original
                # appended the list, which duplicated entries and made the
                # inconsistency message below misleading.
                if ref is None:
                    missingRefs.append(datasetRef)
                else:
                    existingRefs.append(datasetRef)
        if existingRefs and missingRefs:
            # Some outputs exist and some do not, can't do a thing with that.
            raise RuntimeError(f"Registry inconsistency while checking for existing outputs:"
                               f" collection={collection} existingRefs={existingRefs}"
                               f" missingRefs={missingRefs}")
        else:
            return bool(existingRefs)

    def makeTask(self, taskClass, config, butler):
        """Make new task instance.

        Parameters
        ----------
        taskClass : `type`
            Sub-class of `~lsst.pipe.base.PipelineTask`.
        config : `~lsst.pipe.base.PipelineTaskConfig`
            Configuration object for this task.
        butler : `~lsst.daf.butler.Butler`
            Data butler.

        Returns
        -------
        task : `~lsst.pipe.base.PipelineTask`
            Instance of ``taskClass`` type.
        """
        # Delegate instantiation to the task factory; ``None`` is passed
        # where the factory expects an override/parent argument.
        return self.taskFactory.makeTask(taskClass, config, None, butler)

    def updateQuantumInputs(self, quantum, butler):
        """Update quantum with extra information.

        Some methods may require input DatasetRefs to have non-None
        ``dataset_id``, but in case of intermediate dataset it may not be
        filled during QuantumGraph construction. This method will retrieve
        missing info from registry.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        butler : `~lsst.daf.butler.Butler`
            Data butler.

        Raises
        ------
        ValueError
            Raised if an unresolved input ref cannot be found in the
            butler's collections.
        """
        for refsForDatasetType in quantum.predictedInputs.values():
            newRefsForDatasetType = []
            for ref in refsForDatasetType:
                if ref.id is None:
                    # Unresolved ref — look it up in the registry.
                    resolvedRef = butler.registry.findDataset(ref.datasetType, ref.dataId,
                                                              collections=butler.collections)
                    if resolvedRef is None:
                        raise ValueError(
                            f"Cannot find {ref.datasetType.name} with id {ref.dataId} "
                            f"in collections {butler.collections}."
                        )
                    newRefsForDatasetType.append(resolvedRef)
                    _LOG.debug("Updating dataset ID for %s", ref)
                else:
                    newRefsForDatasetType.append(ref)
            # Replace list contents in place so the quantum keeps the same
            # list object.
            refsForDatasetType[:] = newRefsForDatasetType

    def runQuantum(self, task, quantum, taskDef, butler):
        """Execute task on a single quantum.

        Parameters
        ----------
        task : `~lsst.pipe.base.PipelineTask`
            Task object.
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.
        butler : `~lsst.daf.butler.Butler`
            Data butler.
        """
        # Create a butler that operates in the context of a quantum
        butlerQC = ButlerQuantumContext(butler, quantum)

        # Get the input and output references for the task
        connectionInstance = task.config.connections.ConnectionsClass(config=task.config)
        inputRefs, outputRefs = connectionInstance.buildDatasetRefs(quantum)

        # Call task runQuantum() method. Any exception thrown by the task
        # propagates to caller.
        task.runQuantum(butlerQC, inputRefs, outputRefs)

        if taskDef.metadataDatasetName is not None:
            # DatasetRef has to be in the Quantum outputs, can lookup by name
            try:
                ref = quantum.outputs[taskDef.metadataDatasetName]
            except LookupError as exc:
                raise LookupError(
                    f"Quantum outputs is missing metadata dataset type {taskDef.metadataDatasetName},"
                    f" it could happen due to inconsistent options between Quantum generation"
                    f" and execution") from exc
            butlerQC.put(task.metadata, ref[0])

    def initGlobals(self, quantum, butler):
        """Initialize global state needed for task execution.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        butler : `~lsst.daf.butler.Butler`
            Data butler.

        Notes
        -----
        There is an issue with initializing filters singleton which is done
        by instrument, to avoid requiring tasks to do it in runQuantum()
        we do it here when any dataId has an instrument dimension. Also for
        now we only allow single instrument, verify that all instrument
        names in all dataIds are identical.

        This will need revision when filter singleton disappears.
        """
        oneInstrument = None
        for datasetRefs in chain(quantum.predictedInputs.values(), quantum.outputs.values()):
            for datasetRef in datasetRefs:
                dataId = datasetRef.dataId
                instrument = dataId.get("instrument")
                if instrument is not None:
                    if oneInstrument is not None:
                        assert instrument == oneInstrument, \
                            "Currently require that only one instrument is used per graph"
                    else:
                        oneInstrument = instrument
                        Instrument.fromName(instrument, butler.registry)