# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__all__ = ['SingleQuantumExecutor']

# -------------------------------
# Imports of standard modules --
# -------------------------------
import logging
from itertools import chain

# -----------------------------
# Imports for other modules --
# -----------------------------
from lsst.log import Log
from lsst.pipe.base import ButlerQuantumContext

# ----------------------------------
# Local non-exported definitions --
# ----------------------------------

_LOG = logging.getLogger(__name__.partition(".")[2])
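# Note: ``str.partition(".")`` splits on the first dot only, so the logger is
# named after everything past the leading package component. For an assumed,
# purely illustrative module path "lsst.ctrl.mpexec.singleQuantumExecutor" the
# resulting logger name would be "ctrl.mpexec.singleQuantumExecutor".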

class SingleQuantumExecutor:
    """Executor class which runs one Quantum at a time.

    Parameters
    ----------
    butler : `~lsst.daf.butler.Butler`
        Data butler.
    taskFactory : `~lsst.pipe.base.TaskFactory`
        Instance of a task factory.
    skipExisting : `bool`, optional
        If `True` then quanta with all existing outputs are not executed.
    clobberOutput : `bool`, optional
        If `True` then overwrite all existing output datasets in an output
        collection.
    enableLsstDebug : `bool`, optional
        Enable debugging with the ``lsstDebug`` facility for a task.
    """
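    # Illustrative driving pattern (a hedged sketch, not part of this class's
    # API; ``quantaToRun`` and the pairing of (taskDef, quantum) are assumed to
    # be supplied by the caller, they are not defined in this module):
    #
    #     executor = SingleQuantumExecutor(butler, taskFactory, skipExisting=True)
    #     for taskDef, quantum in quantaToRun:
    #         executor.execute(taskDef, quantum)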
    def __init__(self, butler, taskFactory, skipExisting=False, clobberOutput=False, enableLsstDebug=False):
        self.butler = butler
        self.taskFactory = taskFactory
        self.skipExisting = skipExisting
        self.clobberOutput = clobberOutput
        self.enableLsstDebug = enableLsstDebug

    def execute(self, taskDef, quantum):
        """Execute PipelineTask on a single Quantum.

        Parameters
        ----------
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        """
        taskClass, config = taskDef.taskClass, taskDef.config
        self.setupLogging(taskClass, config, quantum)
        if self.clobberOutput:
            self.doClobberOutputs(quantum)
        if self.skipExisting and self.quantumOutputsExist(quantum):
            _LOG.info("Quantum execution skipped due to existing outputs, "
                      f"task={taskClass.__name__} dataId={quantum.dataId}.")
            return
        self.updateQuantumInputs(quantum)

        # enable lsstDebug debugging
        if self.enableLsstDebug:
            try:
                _LOG.debug("Will try to import debug.py")
                import debug  # noqa:F401
            except ImportError:
                _LOG.warning("No 'debug' module found.")

        task = self.makeTask(taskClass, config)
        self.runQuantum(task, quantum, taskDef)

    def setupLogging(self, taskClass, config, quantum):
        """Configure logging system for execution of this task.

        This method can set up logging to attach task- or quantum-specific
        information to log messages. Potentially it can take into account
        some information from the task configuration as well.

        Parameters
        ----------
        taskClass : `type`
            Sub-class of `~lsst.pipe.base.PipelineTask`.
        config : `~lsst.pipe.base.PipelineTaskConfig`
            Configuration object for this task.
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        """
        # include input dataIds into MDC
        dataIds = set(ref.dataId for ref in chain.from_iterable(quantum.predictedInputs.values()))
        if dataIds:
            if len(dataIds) == 1:
                Log.MDC("LABEL", str(dataIds.pop()))
            else:
                Log.MDC("LABEL", '[' + ', '.join([str(dataId) for dataId in dataIds]) + ']')

    def doClobberOutputs(self, quantum):
        """Delete any outputs that already exist for a Quantum.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Quantum to check for existing outputs.
        """
        collection = self.butler.run.collection
        registry = self.butler.registry

        existingRefs = []
        for datasetRefs in quantum.outputs.values():
            for datasetRef in datasetRefs:
                ref = registry.find(collection, datasetRef.datasetType, datasetRef.dataId)
                if ref is not None:
                    existingRefs.append(ref)
        for ref in existingRefs:
            _LOG.debug("Removing existing dataset: %s", ref)
            self.butler.remove(ref)

    def quantumOutputsExist(self, quantum):
        """Decide whether this quantum needs to be executed.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Quantum to check for existing outputs.

        Returns
        -------
        exist : `bool`
            `True` if all of the quantum's outputs exist in a collection,
            `False` otherwise.

        Raises
        ------
        RuntimeError
            Raised if some outputs exist and some do not.
        """
        collection = self.butler.run.collection
        registry = self.butler.registry

        existingRefs = []
        missingRefs = []
        for datasetRefs in quantum.outputs.values():
            for datasetRef in datasetRefs:
                ref = registry.find(collection, datasetRef.datasetType, datasetRef.dataId)
                if ref is None:
                    missingRefs.append(datasetRef)
                else:
                    existingRefs.append(datasetRef)
        if existingRefs and missingRefs:
            # some outputs exist and some do not, can't do a thing with that
            raise RuntimeError("Registry inconsistency while checking for existing outputs:"
                               f" collection={collection} existingRefs={existingRefs}"
                               f" missingRefs={missingRefs}")
        else:
            return bool(existingRefs)

    def makeTask(self, taskClass, config):
        """Make new task instance.

        Parameters
        ----------
        taskClass : `type`
            Sub-class of `~lsst.pipe.base.PipelineTask`.
        config : `~lsst.pipe.base.PipelineTaskConfig`
            Configuration object for this task.

        Returns
        -------
        task : `~lsst.pipe.base.PipelineTask`
            Instance of ``taskClass`` type.
        """
        # call task factory for that
        return self.taskFactory.makeTask(taskClass, config, None, self.butler)

    def updateQuantumInputs(self, quantum):
        """Update quantum with extra information.

        Some methods may require input DatasetRefs to have a non-None
        ``dataset_id``, but in the case of intermediate datasets it may not
        be filled in during QuantumGraph construction. This method retrieves
        the missing information from the registry.

        Parameters
        ----------
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        """
        butler = self.butler
        for refsForDatasetType in quantum.predictedInputs.values():
            newRefsForDatasetType = []
            for ref in refsForDatasetType:
                if ref.id is None:
                    resolvedRef = butler.registry.find(butler.collection, ref.datasetType, ref.dataId)
                    if resolvedRef is None:
                        raise ValueError(
                            f"Cannot find {ref.datasetType.name} with id {ref.dataId} "
                            f"in collection {butler.collection}."
                        )
                    newRefsForDatasetType.append(resolvedRef)
                    _LOG.debug("Updating dataset ID for %s", ref)
                else:
                    newRefsForDatasetType.append(ref)
            # assign in place so the quantum's own list object is updated
            refsForDatasetType[:] = newRefsForDatasetType

    def runQuantum(self, task, quantum, taskDef):
        """Execute task on a single quantum.

        Parameters
        ----------
        task : `~lsst.pipe.base.PipelineTask`
            Task object.
        quantum : `~lsst.daf.butler.Quantum`
            Single Quantum instance.
        taskDef : `~lsst.pipe.base.TaskDef`
            Task definition structure.
        """
        # Create a butler that operates in the context of a quantum
        butlerQC = ButlerQuantumContext(self.butler, quantum)

        # Get the input and output references for the task
        connectionInstance = task.config.connections.ConnectionsClass(config=task.config)
        inputRefs, outputRefs = connectionInstance.buildDatasetRefs(quantum)
        # Call task runQuantum() method. Any exception thrown by the task
        # propagates to caller.
        task.runQuantum(butlerQC, inputRefs, outputRefs)

        if taskDef.metadataDatasetName is not None:
            # DatasetRef has to be in the Quantum outputs, can look it up by name
            try:
                ref = quantum.outputs[taskDef.metadataDatasetName]
            except LookupError as exc:
                raise LookupError(
                    f"Quantum outputs are missing metadata dataset type {taskDef.metadataDatasetName};"
                    " this can happen due to inconsistent options between Quantum generation"
                    " and execution") from exc
            butlerQC.put(task.metadata, ref[0])