Coverage for python/lsst/ctrl/mpexec/mock_task.py: 19%

91 statements  

coverage.py v7.2.3, created at 2023-04-20 10:51 +0000

# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import logging
import warnings
from typing import Any, List, Optional, Union

from lsst.daf.butler import Butler, DatasetRef, Quantum, UnresolvedRefWarning
from lsst.pex.config import Field
from lsst.pipe.base import (
    ButlerQuantumContext,
    DeferredDatasetRef,
    InputQuantizedConnection,
    OutputQuantizedConnection,
    PipelineTask,
    PipelineTaskConfig,
    PipelineTaskConnections,
)
from lsst.utils import doImportType
from lsst.utils.introspection import get_full_type_name

from .dataid_match import DataIdMatch

_LOG = logging.getLogger(__name__)


class MockButlerQuantumContext(ButlerQuantumContext):
    """Implementation of ButlerQuantumContext to use with a mock task.

    Parameters
    ----------
    butler : `~lsst.daf.butler.Butler`
        Data butler instance.
    quantum : `~lsst.daf.butler.Quantum`
        Execution quantum.

    Notes
    -----
    This implementation overrides the `get` method to try to retrieve the
    dataset from a mock dataset type if it exists. The `get` method always
    returns a dictionary. The `put` method stores the data with a mock
    dataset type, but also registers the `DatasetRef` with the registry
    using the original dataset type.
    """


    def __init__(self, butler: Butler, quantum: Quantum):
        super().__init__(butler=butler, limited=butler, quantum=quantum)
        self.butler = butler
        self.registry = butler.registry

    @classmethod
    def mockDatasetTypeName(cls, datasetTypeName: str) -> str:
        """Make a mock dataset type name from an actual dataset type name."""
        return "_mock_" + datasetTypeName

    def _get(self, ref: Optional[Union[DeferredDatasetRef, DatasetRef]]) -> Any:
        # docstring is inherited from the base class
        if ref is None:
            return None
        if isinstance(ref, DeferredDatasetRef):
            ref = ref.datasetRef
        datasetType = ref.datasetType

        typeName, component = datasetType.nameAndComponent()
        if component is not None:
            mockDatasetTypeName = self.mockDatasetTypeName(typeName)
        else:
            mockDatasetTypeName = self.mockDatasetTypeName(datasetType.name)

        try:
            # Try to use the mock DatasetType if it is defined.
            mockDatasetType = self.butler.registry.getDatasetType(mockDatasetTypeName)
            data = self.butler.get(mockDatasetType, ref.dataId)
        except KeyError:
            data = super()._get(ref)
            # If the input is actual non-mock data then we want to replace it
            # with provenance data which will be stored as part of the output
            # dataset.
            data = {
                "ref": {
                    "dataId": {key.name: ref.dataId[key] for key in ref.dataId.keys()},
                    "datasetType": ref.datasetType.name,
                },
                "type": get_full_type_name(type(data)),
            }
        if component is not None:
            data.update(component=component)
        return data


    def _put(self, value: Any, ref: DatasetRef) -> None:
        # docstring is inherited from the base class

        mockDatasetType = self.registry.getDatasetType(self.mockDatasetTypeName(ref.datasetType.name))
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UnresolvedRefWarning)
            mockRef = DatasetRef(mockDatasetType, ref.dataId)
        value.setdefault("ref", {}).update(datasetType=mockDatasetType.name)
        self.butler.put(value, mockRef)

        # Also "store" the non-mock ref, making sure it is not resolved.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UnresolvedRefWarning)
            self.registry._importDatasets([ref.unresolved()])

    def _checkMembership(self, ref: Union[List[DatasetRef], DatasetRef], inout: set) -> None:
        # docstring is inherited from the base class
        return

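
# Illustrative sketch, not part of the mock-task implementation itself: a
# minimal example of how the mock context might be used. It assumes `butler`,
# `quantum`, and `someInputRef` are supplied by the caller; the function name
# is hypothetical.
def _example_mock_context_usage(butler: Butler, quantum: Quantum, someInputRef: DatasetRef) -> Any:
    # Mock dataset type names are derived by prefixing the real name.
    assert MockButlerQuantumContext.mockDatasetTypeName("calexp") == "_mock_calexp"
    butlerQC = MockButlerQuantumContext(butler, quantum)
    # `get` always returns a dict: either stored mock data or a provenance
    # summary of the real input.
    return butlerQC.get(someInputRef)
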

class MockPipelineTaskConfig(PipelineTaskConfig, pipelineConnections=PipelineTaskConnections):
    """Configuration class for MockPipelineTask."""

    failCondition: Field[str] = Field(
        dtype=str,
        default="",
        doc=(
            "Condition on DataId to raise an exception. String expression which includes attributes of "
            "quantum DataId using the syntax of daf_butler user expressions (e.g. 'visit = 123')."
        ),
    )

    failException: Field[str] = Field(
        dtype=str,
        default="builtins.ValueError",
        doc=(
            "Class name of the exception to raise when the fail condition is triggered. Can be "
            "'lsst.pipe.base.NoWorkFound' to specify a non-failure exception."
        ),
    )

    def dataIdMatch(self) -> Optional[DataIdMatch]:
        """Return a `DataIdMatch` built from `failCondition`, or `None` if
        no condition is configured.
        """
        if not self.failCondition:
            return None
        return DataIdMatch(self.failCondition)

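
# Illustrative sketch (hypothetical helper name, made-up values): configuring
# the mock task to raise a non-failure exception for a particular quantum data
# ID, using the daf_butler expression syntax documented in `failCondition`.
def _example_mock_config() -> MockPipelineTaskConfig:
    config = MockPipelineTaskConfig()
    config.failCondition = "visit = 123"  # trigger only for this data ID
    config.failException = "lsst.pipe.base.NoWorkFound"  # non-failure exception
    assert config.dataIdMatch() is not None  # DataIdMatch built from the condition
    return config
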

class MockPipelineTask(PipelineTask):
    """Implementation of PipelineTask used for running a mock pipeline.

    Notes
    -----
    This class overrides `runQuantum` to read all input datasetRefs and to
    store a simple dictionary as output data. The output dictionary contains
    some provenance data about the inputs, the task that produced it, and the
    corresponding quantum. This class depends on `MockButlerQuantumContext`,
    which knows how to store the output dictionary data with special dataset
    types.
    """


    ConfigClass = MockPipelineTaskConfig

    def __init__(self, *, config: Optional[MockPipelineTaskConfig] = None, **kwargs: Any):
        super().__init__(config=config, **kwargs)

        self.failException: Optional[type] = None
        self.dataIdMatch: Optional[DataIdMatch] = None
        if config is not None:
            self.dataIdMatch = config.dataIdMatch()
            if self.dataIdMatch:
                self.failException = doImportType(config.failException)


    def runQuantum(
        self,
        butlerQC: ButlerQuantumContext,
        inputRefs: InputQuantizedConnection,
        outputRefs: OutputQuantizedConnection,
    ) -> None:
        # docstring is inherited from the base class
        quantum = butlerQC.quantum

        _LOG.info("Mocking execution of task '%s' on quantum %s", self.getName(), quantum.dataId)

        assert quantum.dataId is not None, "Quantum DataId cannot be None"

        # Possibly raise an exception.
        if self.dataIdMatch is not None and self.dataIdMatch.match(quantum.dataId):
            _LOG.info("Simulating failure of task '%s' on quantum %s", self.getName(), quantum.dataId)
            message = f"Simulated failure: task={self.getName()} dataId={quantum.dataId}"
            assert self.failException is not None, "Exception type must be defined"
            raise self.failException(message)

        # Read all inputs.
        inputs = butlerQC.get(inputRefs)

        _LOG.info("Read input data for task '%s' on quantum %s", self.getName(), quantum.dataId)

        # To avoid very deep provenance we trim inputs to a single level.
        for name, data in inputs.items():
            if isinstance(data, dict):
                data = [data]
            if isinstance(data, list):
                for item in data:
                    qdata = item.get("quantum", {})
                    qdata.pop("inputs", None)

        # Store mock outputs.
        for name, refs in outputRefs:
            if not isinstance(refs, list):
                refs = [refs]
            for ref in refs:
                data = {
                    "ref": {
                        "dataId": {key.name: ref.dataId[key] for key in ref.dataId.keys()},
                        "datasetType": ref.datasetType.name,
                    },
                    "quantum": {
                        "task": self.getName(),
                        "dataId": {key.name: quantum.dataId[key] for key in quantum.dataId.keys()},
                        "inputs": inputs,
                    },
                    "outputName": name,
                }
                butlerQC.put(data, ref)

        _LOG.info("Finished mocking task '%s' on quantum %s", self.getName(), quantum.dataId)
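
# For reference, a sketch of the dictionary shape stored by `runQuantum` for
# each output ref via `MockButlerQuantumContext.put`. The name and all values
# below are made-up placeholders, not real data.
_EXAMPLE_OUTPUT_SHAPE = {
    "ref": {"dataId": {"visit": 123, "detector": 42}, "datasetType": "someOutput"},
    "quantum": {
        "task": "someTask",
        "dataId": {"visit": 123, "detector": 42},
        # Per-connection provenance dicts, trimmed to one level as above.
        "inputs": {"someInput": [{"ref": {"dataId": {"visit": 123}, "datasetType": "someInput"}}]},
    },
    "outputName": "someOutput",
}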