Coverage for python/lsst/ctrl/mpexec/mock_task.py: 20%

87 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-02-01 10:07 +0000

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22import logging 

23from typing import Any, List, Optional, Union 

24 

25from lsst.daf.butler import Butler, DatasetRef, Quantum 

26from lsst.pex.config import Field 

27from lsst.pipe.base import ( 

28 ButlerQuantumContext, 

29 DeferredDatasetRef, 

30 InputQuantizedConnection, 

31 OutputQuantizedConnection, 

32 PipelineTask, 

33 PipelineTaskConfig, 

34 PipelineTaskConnections, 

35) 

36from lsst.utils import doImportType 

37from lsst.utils.introspection import get_full_type_name 

38 

39from .dataid_match import DataIdMatch 

40 

41_LOG = logging.getLogger(__name__) 

42 

43 

class MockButlerQuantumContext(ButlerQuantumContext):
    """Quantum context used when executing a mock task.

    Parameters
    ----------
    butler : `~lsst.daf.butler.Butler`
        Data butler instance.
    quantum : `~lsst.daf.butler.Quantum`
        Execution quantum.

    Notes
    -----
    Reads are first attempted against a "mock" dataset type whose name is
    derived from the requested dataset type name. When a reference is given
    the read result is a dictionary; when the reference is `None` the result
    is `None`. Writes store the value under the mock dataset type and also
    register the original `DatasetRef` with the registry.
    """

    def __init__(self, butler: Butler, quantum: Quantum):
        super().__init__(butler=butler, limited=butler, quantum=quantum)
        self.butler = butler
        self.registry = butler.registry

    @classmethod
    def mockDatasetTypeName(cls, datasetTypeName: str) -> str:
        """Return the mock dataset type name for a real dataset type name.

        Parameters
        ----------
        datasetTypeName : `str`
            Name of the actual dataset type.

        Returns
        -------
        name : `str`
            The given name with a ``_mock_`` prefix.
        """
        return "_mock_" + datasetTypeName

    def _get(self, ref: Optional[Union[DeferredDatasetRef, DatasetRef]]) -> Any:
        # docstring is inherited from the base class
        if ref is None:
            return None
        if isinstance(ref, DeferredDatasetRef):
            ref = ref.datasetRef
        requestedType = ref.datasetType

        parentName, component = requestedType.nameAndComponent()
        hasComponent = component is not None
        # When a component was requested the mock name is built from the
        # parent dataset type name, otherwise from the full name.
        mockName = self.mockDatasetTypeName(parentName if hasComponent else requestedType.name)

        try:
            mockType = self.butler.registry.getDatasetType(mockName)
            ref = DatasetRef(mockType, ref.dataId)
            data = self.butler.get(ref)
        except KeyError:
            # Fall back to the base class read, then replace the returned
            # object with a small provenance dictionary describing it; that
            # dictionary is what ends up stored as part of output datasets.
            actual = super()._get(ref)
            dataId = ref.dataId
            data = {
                "ref": {
                    "dataId": {key.name: dataId[key] for key in dataId.keys()},
                    "datasetType": ref.datasetType.name,
                },
                "type": get_full_type_name(type(actual)),
            }
        if hasComponent:
            data.update(component=component)
        return data

    def _put(self, value: Any, ref: DatasetRef) -> None:
        # docstring is inherited from the base class
        mockName = self.mockDatasetTypeName(ref.datasetType.name)
        mockType = self.registry.getDatasetType(mockName)
        mockRef = DatasetRef(mockType, ref.dataId)

        # Record the mock dataset type name inside the stored value itself.
        refInfo = value.setdefault("ref", {})
        refInfo.update(datasetType=mockType.name)
        self.butler.put(value, mockRef)

        # also "store" non-mock refs, make sure it is not resolved.
        self.registry._importDatasets([ref.unresolved()])

    def _checkMembership(self, ref: Union[List[DatasetRef], DatasetRef], inout: set) -> None:
        # docstring is inherited from the base class
        # No membership validation is performed here.
        return None

120 

121 

class MockPipelineTaskConfig(PipelineTaskConfig, pipelineConnections=PipelineTaskConnections):
    """Configuration for a mock pipeline task.

    Notes
    -----
    The two fields describe an optional simulated failure: a data ID
    expression selecting when to fail and the exception class to raise.
    """

    failCondition: Field[str] = Field(
        dtype=str,
        default="",
        doc=(
            "Condition on DataId to raise an exception. String expression which includes attributes of "
            "quantum DataId using a syntax of daf_butler user expressions (e.g. 'visit = 123')."
        ),
    )

    failException: Field[str] = Field(
        dtype=str,
        default="builtins.ValueError",
        doc=(
            "Class name of the exception to raise when fail condition is triggered. Can be "
            "'lsst.pipe.base.NoWorkFound' to specify non-failure exception."
        ),
    )

    def dataIdMatch(self) -> Optional[DataIdMatch]:
        """Build a matcher from the configured failure condition.

        Returns
        -------
        match : `DataIdMatch` or `None`
            Matcher constructed from ``failCondition``, or `None` when that
            field is empty.
        """
        condition = self.failCondition
        return DataIdMatch(condition) if condition else None

146 

147 

class MockPipelineTask(PipelineTask):
    """PipelineTask implementation for running a mock pipeline.

    Notes
    -----
    `runQuantum` is overridden to read every input reference and to write a
    plain dictionary for each output. That dictionary records the output
    reference, the task name, the quantum data ID and the inputs that were
    read. This class relies on `MockButlerQuantumContext`, which knows how to
    store such dictionaries using special dataset types.
    """

    ConfigClass = MockPipelineTaskConfig

    def __init__(self, *, config: Optional[MockPipelineTaskConfig] = None, **kwargs: Any):
        super().__init__(config=config, **kwargs)

        self.failException: Optional[type] = None
        self.dataIdMatch: Optional[DataIdMatch] = None
        if config is None:
            return

        self.dataIdMatch = config.dataIdMatch()
        if self.dataIdMatch:
            # The exception class is only imported when a failure condition
            # has been configured.
            self.failException = doImportType(config.failException)

    @staticmethod
    def _dataIdToDict(dataId: Any) -> dict:
        """Return a new dict mapping each key's ``name`` to its value."""
        return {key.name: dataId[key] for key in dataId.keys()}

    def runQuantum(
        self,
        butlerQC: ButlerQuantumContext,
        inputRefs: InputQuantizedConnection,
        outputRefs: OutputQuantizedConnection,
    ) -> None:
        # docstring is inherited from the base class
        quantum = butlerQC.quantum

        _LOG.info("Mocking execution of task '%s' on quantum %s", self.getName(), quantum.dataId)

        assert quantum.dataId is not None, "Quantum DataId cannot be None"

        # Possibly raise an exception.
        matcher = self.dataIdMatch
        if matcher is not None and matcher.match(quantum.dataId):
            _LOG.info("Simulating failure of task '%s' on quantum %s", self.getName(), quantum.dataId)
            message = f"Simulated failure: task={self.getName()} dataId={quantum.dataId}"
            assert self.failException is not None, "Exception type must be defined"
            raise self.failException(message)

        # read all inputs
        inputs = butlerQC.get(inputRefs)

        _LOG.info("Read input data for task '%s' on quantum %s", self.getName(), quantum.dataId)

        # To avoid very deep provenance we trim inputs to a single level:
        # drop the nested "inputs" entry from each input's "quantum" record.
        for data in inputs.values():
            items = [data] if isinstance(data, dict) else data
            if not isinstance(items, list):
                continue
            for item in items:
                item.get("quantum", {}).pop("inputs", None)

        # store mock outputs
        for name, refs in outputRefs:
            refList = refs if isinstance(refs, list) else [refs]
            for ref in refList:
                data = {
                    "ref": {
                        "dataId": self._dataIdToDict(ref.dataId),
                        "datasetType": ref.datasetType.name,
                    },
                    "quantum": {
                        "task": self.getName(),
                        "dataId": self._dataIdToDict(quantum.dataId),
                        "inputs": inputs,
                    },
                    "outputName": name,
                }
                butlerQC.put(data, ref)

        _LOG.info("Finished mocking task '%s' on quantum %s", self.getName(), quantum.dataId)