Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22__all__ = ['PreExecInit'] 

23 

24# ------------------------------- 

25# Imports of standard modules -- 

26# ------------------------------- 

27import logging 

28import itertools 

29 

30# ----------------------------- 

31# Imports for other modules -- 

32# ----------------------------- 

33from lsst.pipe.base import PipelineDatasetTypes 

34 

35_LOG = logging.getLogger(__name__.partition(".")[2]) 

36 

37 

class PreExecInit:
    """Initialization of registry for QuantumGraph execution.

    This class encapsulates all necessary operations that have to be performed
    on butler and registry to prepare them for QuantumGraph execution.

    Parameters
    ----------
    butler : `~lsst.daf.butler.Butler`
        Data butler instance.
    taskFactory : `~lsst.pipe.base.TaskFactory`
        Task factory.
    skipExisting : `bool`, optional
        If `True` then do not try to overwrite any datasets that might exist
        in the butler. If `False` then any existing conflicting dataset will
        cause butler exception.
    """
    def __init__(self, butler, taskFactory, skipExisting=False):
        self.butler = butler
        self.taskFactory = taskFactory
        self.skipExisting = skipExisting

    def initialize(self, graph, saveInitOutputs=True, registerDatasetTypes=False):
        """Perform all initialization steps.

        Convenience method to execute all initialization steps. Instead of
        calling this method and providing all options it is also possible to
        call methods individually.

        Parameters
        ----------
        graph : `~lsst.pipe.base.QuantumGraph`
            Execution graph.
        saveInitOutputs : `bool`, optional
            If ``True`` (default) then save task "init outputs" to butler.
        registerDatasetTypes : `bool`, optional
            If ``True`` then register dataset types in registry, otherwise
            they must be already registered.
        """
        # register dataset types or check consistency
        self.initializeDatasetTypes(graph, registerDatasetTypes)

        # Save task initialization data or check that saved data
        # is consistent with what tasks would save
        if saveInitOutputs:
            self.saveInitOutputs(graph)

    def initializeDatasetTypes(self, graph, registerDatasetTypes=False):
        """Save or check DatasetTypes output by the tasks in a graph.

        Iterates over all DatasetTypes for all tasks in a graph and either
        tries to add them to registry or compares them to existing ones.

        Parameters
        ----------
        graph : `~lsst.pipe.base.QuantumGraph`
            Execution graph.
        registerDatasetTypes : `bool`, optional
            If ``True`` then register dataset types in registry, otherwise
            they must be already registered.

        Raises
        ------
        ValueError
            Raised if existing DatasetType is different from DatasetType
            in a graph.
        KeyError
            Raised if ``registerDatasetTypes`` is ``False`` and DatasetType
            does not exist in registry.
        """
        registry = self.butler.registry
        pipeline = [nodes.taskDef for nodes in graph]
        datasetTypes = PipelineDatasetTypes.fromPipeline(pipeline, registry=registry)
        # Only dataset types that the pipeline itself produces are handled
        # here; pure inputs are not part of this chain.
        for datasetType in itertools.chain(datasetTypes.initIntermediates, datasetTypes.initOutputs,
                                           datasetTypes.intermediates, datasetTypes.outputs):
            if registerDatasetTypes:
                _LOG.debug("Registering DatasetType %s with registry", datasetType)
                # this is a no-op if it already exists and is consistent,
                # and it raises if it is inconsistent.
                registry.registerDatasetType(datasetType)
            else:
                _LOG.debug("Checking DatasetType %s against registry", datasetType)
                expected = registry.getDatasetType(datasetType.name)
                if expected != datasetType:
                    raise ValueError("DatasetType configuration does not match Registry: "
                                     f"{datasetType} != {expected}")

    def saveInitOutputs(self, graph):
        """Write any datasets produced by initializing tasks in a graph.

        Parameters
        ----------
        graph : `~lsst.pipe.base.QuantumGraph`
            Execution graph.

        Raises
        ------
        TypeError
            Raised if ``skipExisting`` is `True` and the type of an already
            stored object differs from the type of the object produced by
            the task.
        Exception
            Raised if ``skipExisting`` is `False` and datasets already
            exists. Content of a butler collection may be changed if
            exception is raised.

        Notes
        -----
        If ``skipExisting`` is `True` then existing datasets are not
        overwritten, instead we should check that their stored object is
        exactly the same as what we would save at this time. Comparing
        arbitrary types of object is, of course, non-trivial. Current
        implementation only checks the existence of the datasets and their
        types against the types of objects produced by tasks. Ideally we
        would like to check that object data is identical too but presently
        there is no generic way to compare objects. In the future we can
        potentially introduce some extensible mechanism for that.
        """
        _LOG.debug("Will save InitOutputs for all tasks")
        for taskNodes in graph:
            taskDef = taskNodes.taskDef
            task = self.taskFactory.makeTask(taskDef.taskClass, taskDef.config, None, self.butler)
            for name in taskDef.connections.initOutputs:
                attribute = getattr(taskDef.connections, name)
                initOutputVar = getattr(task, name)
                objFromStore = None
                if self.skipExisting:
                    # check if it is there already
                    _LOG.debug("Retrieving InitOutputs for task=%s key=%s dsTypeName=%s",
                               task, name, attribute.name)
                    try:
                        objFromStore = self.butler.get(attribute.name, {})
                    except LookupError:
                        # Butler.get reports a dataset that is not in the
                        # collection with LookupError. That is the normal
                        # first-run case, so treat it as "not stored yet"
                        # and let the dataset be written below. A get() that
                        # returns None for a missing dataset takes the same
                        # path.
                        objFromStore = None
                    if objFromStore is not None:
                        # Types are supposed to be identical.
                        # TODO: Check that object contents is identical too.
                        if type(objFromStore) is not type(initOutputVar):
                            raise TypeError(f"Stored initOutput object type {type(objFromStore)} "
                                            f"is different from task-generated type "
                                            f"{type(initOutputVar)} for task {taskDef}")
                if objFromStore is None:
                    # butler will raise exception if dataset is already there
                    _LOG.debug("Saving InitOutputs for task=%s key=%s", task, name)
                    self.butler.put(initOutputVar, attribute.name, {})

169 f"is different from task-generated type " 

170 f"{type(initOutputVar)} for task {taskDef}") 

171 if objFromStore is None: 

172 # butler will raise exception if dataset is already there 

173 _LOG.debug("Saving InitOutputs for task=%s key=%s", task, name) 

174 self.butler.put(initOutputVar, attribute.name, {})