Coverage for python/lsst/ctrl/mpexec/cli/script/run.py: 38%

11 statements  

« prev     ^ index     » next       coverage.py v7.3.0, created at 2023-09-01 09:30 +0000

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22import logging 

23from types import SimpleNamespace 

24 

25from ... import CmdLineFwk, TaskFactory 

26 

27_log = logging.getLogger(__name__) 

28 

29 

def run(  # type: ignore
    pdb,
    graph_fixup,
    init_only,
    no_versions,
    processes,
    start_method,
    profile,
    qgraphObj,
    register_dataset_types,
    skip_init_writes,
    timeout,
    butler_config,
    input,
    output,
    output_run,
    extend_run,
    replace_run,
    prune_replaced,
    data_query,
    skip_existing_in,
    skip_existing,
    debug,
    fail_fast,
    clobber_outputs,
    summary,
    mock,
    unmocked_dataset_types,
    mock_failure,
    enable_implicit_threading,
    cores_per_quantum: int,
    memory_per_quantum: str,
    rebase,
    **kwargs,
):
    """Implement the ``pipetask run`` command-line subcommand.

    The options are bundled into a single namespace object and handed,
    together with the quantum graph and a task factory, to
    ``CmdLineFwk.runPipeline``. Intended to be called only by command line
    tools and by unit tests that exercise this function.

    Parameters
    ----------
    pdb : `bool`
        Drop into pdb on exception?
    graph_fixup : `str`
        Name of the class or factory method that produces the instance used
        for execution graph fixup.
    init_only : `bool`
        If true, do not actually run; only register dataset types and/or
        save init outputs.
    no_versions : `bool`
        If true, package versions are neither saved nor checked.
    processes : `int`
        Number of processes to use.
    start_method : `str` or `None`
        Start method from the `multiprocessing` module; `None` selects the
        best one for the current platform.
    profile : `str`
        Name of the file that cProfile information is dumped to.
    qgraphObj : `lsst.pipe.base.QuantumGraph`
        A QuantumGraph generated by a previous subcommand. It is passed to
        the framework directly rather than through the options namespace.
    register_dataset_types : `bool`
        If true, register DatasetTypes that do not already exist in the
        Registry.
    skip_init_writes : `bool`
        If true, do not write collection-wide 'init output' datasets (e.g.
        schemas).
    timeout : `int`
        Timeout for multiprocessing; maximum wall time (sec).
    butler_config : `str`, `dict`, or `lsst.daf.butler.Config`
        If `str`, the path location of the gen3 butler/registry config
        file. If `dict`, key value pairs used to init or update the
        `lsst.daf.butler.Config` instance. If `Config`, the object used to
        configure a Butler.
    input : `list` [ `str` ]
        Names of the input collection(s).
    output : `str`
        Name of the output CHAINED collection. This may either be an
        existing CHAINED collection to use as both input and output (if
        `input` is `None`), or a new CHAINED collection created to include
        all inputs (if `input` is not `None`). In both cases, the
        collection's children will start with an output RUN collection that
        directly holds all new datasets (see `output_run`).
    output_run : `str`
        Name of the new output RUN collection. If not provided then
        `output` must be provided and a new RUN collection will be created
        by appending a timestamp to the value passed with `output`. If this
        collection already exists then `extend_run` must be passed. When
        this is not set and the graph metadata carries a non-empty
        ``"output_run"`` entry, that entry is used instead.
    extend_run : `bool`
        Instead of creating a new RUN collection, insert datasets into
        either the one given by `output_run` (if provided) or the first
        child collection of `output` (which must be of type RUN).
    replace_run : `bool`
        Before creating a new RUN collection in an existing CHAINED
        collection, remove the first child collection (which must be of
        type RUN). This can be used to repeatedly write to the same
        (parent) collection during development, but it does not delete the
        datasets associated with the replaced run unless `prune_replaced`
        is also set. Requires `output`, and `extend_run` must be `None`.
    prune_replaced : "unstore", "purge", or `None`.
        If not `None`, delete the datasets in the collection replaced by
        `replace_run`, either just from the datastore ("unstore") or by
        removing them and the RUN completely ("purge"). Requires
        `replace_run`.
    data_query : `str`
        User query selection expression.
    skip_existing_in : `list` [ `str` ]
        List of collections; if all Quantum outputs already exist in the
        specified collections then that Quantum will be excluded from the
        QuantumGraph.
    skip_existing : `bool`
        Appends output RUN collection to the ``skip_existing_in`` list.
    debug : `bool`
        If true, enable debugging output using lsstDebug facility (imports
        debug.py). Stored in the options namespace as ``enableLsstDebug``.
    fail_fast : `bool`
        If true then stop processing at first error, otherwise process as
        many tasks as possible.
    clobber_outputs : `bool`
        Remove outputs from previous execution of the same quantum before
        new execution. Only applies to failed quanta if skip_existing is
        also given.
    summary : `str`
        File path to store job report in JSON format.
    mock : `bool`, optional
        If `True` then run mock pipeline instead of real one. Ignored if an
        existing QuantumGraph is provided. Not forwarded by this function.
    unmocked_dataset_types : `collections.abc.Sequence` [ `str` ]
        List of overall-input dataset types that should not be mocked.
        Ignored if an existing QuantumGraph is provided. Not forwarded by
        this function.
    mock_failure : `~collections.abc.Sequence`, optional
        List of quanta that should raise exceptions. Not forwarded by this
        function.
    enable_implicit_threading : `bool`, optional
        If `True`, do not disable implicit threading by third-party
        libraries. Implicit threading is always disabled during actual
        quantum execution if ``processes > 1``.
    cores_per_quantum : `int`
        Number of cores that can be used by each quantum.
    memory_per_quantum : `str`
        Amount of memory that each quantum can be allowed to use. Empty
        string implies no limit. The string can be either a single integer
        (implying units of MB) or a combination of number and unit.
    rebase : `bool`
        If `True` then reset output collection chain if it is inconsistent
        with the ``input`` collections.
    kwargs : `dict` [`str`, `str`]
        Ignored; click commands may accept options for more than one script
        function and pass all the option kwargs to each of the script
        functions which ignore these unused kwargs.
    """
    # Collect the options the framework reads. The mock-related options are
    # intentionally left out: they are only used when building the graph.
    settings = {
        "pdb": pdb,
        "graph_fixup": graph_fixup,
        "init_only": init_only,
        "no_versions": no_versions,
        "processes": processes,
        "start_method": start_method,
        "profile": profile,
        "skip_init_writes": skip_init_writes,
        "timeout": timeout,
        "register_dataset_types": register_dataset_types,
        "butler_config": butler_config,
        "input": input,
        "output": output,
        "output_run": output_run,
        "extend_run": extend_run,
        "replace_run": replace_run,
        "prune_replaced": prune_replaced,
        "data_query": data_query,
        "skip_existing_in": skip_existing_in,
        "skip_existing": skip_existing,
        # The ``debug`` option travels under a different attribute name.
        "enableLsstDebug": debug,
        "fail_fast": fail_fast,
        "clobber_outputs": clobber_outputs,
        "summary": summary,
        "enable_implicit_threading": enable_implicit_threading,
        "cores_per_quantum": cores_per_quantum,
        "memory_per_quantum": memory_per_quantum,
        "rebase": rebase,
    }
    args = SimpleNamespace(**settings)

    framework = CmdLineFwk()
    task_factory = TaskFactory()

    # Without an explicit output run, prefer the run recorded in the graph
    # metadata over letting a new timestamped run be created.
    if not args.output_run and qgraphObj.metadata:
        graph_output_run = qgraphObj.metadata.get("output_run")
        if graph_output_run:
            args.output_run = graph_output_run

    framework.runPipeline(qgraphObj, task_factory, args)

218 

219 f.runPipeline(qgraphObj, taskFactory, args)