Coverage for python/lsst/ctrl/mpexec/cli/script/run.py: 26%

17 statements  

« prev     ^ index     » next       coverage.py v7.5.0, created at 2024-04-25 10:28 -0700

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28import logging 

29from types import SimpleNamespace 

30 

31from lsst.utils.threads import disable_implicit_threading 

32 

33from ... import CmdLineFwk, TaskFactory 

34 

35_log = logging.getLogger(__name__) 

36 

37 

def run(  # type: ignore
    pdb,
    graph_fixup,
    init_only,
    no_versions,
    processes,
    start_method,
    profile,
    qgraphObj,
    register_dataset_types,
    skip_init_writes,
    timeout,
    butler_config,
    input,
    output,
    output_run,
    extend_run,
    replace_run,
    prune_replaced,
    data_query,
    skip_existing_in,
    skip_existing,
    debug,
    fail_fast,
    clobber_outputs,
    summary,
    mock,
    unmocked_dataset_types,
    mock_failure,
    enable_implicit_threading,
    cores_per_quantum: int,
    memory_per_quantum: str,
    rebase,
    **kwargs,
):
    """Implement the command line interface `pipetask run` subcommand.

    Intended to be called only by command line tools and by unit tests that
    exercise this function. The options are bundled into a single namespace
    object and handed, together with the quantum graph and a task factory,
    to `CmdLineFwk.runPipeline`.

    Parameters
    ----------
    pdb : `bool`
        Drop into pdb on exception or not.
    graph_fixup : `str`
        Name of the class or factory method that makes an instance used for
        execution graph fixup.
    init_only : `bool`
        If true, do not actually run; just register dataset types and/or
        save init outputs.
    no_versions : `bool`
        If true, do not save or check package versions.
    processes : `int`
        The number of processes to use.
    start_method : `str` or `None`
        Start method from the `multiprocessing` module; `None` selects the
        best one for the current platform. The value ``"fork"`` is accepted
        for compatibility only: it is replaced by ``"spawn"`` and a warning
        is logged.
    profile : `str`
        File name to dump cProfile information to.
    qgraphObj : `lsst.pipe.base.QuantumGraph`
        A QuantumGraph generated by a previous subcommand. If no output run
        was given and the graph metadata has a non-empty ``"output_run"``
        entry, that entry is used as the output run.
    register_dataset_types : `bool`
        If true, register DatasetTypes that do not already exist in the
        Registry.
    skip_init_writes : `bool`
        If true, do not write collection-wide 'init output' datasets (e.g.
        schemas).
    timeout : `int`
        Timeout for multiprocessing; maximum wall time (sec).
    butler_config : `str`, `dict`, or `lsst.daf.butler.Config`
        If `str`, the path location of the gen3 butler/registry config
        file. If `dict`, key value pairs used to init or update the
        `lsst.daf.butler.Config` instance. If `Config`, the object used to
        configure a Butler.
    input : `list` [ `str` ]
        List of names of the input collection(s).
    output : `str`
        Name of the output CHAINED collection. This may either be an
        existing CHAINED collection to use as both input and output (if
        `input` is `None`), or a new CHAINED collection created to include
        all inputs (if `input` is not `None`). In both cases, the
        collection's children will start with an output RUN collection that
        directly holds all new datasets (see `output_run`).
    output_run : `str`
        Name of the new output RUN collection. If not provided then
        `output` must be provided and a new RUN collection will be created
        by appending a timestamp to the value passed with `output`. If this
        collection already exists then `extend_run` must be passed.
    extend_run : `bool`
        Instead of creating a new RUN collection, insert datasets into
        either the one given by `output_run` (if provided) or the first
        child collection of `output` (which must be of type RUN).
    replace_run : `bool`
        Before creating a new RUN collection in an existing CHAINED
        collection, remove the first child collection (which must be of
        type RUN). This can be used to repeatedly write to the same
        (parent) collection during development, but it does not delete the
        datasets associated with the replaced run unless ``prune_replaced``
        is also set. Requires `output`, and `extend_run` must be `None`.
    prune_replaced : "unstore", "purge", or `None`
        If not `None`, delete the datasets in the collection replaced by
        `replace_run`, either just from the datastore ("unstore") or by
        removing them and the RUN completely ("purge"). Requires
        `replace_run`.
    data_query : `str`
        User query selection expression.
    skip_existing_in : `list` [ `str` ]
        List of collections; if all Quantum outputs already exist in the
        specified list of collections then that Quantum will be excluded
        from the QuantumGraph.
    skip_existing : `bool`
        Appends output RUN collection to the ``skip_existing_in`` list.
    debug : `bool`
        If true, enable debugging output using lsstDebug facility (imports
        debug.py). Forwarded under the name ``enableLsstDebug``.
    fail_fast : `bool`
        If true then stop processing at first error, otherwise process as
        many tasks as possible.
    clobber_outputs : `bool`
        Remove outputs from previous execution of the same quantum before
        new execution. Only applies to failed quanta if skip_existing is
        also given.
    summary : `str`
        File path to store job report in JSON format.
    mock : `bool`, optional
        If `True` then run mock pipeline instead of real one. Ignored if an
        existing QuantumGraph is provided; this function does not forward
        it.
    unmocked_dataset_types : `collections.abc.Sequence` [ `str` ]
        List of overall-input dataset types that should not be mocked.
        Ignored if an existing QuantumGraph is provided; this function does
        not forward it.
    mock_failure : `~collections.abc.Sequence`, optional
        List of quanta that should raise exceptions. This function does not
        forward it.
    enable_implicit_threading : `bool`, optional
        If `True`, do not disable implicit threading by third-party
        libraries. Implicit threading is always disabled during actual
        quantum execution if ``processes > 1``.
    cores_per_quantum : `int`
        Number of cores that can be used by each quantum.
    memory_per_quantum : `str`
        Amount of memory that each quantum can be allowed to use. Empty
        string implies no limit. The string can be either a single integer
        (implying units of MB) or a combination of number and unit.
    rebase : `bool`
        If `True` then reset output collection chain if it is inconsistent
        with the ``input`` collections.
    **kwargs : `dict` [`str`, `str`]
        Ignored; click commands may accept options for more than one script
        function and pass all the option kwargs to each of the script
        functions which ignore these unused kwargs.
    """
    # "fork" is still accepted for compatibility, but execution always uses
    # "spawn" in its place.
    if start_method == "fork":
        _log.warning("Option --start-method=fork is unsafe and no longer supported, will use spawn instead.")
        start_method = "spawn"

    if not enable_implicit_threading:
        disable_implicit_threading()

    # Collect the options the executor consumes; key order matches the
    # attribute order of the resulting namespace.
    options = {
        "pdb": pdb,
        "graph_fixup": graph_fixup,
        "init_only": init_only,
        "no_versions": no_versions,
        "processes": processes,
        "start_method": start_method,
        "profile": profile,
        "skip_init_writes": skip_init_writes,
        "timeout": timeout,
        "register_dataset_types": register_dataset_types,
        "butler_config": butler_config,
        "input": input,
        "output": output,
        "output_run": output_run,
        "extend_run": extend_run,
        "replace_run": replace_run,
        "prune_replaced": prune_replaced,
        "data_query": data_query,
        "skip_existing_in": skip_existing_in,
        "skip_existing": skip_existing,
        "enableLsstDebug": debug,
        "fail_fast": fail_fast,
        "clobber_outputs": clobber_outputs,
        "summary": summary,
        # Mock options are only used by qgraph, so they are left out here.
        "enable_implicit_threading": enable_implicit_threading,
        "cores_per_quantum": cores_per_quantum,
        "memory_per_quantum": memory_per_quantum,
        "rebase": rebase,
    }
    args = SimpleNamespace(**options)

    framework = CmdLineFwk()
    task_factory = TaskFactory()

    # Without an explicit output run, prefer the one recorded in the graph
    # metadata over letting a new timestamped run be created.
    if not args.output_run:
        graph_metadata = qgraphObj.metadata
        if graph_metadata:
            graph_output_run = graph_metadata.get("output_run")
            if graph_output_run:
                args.output_run = graph_output_run

    framework.runPipeline(qgraphObj, task_factory, args)