Coverage for python/lsst/ctrl/mpexec/cli/script/run.py: 38%

11 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-11-04 09:49 +0000

1# This file is part of ctrl_mpexec. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28import logging 

29from types import SimpleNamespace 

30 

31from ... import CmdLineFwk, TaskFactory 

32 

33_log = logging.getLogger(__name__) 

34 

35 

def run(  # type: ignore
    pdb,
    graph_fixup,
    init_only,
    no_versions,
    processes,
    start_method,
    profile,
    qgraphObj,
    register_dataset_types,
    skip_init_writes,
    timeout,
    butler_config,
    input,
    output,
    output_run,
    extend_run,
    replace_run,
    prune_replaced,
    data_query,
    skip_existing_in,
    skip_existing,
    debug,
    fail_fast,
    clobber_outputs,
    summary,
    mock,
    unmocked_dataset_types,
    mock_failure,
    enable_implicit_threading,
    cores_per_quantum: int,
    memory_per_quantum: str,
    rebase,
    **kwargs,
):
    """Execute a quantum graph on behalf of the `pipetask run` subcommand.

    The options are bundled into a single namespace object and handed,
    together with the graph and a task factory, to
    ``CmdLineFwk.runPipeline``. Intended to be invoked only by command line
    tools and by unit tests that exercise this function.

    Parameters
    ----------
    pdb : `bool`
        Whether to drop into pdb when an exception is raised.
    graph_fixup : `str`
        Name of the class or factory method that produces the instance used
        to fix up the execution graph.
    init_only : `bool`
        If true, skip actual execution and only register dataset types
        and/or save init outputs.
    no_versions : `bool`
        If true, package versions are neither saved nor checked.
    processes : `int`
        Number of processes to use.
    start_method : `str` or `None`
        Start method from the `multiprocessing` module; `None` picks the
        best one for the current platform.
    profile : `str`
        Name of the file that cProfile information is dumped to.
    qgraphObj : `lsst.pipe.base.QuantumGraph`
        QuantumGraph produced by a previous subcommand.
    register_dataset_types : `bool`
        If true, register DatasetTypes that are not yet present in the
        Registry.
    skip_init_writes : `bool`
        If true, collection-wide 'init output' datasets (e.g. schemas) are
        not written.
    timeout : `int`
        Multiprocessing timeout; maximum wall time in seconds.
    butler_config : `str`, `dict`, or `lsst.daf.butler.Config`
        A `str` is the path of the gen3 butler/registry config file. A
        `dict` holds key value pairs used to init or update the
        `lsst.daf.butler.Config` instance. A `Config` is used directly to
        configure a Butler.
    input : `list` [ `str` ]
        Names of the input collection(s).
    output : `str`
        Name of the output CHAINED collection. It is either an existing
        CHAINED collection used as both input and output (when `input` is
        `None`), or a new CHAINED collection created to include all inputs
        (when `input` is not `None`). Either way, the collection's children
        start with an output RUN collection that directly holds all new
        datasets (see `output_run`).
    output_run : `str`
        Name of the new output RUN collection. When omitted, `output` must
        be given and a new RUN collection is created by appending a
        timestamp to the value of `output`. If the collection already
        exists, `extend_run` must be passed. When this is empty and the
        graph metadata carries a non-empty ``"output_run"`` entry, that
        entry is used instead.
    extend_run : `bool`
        Rather than creating a new RUN collection, insert datasets into the
        one named by `output_run` (if provided) or into the first child
        collection of `output` (which must be of type RUN).
    replace_run : `bool`
        Remove the first child collection (which must be of type RUN) of an
        existing CHAINED collection before a new RUN collection is created
        in it. Useful for repeatedly writing to the same (parent)
        collection during development; the datasets of the replaced run
        are not deleted unless `prune_replaced` is also set. Requires
        `output`, and `extend_run` must be `None`.
    prune_replaced : "unstore", "purge", or `None`.
        If not `None`, delete the datasets in the collection replaced by
        `replace_run`, either only from the datastore ("unstore") or by
        removing them and the RUN completely ("purge"). Requires
        `replace_run`.
    data_query : `str`
        User query selection expression.
    skip_existing_in : `list` [ `str` ]
        List of collections; a Quantum whose outputs all already exist in
        these collections is excluded from the QuantumGraph.
    skip_existing : `bool`
        Appends the output RUN collection to the ``skip_existing_in`` list.
    debug : `bool`
        If true, enable debugging output through the lsstDebug facility
        (imports debug.py). Forwarded under the name ``enableLsstDebug``.
    fail_fast : `bool`
        If true, stop at the first error; otherwise process as many tasks
        as possible.
    clobber_outputs : `bool`
        Remove outputs left by a previous execution of the same quantum
        before executing it again. Applies only to failed quanta when
        skip_existing is also given.
    summary : `str`
        Path of the file that receives the job report in JSON format.
    mock : `bool`, optional
        If `True` then run mock pipeline instead of real one. Ignored if an
        existing QuantumGraph is provided. Not forwarded by this function.
    unmocked_dataset_types : `collections.abc.Sequence` [ `str` ]
        Overall-input dataset types that should not be mocked. Ignored if
        an existing QuantumGraph is provided. Not forwarded by this
        function.
    mock_failure : `~collections.abc.Sequence`, optional
        Quanta that should raise exceptions. Not forwarded by this
        function.
    enable_implicit_threading : `bool`, optional
        If `True`, implicit threading by third-party libraries is not
        disabled. Implicit threading is always disabled during actual
        quantum execution if ``processes > 1``.
    cores_per_quantum : `int`
        Number of cores each quantum may use.
    memory_per_quantum : `str`
        Amount of memory each quantum is allowed to use. An empty string
        means no limit. The value is either a single integer (implying
        units of MB) or a number combined with a unit.
    rebase : `bool`
        If `True`, reset the output collection chain when it is
        inconsistent with ``input``.
    kwargs : `dict` [`str`, `str`]
        Ignored; click commands may accept options for more than one script
        function and pass every option kwarg to each of them, so unused
        ones are simply discarded here.
    """
    # Collect the forwarded options first; key order matches the order in
    # which the attributes end up on the namespace.
    settings = {
        "pdb": pdb,
        "graph_fixup": graph_fixup,
        "init_only": init_only,
        "no_versions": no_versions,
        "processes": processes,
        "start_method": start_method,
        "profile": profile,
        "skip_init_writes": skip_init_writes,
        "timeout": timeout,
        "register_dataset_types": register_dataset_types,
        "butler_config": butler_config,
        "input": input,
        "output": output,
        "output_run": output_run,
        "extend_run": extend_run,
        "replace_run": replace_run,
        "prune_replaced": prune_replaced,
        "data_query": data_query,
        "skip_existing_in": skip_existing_in,
        "skip_existing": skip_existing,
        "enableLsstDebug": debug,
        "fail_fast": fail_fast,
        "clobber_outputs": clobber_outputs,
        "summary": summary,
        # Mock options only used by qgraph, so they are left out here.
        "enable_implicit_threading": enable_implicit_threading,
        "cores_per_quantum": cores_per_quantum,
        "memory_per_quantum": memory_per_quantum,
        "rebase": rebase,
    }
    args = SimpleNamespace(**settings)

    framework = CmdLineFwk()
    task_factory = TaskFactory()

    # Without an explicit output run, prefer the one recorded in the graph
    # over letting a new timestamped run be created.
    if not args.output_run and qgraphObj.metadata:
        graph_run = qgraphObj.metadata.get("output_run")
        if graph_run:
            args.output_run = graph_run

    framework.runPipeline(qgraphObj, task_factory, args)

223 args.output_run = output_run 

224 

225 f.runPipeline(qgraphObj, taskFactory, args)