# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Module defining `makeParser` factory method.
"""

__all__ = ["makeParser"]

# -------------------------------
# Imports of standard modules --
# -------------------------------
from argparse import Action, ArgumentParser, RawDescriptionHelpFormatter
import collections
import copy
import re
import textwrap

# -----------------------------
# Imports for other modules --
# -----------------------------

# ----------------------------------
# Local non-exported definitions --
# ----------------------------------

# Tuple describing a single action to be performed when building a pipeline;
# its attributes are:
#   action: the name of the action, e.g. "new_task", "delete_task"
#   label: task label, may be None if the action does not require a label
#   value: argument value excluding the task label
_PipelineAction = collections.namedtuple("_PipelineAction", "action,label,value")


class _PipelineActionType:
    """Class defining a callable type which converts strings into
    `_PipelineAction` instances.

    Parameters
    ----------
    action : str
        Name of the action; it becomes the `action` attribute of each
        returned instance.
    regex : str
        Regular expression for the argument value; it can define groups
        'label' and 'value', which become the corresponding attributes of
        a returned instance.
    valueType : callable, optional
        Type conversion applied to the 'value' group before it is stored
        (default: `str`).
    """

    def __init__(self, action, regex='.*', valueType=str):
        self.action = action
        self.regex = re.compile(regex)
        self.valueType = valueType

    def __call__(self, value):
        match = self.regex.match(value)
        if not match:
            raise TypeError("Unrecognized option syntax: " + value)
        # get "label" group or use None as label
        try:
            label = match.group("label")
        except IndexError:
            label = None
        # if "value" group is not defined use whole string
        try:
            value = match.group("value")
        except IndexError:
            pass
        value = self.valueType(value)
        return _PipelineAction(self.action, label, value)

    def __repr__(self):
        """String representation of this class.

        argparse can use this in some error messages; the default
        implementation makes those messages incomprehensible.
        """
        return f"_PipelineActionType(action={self.action})"


_ACTION_ADD_TASK = _PipelineActionType("new_task", "(?P<value>[^:]+)(:(?P<label>.+))?")
_ACTION_DELETE_TASK = _PipelineActionType("delete_task", "(?P<value>)(?P<label>.+)")
_ACTION_CONFIG = _PipelineActionType("config", "(?P<label>.+):(?P<value>.+=.+)")
_ACTION_CONFIG_FILE = _PipelineActionType("configfile", "(?P<label>.+):(?P<value>.+)")
_ACTION_ADD_INSTRUMENT = _PipelineActionType("add_instrument", "(?P<value>[^:]+)")
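
# A minimal sketch of how these converters behave (task and label names below
# are hypothetical); each call returns a _PipelineAction namedtuple:
#
#   _ACTION_ADD_TASK("lsst.pipe.tasks.example.ExampleTask:example")
#     -> _PipelineAction(action="new_task", label="example",
#                        value="lsst.pipe.tasks.example.ExampleTask")
#   _ACTION_CONFIG("example:param=value")
#     -> _PipelineAction(action="config", label="example", value="param=value")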


class _LogLevelAction(Action):
    """Action class which collects logging levels.

    This action class collects arguments in the form "LEVEL" or
    "COMPONENT=LEVEL", where LEVEL is the name of a logging level
    (case-insensitive). It converts the series of arguments into a list of
    (COMPONENT, LEVEL) tuples. If the component name is missing, the first
    item of the tuple is set to `None`. The second item is always converted
    to upper case.
    """

    permittedLevels = set(['TRACE', 'DEBUG', 'INFO', 'WARN', 'ERROR', 'FATAL'])

    def __call__(self, parser, namespace, values, option_string=None):
        """Re-implementation of the base class method.

        See `argparse.Action` documentation for parameter description.
        """
        dest = getattr(namespace, self.dest)
        if dest is None:
            dest = []
            setattr(namespace, self.dest, dest)

        component, _, levelStr = values.partition("=")
        if not levelStr:
            levelStr, component = component, None
        logLevelUpr = levelStr.upper()
        if logLevelUpr not in self.permittedLevels:
            parser.error("loglevel=%s not one of %s" % (levelStr, tuple(self.permittedLevels)))
        dest.append((component, logLevelUpr))
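
# For illustration (component name hypothetical): the arguments
# "-L debug -L lsst.ctrl.mpexec=info" accumulate into
#   [(None, "DEBUG"), ("lsst.ctrl.mpexec", "INFO")]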


class _InputCollectionAction(Action):
    """Action class which collects input collection names.

    This action type accepts string values in the format:

        value :== collection[,collection[...]]
        collection :== [dataset_type:]collection_name

    and converts each value into a list of (collection_name, dataset_type)
    tuples, where the dataset type is `...` when no dataset type is given.
    Values from multiple arguments are all collected into the same list.
    The resulting list may contain multiple instances of the same collection
    name if it appears multiple times on the command line.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        """Re-implementation of the base class method.

        See `argparse.Action` documentation for parameter description.
        """
        dest = getattr(namespace, self.dest, [])
        # In case the default is set to a list (empty or not) we want to use
        # a new copy of that list as the initial value to avoid modifying the
        # default value.
        if dest is self.default:
            dest = copy.copy(dest)

        # split on commas; a collection can be preceded by a dataset type
        for collstr in values.split(","):
            dsType, sep, collection = collstr.partition(':')
            if not sep:
                dsType, collection = ..., dsType
            dest.append((collection, dsType))

        setattr(namespace, self.dest, dest)
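
# For illustration (collection and dataset type names hypothetical):
# "--input calib,raw:HSC/raw" accumulates into
#   [("calib", Ellipsis), ("HSC/raw", "raw")]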


_EPILOG = """\
Notes:
  * many options can appear multiple times; all values are used, in order
    left to right
  * @file reads command-line options from the specified file:
    * data may be distributed among multiple lines (e.g. one option per line)
    * data after # is treated as a comment and ignored
    * blank lines and lines starting with # are ignored
"""


def _makeButlerOptions(parser):
    """Add a set of options for data butler to a parser.

    Parameters
    ----------
    parser : `argparse.ArgumentParser`
    """
    group = parser.add_argument_group("Data repository and selection options")
    group.add_argument(
        "-b", "--butler-config", dest="butler_config", default=None, metavar="PATH",
        help="Location of the gen3 butler/registry config file."
    )
    group.add_argument(
        "-i", "--input", dest="input", action=_InputCollectionAction,
        metavar="COLL,DSTYPE:COLL", default=[],
        help=(
            "Comma-separated names of the input collection(s). If an entry "
            "includes a colon (:), the string before the colon is a dataset "
            "type name that restricts the search in that collection. "
            "May be passed multiple times (all arguments are concatenated)."
        )
    )
    group.add_argument(
        "-o", "--output", dest="output", default=None, metavar="COLL",
        help=(
            "Name of the output CHAINED collection. This may either be an "
            "existing CHAINED collection to use as both input and output "
            "(incompatible with --input), or a new CHAINED collection created "
            "to include all inputs (requires --input). "
            "In both cases, the collection's children will start with an "
            "output RUN collection that directly holds all new datasets (see "
            "--output-run)."
        )
    )
    group.add_argument(
        "--output-run", dest="output_run", default=None, metavar="COLL",
        help=(
            "Name of the new output RUN collection. If not provided, "
            "--output must be given, and a new RUN collection will be created "
            "by appending a timestamp to the value passed with --output. "
            "If this collection already exists, --extend-run must be passed."
        )
    )
    groupex = group.add_mutually_exclusive_group()
    groupex.add_argument(
        "--extend-run", dest="extend_run", default=False, action="store_true",
        help=(
            "Instead of creating a new RUN collection, insert datasets into "
            "either the one given by --output-run (if provided) or the first "
            "child collection of --output (which must be of type RUN)."
        )
    )
    groupex.add_argument(
        "--replace-run", dest="replace_run", default=False, action="store_true",
        help=(
            "Before creating a new RUN collection in an existing CHAINED "
            "collection, remove the first child collection (which must be of "
            "type RUN). "
            "This can be used to repeatedly write to the same (parent) "
            "collection during development, but it does not delete the "
            "datasets associated with the replaced run unless "
            "--prune-replaced is also passed. "
            "Requires --output, and is incompatible with --extend-run."
        )
    )
    group.add_argument(
        "--prune-replaced", dest="prune_replaced", choices=("unstore", "purge"), default=None,
        help=(
            "Delete the datasets in the collection replaced by --replace-run, "
            "either just from the datastore ('unstore') or by removing them "
            "and the RUN completely ('purge'). Requires --replace-run."
        )
    )
    group.add_argument("-d", "--data-query", dest="data_query", default="", metavar="QUERY",
                       help="User data selection expression.")
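
# For illustration only (repository path, collection names, and query are
# hypothetical), these options might be combined on a command line as:
#   -b /path/to/butler.yaml -i HSC/defaults -o u/someuser/outputs -d "visit = 12345"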


def _makeMetaOutputOptions(parser):
    """Add a set of options describing output metadata.

    Parameters
    ----------
    parser : `argparse.ArgumentParser`
    """
    group = parser.add_argument_group("Meta-information output options")
    group.add_argument("--skip-init-writes", dest="skip_init_writes", default=False,
                       action="store_true",
                       help="Do not write collection-wide 'init output' datasets (e.g. schemas).")
    group.add_argument("--init-only", dest="init_only", default=False,
                       action="store_true",
                       help="Do not actually run; just register dataset types and/or save init outputs.")
    group.add_argument("--register-dataset-types", dest="register_dataset_types", default=False,
                       action="store_true",
                       help="Register DatasetTypes that do not already exist in the Registry.")
    group.add_argument("--no-versions", dest="no_versions", default=False,
                       action="store_true",
                       help="Do not save or check package versions.")

283 

284def _makeLoggingOptions(parser): 

285 """Add a set of options for logging configuration. 

286 

287 Parameters 

288 ---------- 

289 parser : `argparse.ArgumentParser` 

290 """ 

291 group = parser.add_argument_group("Logging options") 

292 group.add_argument("-L", "--loglevel", action=_LogLevelAction, default=[], 

293 help="logging level; supported levels are [trace|debug|info|warn|error|fatal]", 

294 metavar="LEVEL|COMPONENT=LEVEL") 

295 group.add_argument("--longlog", action="store_true", help="use a more verbose format for the logging") 

296 group.add_argument("--debug", action="store_true", dest="enableLsstDebug", 

297 help="enable debugging output using lsstDebug facility (imports debug.py)") 

298 

299 

300def _makePipelineOptions(parser): 

301 """Add a set of options for building a pipeline. 

302 

303 Parameters 

304 ---------- 

305 parser : `argparse.ArgumentParser` 

306 """ 

307 group = parser.add_argument_group("Pipeline building options") 

308 group.add_argument("-p", "--pipeline", dest="pipeline", 

309 help="Location of a pipeline definition file in YAML format.", 

310 metavar="PATH") 

311 group.add_argument("-t", "--task", metavar="TASK[:LABEL]", 

312 dest="pipeline_actions", action='append', type=_ACTION_ADD_TASK, 

313 help="Task name to add to pipeline, must be a fully qualified task name. " 

314 "Task name can be followed by colon and " 

315 "label name, if label is not given than task base name (class name) " 

316 "is used as label.") 

317 group.add_argument("--delete", metavar="LABEL", 

318 dest="pipeline_actions", action='append', type=_ACTION_DELETE_TASK, 

319 help="Delete task with given label from pipeline.") 

320 group.add_argument("-c", "--config", metavar="LABEL:NAME=VALUE", 

321 dest="pipeline_actions", action='append', type=_ACTION_CONFIG, 

322 help="Configuration override(s) for a task with specified label, " 

323 "e.g. -c task:foo=newfoo -c task:bar.baz=3.") 

324 group.add_argument("-C", "--configfile", metavar="LABEL:PATH", 

325 dest="pipeline_actions", action='append', type=_ACTION_CONFIG_FILE, 

326 help="Configuration override file(s), applies to a task with a given label.") 

327 group.add_argument("--order-pipeline", dest="order_pipeline", 

328 default=False, action="store_true", 

329 help="Order tasks in pipeline based on their data dependencies, " 

330 "ordering is performed as last step before saving or executing " 

331 "pipeline.") 

332 group.add_argument("-s", "--save-pipeline", dest="save_pipeline", 

333 help="Location for storing resulting pipeline definition in YAML format.", 

334 metavar="PATH") 

335 group.add_argument("--pipeline-dot", dest="pipeline_dot", 

336 help="Location for storing GraphViz DOT representation of a pipeline.", 

337 metavar="PATH") 

338 group.add_argument("--instrument", metavar="instrument", 

339 dest="pipeline_actions", action="append", type=_ACTION_ADD_INSTRUMENT, 

340 help="Add an instrument which will be used to load config overrides when" 

341 " defining a pipeline. This must be the fully qualified class name") 


def _makeQuantumGraphOptions(parser):
    """Add a set of options controlling quantum graph generation.

    Parameters
    ----------
    parser : `argparse.ArgumentParser`
    """
    group = parser.add_argument_group("Quantum graph building options")
    group.add_argument("-g", "--qgraph", dest="qgraph",
                       help="Location of a serialized quantum graph definition "
                            "(pickle file). If this option is given, input data "
                            "options and pipeline-building options cannot be used.",
                       metavar="PATH")
    # TODO: I've made --skip-existing apply to _just_ the output run (which
    # means that it requires --extend-run), but a variant where we can also
    # skip anything in the input chained collection may also be useful; need to
    # think about whether that should be a separate argument or a conversion to
    # make this one take a value.
    group.add_argument("--skip-existing", dest="skip_existing",
                       default=False, action="store_true",
                       help="If all outputs of a Quantum already exist in the output RUN "
                            "collection, that Quantum will be excluded from the QuantumGraph. "
                            "Requires --extend-run.")
    group.add_argument("-q", "--save-qgraph", dest="save_qgraph",
                       help="Location for storing a serialized quantum graph definition "
                            "(pickle file).",
                       metavar="PATH")
    group.add_argument("--save-single-quanta", dest="save_single_quanta",
                       help="Format string of locations for storing individual quantum "
                            "graph definitions (pickle files). The {} placeholder in the "
                            "string will be replaced by a quantum number.",
                       metavar="PATH")
    group.add_argument("--qgraph-dot", dest="qgraph_dot",
                       help="Location for storing GraphViz DOT representation of a "
                            "quantum graph.",
                       metavar="PATH")


def _makeExecOptions(parser):
    """Add options controlling how tasks are executed.

    Parameters
    ----------
    parser : `argparse.ArgumentParser`
    """
    group = parser.add_argument_group("Execution options")
    group.add_argument("--doraise", action="store_true",
                       help="raise an exception on error (else log a message and continue)")
    group.add_argument("--profile", metavar="PATH", help="Dump cProfile statistics to filename")

    # parallelism options
    group.add_argument("-j", "--processes", type=int, default=1, help="Number of processes to use")
    group.add_argument("--timeout", type=float,
                       help="Timeout for multiprocessing; maximum wall time (sec)")

    # run-time graph fixup option
    group.add_argument("--graph-fixup", type=str, default=None,
                       help="Name of the class or factory method which makes an instance "
                            "used for execution graph fixup.")

# ------------------------
# Exported definitions --
# ------------------------


def makeParser(fromfile_prefix_chars='@', parser_class=ArgumentParser, **kwargs):
    """Make instance of command line parser for `CmdLineFwk`.

    Creates instance of parser populated with all options that are supported
    by command line activator. There is no additional logic here; all
    semantics are handled by the activator class.

    Parameters
    ----------
    fromfile_prefix_chars : `str`, optional
        Prefix for arguments to be used as options files (default: `@`).
    parser_class : `type`, optional
        Specifies the class of the argument parser; by default
        `ArgumentParser` is used.
    kwargs : extra keyword arguments
        Passed directly to `parser_class` constructor.

    Returns
    -------
    instance of `parser_class`
    """

    parser = parser_class(usage="%(prog)s subcommand [options]",
                          fromfile_prefix_chars=fromfile_prefix_chars,
                          epilog=_EPILOG,
                          formatter_class=RawDescriptionHelpFormatter,
                          **kwargs)

    # define sub-commands
    subparsers = parser.add_subparsers(dest="subcommand",
                                       title="commands",
                                       description=("Valid commands, use `<command> --help' to get "
                                                    "more info about each command:"),
                                       prog=parser.prog)
    # Python 3 workaround, see http://bugs.python.org/issue9253#msg186387
    # The issue was fixed in Python 3.6; the workaround is not needed starting
    # with that version.
    subparsers.required = True

    for subcommand in ("build", "qgraph", "run"):
        # build/qgraph/run sub-commands; they are largely identical except
        # for the command itself and its description

        if subcommand == "build":
            description = textwrap.dedent("""\
                Build and optionally save pipeline definition.
                This does not require input data to be specified.""")
        elif subcommand == "qgraph":
            description = textwrap.dedent("""\
                Build and optionally save pipeline and quantum graph.""")
        else:
            description = textwrap.dedent("""\
                Build and execute pipeline and quantum graph.""")

        subparser = subparsers.add_parser(subcommand,
                                          description=description,
                                          epilog=_EPILOG,
                                          formatter_class=RawDescriptionHelpFormatter)
        subparser.set_defaults(subparser=subparser,
                               pipeline_actions=[])
        _makeLoggingOptions(subparser)
        _makePipelineOptions(subparser)

        if subcommand in ("qgraph", "run"):
            _makeQuantumGraphOptions(subparser)
            _makeButlerOptions(subparser)

        if subcommand == "run":
            _makeExecOptions(subparser)
            _makeMetaOutputOptions(subparser)

        subparser.add_argument("--show", metavar="ITEM|ITEM=VALUE", action="append", default=[],
                               help="Dump various info to standard output. Possible items are: "
                                    "`config', `config=[Task::]<PATTERN>' or "
                                    "`config=[Task::]<PATTERN>:NOIGNORECASE' to dump configuration "
                                    "fields possibly matching given pattern and/or task label; "
                                    "`history=<FIELD>' to dump configuration history for a field; "
                                    "the field name is specified as [Task::][SubTask.]Field; "
                                    "`dump-config', `dump-config=Task' to dump complete configuration "
                                    "for a task given its label, or for all tasks; "
                                    "`pipeline' to show pipeline composition; "
                                    "`graph' to show information about quanta; "
                                    "`workflow' to show information about quanta and their "
                                    "dependencies; "
                                    "`tasks' to show task composition.")

    return parser
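
# A minimal usage sketch (task name and file path hypothetical):
#
#   parser = makeParser()
#   args = parser.parse_args(["build",
#                             "-t", "lsst.pipe.tasks.example.ExampleTask:example",
#                             "-s", "pipeline.yaml"])
#   # args.subcommand == "build"
#   # args.pipeline_actions == [_PipelineAction(action="new_task",
#   #                                           label="example",
#   #                                           value="lsst.pipe.tasks.example.ExampleTask")]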