# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Module defining CmdLineFwk class and related methods.
"""

from __future__ import annotations

__all__ = ["CmdLineFwk"]

# -------------------------------
# Imports of standard modules --
# -------------------------------
import copy
import datetime
import fnmatch
import getpass
import logging
import re
import sys
import warnings
from types import SimpleNamespace
from typing import Any, Iterable, Optional, Tuple

import lsst.pex.config as pexConfig
import lsst.pex.config.history as pexConfigHistory
from lsst.daf.butler import Butler, CollectionSearch, CollectionType, DatasetRef, Registry
from lsst.daf.butler.registry import MissingCollectionError, RegistryDefaults
from lsst.pipe.base import (
    GraphBuilder,
    Instrument,
    Pipeline,
    PipelineDatasetTypes,
    QuantumGraph,
    TaskDef,
    TaskFactory,
    buildExecutionButler,
)
from lsst.utils import doImportType

from . import util
from .dotTools import graph2dot, pipeline2dot
from .executionGraphFixup import ExecutionGraphFixup
from .mpGraphExecutor import MPGraphExecutor
from .preExecInit import PreExecInit
from .singleQuantumExecutor import SingleQuantumExecutor

# ----------------------------------
# Local non-exported definitions --
# ----------------------------------

_LOG = logging.getLogger(__name__)


class _OutputChainedCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.CHAINED` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """

    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            self.chain = tuple(registry.getCollectionChain(name))
            self.exists = True
        except MissingCollectionError:
            self.chain = ()
            self.exists = False

    def __str__(self) -> str:
        return self.name

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """

    chain: Tuple[str, ...]
    """The definition of the collection, if it already exists (`tuple` [`str`]).

    Empty if the collection does not already exist.
    """


class _OutputRunCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.RUN` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """

    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            actualType = registry.getCollectionType(name)
            if actualType is not CollectionType.RUN:
                raise TypeError(f"Collection '{name}' exists but has type {actualType.name}, not RUN.")
            self.exists = True
        except MissingCollectionError:
            self.exists = False

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """


class _ButlerFactory:
    """A helper class for processing command-line arguments related to input
    and output collections.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.

    args : `types.SimpleNamespace`
        Parsed command-line arguments.  The following attributes are used,
        either at construction or in later methods.

        ``output``
            The name of a `~lsst.daf.butler.CollectionType.CHAINED`
            input/output collection.

        ``output_run``
            The name of a `~lsst.daf.butler.CollectionType.RUN` input/output
            collection.

        ``extend_run``
            A boolean indicating whether ``output_run`` should already exist
            and be extended.

        ``replace_run``
            A boolean indicating that (if `True`) ``output_run`` should
            already exist but will be removed from the output chained
            collection and replaced with a new one.

        ``prune_replaced``
            A boolean indicating whether to prune the replaced run (requires
            ``replace_run``).

        ``inputs``
            Input collections of any type; may be any expression handled by
            `lsst.daf.butler.registry.CollectionSearch.fromExpression`.

        ``butler_config``
            Path to a data repository root or configuration file.

    writeable : `bool`
        If `True`, a `Butler` is being initialized in a context where actual
        writes should happen, and hence an output run is necessary.

    Raises
    ------
    ValueError
        Raised if ``writeable`` is `True` but there are no output collections.
    """

    def __init__(self, registry: Registry, args: SimpleNamespace, writeable: bool):
        if args.output is not None:
            self.output = _OutputChainedCollectionInfo(registry, args.output)
        else:
            self.output = None
        if args.output_run is not None:
            self.outputRun = _OutputRunCollectionInfo(registry, args.output_run)
        elif self.output is not None:
            if args.extend_run:
                if not self.output.chain:
                    raise ValueError(
                        "Cannot use --extend-run option with non-existing or empty output chain"
                    )
                runName = self.output.chain[0]
            else:
                runName = "{}/{}".format(self.output, Instrument.makeCollectionTimestamp())
            self.outputRun = _OutputRunCollectionInfo(registry, runName)
        elif not writeable:
            # If we're not writing yet, it is OK to have no output run.
            self.outputRun = None
        else:
            raise ValueError("Cannot write without at least one of (--output, --output-run).")
        # Recursively flatten any input CHAINED collections.  We do this up
        # front so we can tell if the user passes the same inputs on
        # subsequent calls, even though we also flatten when we define the
        # output CHAINED collection.
        self.inputs = tuple(registry.queryCollections(args.input, flattenChains=True)) if args.input else ()
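
    # A sketch of the kind of namespace this class consumes (attribute names
    # from the class docstring; the values here are hypothetical):
    #
    #     args = SimpleNamespace(
    #         butler_config="/path/to/repo",
    #         input=("HSC/defaults",),
    #         output="u/user/coadd",
    #         output_run=None,
    #         extend_run=False,
    #         replace_run=False,
    #         prune_replaced=None,
    #     )
    #     factory = _ButlerFactory(butler.registry, args, writeable=False)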

    def check(self, args: SimpleNamespace) -> None:
        """Check command-line options for consistency with each other and the
        data repository.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.
        """
        assert not (args.extend_run and args.replace_run), "In mutually-exclusive group in ArgumentParser."
        if self.inputs and self.output is not None and self.output.exists:
            # Passing the same inputs that were used to initialize the output
            # collection is allowed; this means they must _end_ with the same
            # collections, because we push new runs to the front of the chain.
            for c1, c2 in zip(self.inputs[::-1], self.output.chain[::-1]):
                if c1 != c2:
                    raise ValueError(
                        f"Output CHAINED collection {self.output.name!r} exists, but it ends with "
                        "a different sequence of input collections than those given: "
                        f"{c1!r} != {c2!r} in inputs={self.inputs} vs "
                        f"{self.output.name}={self.output.chain}."
                    )
            if len(self.inputs) > len(self.output.chain):
                nNew = len(self.inputs) - len(self.output.chain)
                raise ValueError(
                    f"Cannot add new input collections {self.inputs[:nNew]} after "
                    "output collection is first created."
                )
        if args.extend_run:
            if self.outputRun is None:
                raise ValueError("Cannot --extend-run when no output collection is given.")
            elif not self.outputRun.exists:
                raise ValueError(
                    f"Cannot --extend-run; output collection '{self.outputRun.name}' does not exist."
                )
        if not args.extend_run and self.outputRun is not None and self.outputRun.exists:
            raise ValueError(
                f"Output run '{self.outputRun.name}' already exists, but --extend-run was not given."
            )
        if args.prune_replaced and not args.replace_run:
            raise ValueError("--prune-replaced requires --replace-run.")
        if args.replace_run and (self.output is None or not self.output.exists):
            raise ValueError("--output must point to an existing CHAINED collection for --replace-run.")

    @classmethod
    def _makeReadParts(cls, args: SimpleNamespace) -> tuple[Butler, CollectionSearch, _ButlerFactory]:
        """Common implementation for `makeReadButler` and
        `makeButlerAndCollections`.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler constructed from the repo at
            ``args.butler_config``, but with no default collections.
        inputs : `lsst.daf.butler.registry.CollectionSearch`
            A collection search path constructed according to ``args``.
        self : `_ButlerFactory`
            A new `_ButlerFactory` instance representing the processed version
            of ``args``.
        """
        butler = Butler(args.butler_config, writeable=False)
        self = cls(butler.registry, args, writeable=False)
        self.check(args)
        if self.output and self.output.exists:
            if args.replace_run:
                replaced = self.output.chain[0]
                inputs = list(self.output.chain[1:])
                _LOG.debug(
                    "Simulating collection search in '%s' after removing '%s'.", self.output.name, replaced
                )
            else:
                inputs = [self.output.name]
        else:
            inputs = list(self.inputs)
        if args.extend_run:
            assert self.outputRun is not None, "Output collection has to be specified."
            inputs.insert(0, self.outputRun.name)
        collSearch = CollectionSearch.fromExpression(inputs)
        return butler, collSearch, self

    @classmethod
    def makeReadButler(cls, args: SimpleNamespace) -> Butler:
        """Construct a read-only butler according to the given command-line
        arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler initialized with the collections specified by
            ``args``.
        """
        butler, inputs, _ = cls._makeReadParts(args)
        _LOG.debug("Preparing butler to read from %s.", inputs)
        return Butler(butler=butler, collections=inputs)
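
    # Typical use, given a parsed-arguments namespace like the sketch shown
    # after __init__ above:
    #
    #     butler = _ButlerFactory.makeReadButler(args)
    #     # butler now searches the collections implied by args by default.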

    @classmethod
    def makeButlerAndCollections(
        cls, args: SimpleNamespace
    ) -> Tuple[Butler, CollectionSearch, Optional[str]]:
        """Return a read-only butler, a collection search path, and the name
        of the run to be used for future writes.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler that collections will be added to and/or
            queried from.
        inputs : `lsst.daf.butler.registry.CollectionSearch`
            Collections to search for datasets.
        run : `str` or `None`
            Name of the output `~lsst.daf.butler.CollectionType.RUN`
            collection if it already exists, or `None` if it does not.
        """
        butler, inputs, self = cls._makeReadParts(args)
        run: Optional[str] = None
        if args.extend_run:
            assert self.outputRun is not None, "Output collection has to be specified."
            run = self.outputRun.name
        _LOG.debug("Preparing registry to read from %s and expect future writes to '%s'.", inputs, run)
        return butler, inputs, run

    @classmethod
    def makeWriteButler(cls, args: SimpleNamespace, taskDefs: Optional[Iterable[TaskDef]] = None) -> Butler:
        """Return a read-write butler initialized to write to and read from
        the collections specified by the given command-line arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments.  See class documentation for the
            construction parameter of the same name.
        taskDefs : iterable of `TaskDef`, optional
            Definitions for tasks in a pipeline.  This argument is only needed
            if ``args.replace_run`` is `True` and ``args.prune_replaced`` is
            "unstore".

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-write butler initialized according to the given arguments.
        """
        butler = Butler(args.butler_config, writeable=True)
        self = cls(butler.registry, args, writeable=True)
        self.check(args)
        assert self.outputRun is not None, "Output collection has to be specified."  # for mypy
        if self.output is not None:
            chainDefinition = list(self.output.chain if self.output.exists else self.inputs)
            if args.replace_run:
                replaced = chainDefinition.pop(0)
                if args.prune_replaced == "unstore":
                    # Remove datasets from the datastore.
                    with butler.transaction():
                        refs: Iterable[DatasetRef] = butler.registry.queryDatasets(..., collections=replaced)
                        # We want to remove regular outputs but keep
                        # initOutputs, configs, and versions.
                        if taskDefs is not None:
                            initDatasetNames = set(PipelineDatasetTypes.initOutputNames(taskDefs))
                            refs = [ref for ref in refs if ref.datasetType.name not in initDatasetNames]
                        butler.pruneDatasets(refs, unstore=True, disassociate=False)
                elif args.prune_replaced == "purge":
                    # Erase the entire collection and all datasets; the
                    # collection must be removed from its chain first.
                    with butler.transaction():
                        butler.registry.setCollectionChain(self.output.name, chainDefinition, flatten=True)
                        butler.pruneCollection(replaced, purge=True, unstore=True)
                elif args.prune_replaced is not None:
                    raise NotImplementedError(f"Unsupported --prune-replaced option '{args.prune_replaced}'.")
            if not self.output.exists:
                butler.registry.registerCollection(self.output.name, CollectionType.CHAINED)
            if not args.extend_run:
                butler.registry.registerCollection(self.outputRun.name, CollectionType.RUN)
                chainDefinition.insert(0, self.outputRun.name)
                butler.registry.setCollectionChain(self.output.name, chainDefinition, flatten=True)
            _LOG.debug(
                "Preparing butler to write to '%s' and read from '%s'=%s",
                self.outputRun.name,
                self.output.name,
                chainDefinition,
            )
            butler.registry.defaults = RegistryDefaults(run=self.outputRun.name, collections=self.output.name)
        else:
            inputs = CollectionSearch.fromExpression((self.outputRun.name,) + self.inputs)
            _LOG.debug("Preparing butler to write to '%s' and read from %s.", self.outputRun.name, inputs)
            butler.registry.defaults = RegistryDefaults(run=self.outputRun.name, collections=inputs)
        return butler
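
    # Sketch of the --replace-run bookkeeping above (hypothetical names):
    # starting from an output chain ("run1", "in1"), the old run is popped
    # and the new run is prepended, so after setCollectionChain the chain
    # reads ("run2", "in1").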

    output: Optional[_OutputChainedCollectionInfo]
    """Information about the output chained collection, if there is or will be
    one (`_OutputChainedCollectionInfo` or `None`).
    """

    outputRun: Optional[_OutputRunCollectionInfo]
    """Information about the output run collection, if there is or will be
    one (`_OutputRunCollectionInfo` or `None`).
    """

    inputs: Tuple[str, ...]
    """Input collections provided directly by the user (`tuple` [ `str` ]).
    """


class _FilteredStream:
    """A file-like object that filters some config fields.

    Note
    ----
    This class depends on implementation details of ``Config.saveToStream``
    methods, in particular that that method uses a single call to ``write()``
    to save information about a single config field, and that this call
    combines the comment string(s) for a field with the field path and value.
    This class will not work reliably on the "import" strings, so imports
    should be disabled by passing ``skipImports=True`` to ``saveToStream()``.
    """

    def __init__(self, pattern: str):
        # Obey case if the pattern isn't lowercase or requests NOIGNORECASE.
        mat = re.search(r"(.*):NOIGNORECASE$", pattern)

        if mat:
            pattern = mat.group(1)
            self._pattern = re.compile(fnmatch.translate(pattern))
        else:
            if pattern != pattern.lower():
                print(
                    f'Matching "{pattern}" without regard to case (append :NOIGNORECASE to prevent this)',
                    file=sys.stdout,
                )
            self._pattern = re.compile(fnmatch.translate(pattern), re.IGNORECASE)

    def write(self, showStr: str) -> None:
        # Strip off doc string line(s) and cut off at "=" for string matching.
        matchStr = showStr.rstrip().split("\n")[-1].split("=")[0]
        if self._pattern.search(matchStr):
            sys.stdout.write(showStr)
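
# Example of the intended filtering behavior (an illustrative sketch based on
# the pattern handling above; field names are hypothetical):
#
#     out = _FilteredStream("connections.*")          # case-insensitive glob
#     out = _FilteredStream("doWrite:NOIGNORECASE")   # exact-case match
#     config.saveToStream(out, root="config", skipImports=True)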


# ------------------------
# Exported definitions --
# ------------------------


class CmdLineFwk:
    """PipelineTask framework which executes tasks from the command line.

    In addition to executing tasks this activator provides additional methods
    for task management like dumping configuration or execution chain.
    """

    MP_TIMEOUT = 3600 * 24 * 30  # Default timeout (sec) for multiprocessing

    def __init__(self) -> None:
        pass

    def makePipeline(self, args: SimpleNamespace) -> Pipeline:
        """Build a pipeline from command line arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        pipeline : `~lsst.pipe.base.Pipeline`
            The constructed pipeline.
        """
        if args.pipeline:
            pipeline = Pipeline.from_uri(args.pipeline)
        else:
            pipeline = Pipeline("anonymous")

        # Loop over all pipeline actions and apply them in order.
        for action in args.pipeline_actions:
            if action.action == "add_instrument":
                pipeline.addInstrument(action.value)

            elif action.action == "new_task":
                pipeline.addTask(action.value, action.label)

            elif action.action == "delete_task":
                pipeline.removeTask(action.label)

            elif action.action == "config":
                # The action value string is "field=value"; split it at '='.
                field, _, value = action.value.partition("=")
                pipeline.addConfigOverride(action.label, field, value)

            elif action.action == "configfile":
                pipeline.addConfigFile(action.label, action.value)

            else:
                raise ValueError(f"Unexpected pipeline action: {action.action}")

        if args.save_pipeline:
            pipeline.write_to_uri(args.save_pipeline)

        if args.pipeline_dot:
            pipeline2dot(pipeline, args.pipeline_dot)

        return pipeline
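
    # For illustration, pipeline actions parsed from the command line arrive
    # here shaped roughly like (structure assumed for this sketch; labels and
    # values are hypothetical):
    #
    #     SimpleNamespace(action="new_task", label="isr", value="SomeTask")
    #     SimpleNamespace(action="config", label="isr", value="field=2")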

    def makeGraph(self, pipeline: Pipeline, args: SimpleNamespace) -> Optional[QuantumGraph]:
        """Build a graph from command line arguments.

        Parameters
        ----------
        pipeline : `~lsst.pipe.base.Pipeline`
            Pipeline; can be empty or `None` if the graph is read from a file.
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        graph : `~lsst.pipe.base.QuantumGraph` or `None`
            If the resulting graph is empty, `None` is returned.
        """
        # Make sure that --extend-run always enables --skip-existing.
        if args.extend_run:
            args.skip_existing = True

        butler, collections, run = _ButlerFactory.makeButlerAndCollections(args)

        if args.skip_existing and run:
            args.skip_existing_in += (run,)

        if args.qgraph:
            # click passes an empty tuple as the default for qgraph_node_id.
            nodes = args.qgraph_node_id or None
            qgraph = QuantumGraph.loadUri(
                args.qgraph, butler.registry.dimensions, nodes=nodes, graphID=args.qgraph_id
            )

            # A pipeline cannot be provided in this case.
            if pipeline:
                raise ValueError("Pipeline must not be given when quantum graph is read from file.")
            if args.show_qgraph_header:
                print(QuantumGraph.readHeader(args.qgraph))
        else:
            # Make the execution plan (a.k.a. DAG) for the pipeline.
            graphBuilder = GraphBuilder(
                butler.registry,
                skipExistingIn=args.skip_existing_in,
                clobberOutputs=args.clobber_outputs,
                datastore=butler.datastore if args.qgraph_datastore_records else None,
            )
            # Accumulate metadata.
            metadata = {
                "input": args.input,
                "output": args.output,
                "butler_argument": args.butler_config,
                "output_run": args.output_run,
                "extend_run": args.extend_run,
                "skip_existing_in": args.skip_existing_in,
                "skip_existing": args.skip_existing,
                "data_query": args.data_query,
                "user": getpass.getuser(),
                "time": f"{datetime.datetime.now()}",
            }
            qgraph = graphBuilder.makeGraph(
                pipeline,
                collections,
                run,
                args.data_query,
                metadata=metadata,
                datasetQueryConstraint=args.dataset_query_constraint,
            )
            if args.show_qgraph_header:
                qgraph.buildAndPrintHeader()

        # Count quanta in the graph; warn and return None if it is empty.
        nQuanta = len(qgraph)
        if nQuanta == 0:
            warnings.warn("QuantumGraph is empty", stacklevel=2)
            return None
        else:
            _LOG.info(
                "QuantumGraph contains %d quanta for %d tasks, graph ID: %r",
                nQuanta,
                len(qgraph.taskGraph),
                qgraph.graphID,
            )

        if args.save_qgraph:
            qgraph.saveUri(args.save_qgraph)

        if args.save_single_quanta:
            for quantumNode in qgraph:
                sqgraph = qgraph.subset(quantumNode)
                uri = args.save_single_quanta.format(quantumNode)
                sqgraph.saveUri(uri)

        if args.qgraph_dot:
            graph2dot(qgraph, args.qgraph_dot)

        if args.execution_butler_location:
            butler = Butler(args.butler_config)
            newArgs = copy.deepcopy(args)

            def builderShim(butler: Butler) -> Butler:
                newArgs.butler_config = butler._config
                # Calling makeWriteButler is done for its side effects,
                # namely parsing all the args into collection names,
                # creating collections, etc.
                newButler = _ButlerFactory.makeWriteButler(newArgs)
                return newButler

            # Include the output collection in the input collections
            # if it exists in the repo.
            all_inputs = args.input
            if args.output is not None:
                try:
                    all_inputs += (next(iter(butler.registry.queryCollections(args.output))),)
                except MissingCollectionError:
                    pass

            _LOG.debug("Calling buildExecutionButler with collections=%s", all_inputs)
            buildExecutionButler(
                butler,
                qgraph,
                args.execution_butler_location,
                run,
                butlerModifier=builderShim,
                collections=all_inputs,
                clobber=args.clobber_execution_butler,
                datastoreRoot=args.target_datastore_root,
                transfer=args.transfer,
            )

        return qgraph
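
    # A minimal Python-level sketch tying this together (args as in the
    # namespace sketches above):
    #
    #     fwk = CmdLineFwk()
    #     pipeline = fwk.makePipeline(args)
    #     qgraph = fwk.makeGraph(pipeline, args)  # may return None if empty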

    def runPipeline(
        self,
        graph: QuantumGraph,
        taskFactory: TaskFactory,
        args: SimpleNamespace,
        butler: Optional[Butler] = None,
    ) -> None:
        """Execute a complete QuantumGraph.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        taskFactory : `~lsst.pipe.base.TaskFactory`
            Task factory.
        args : `types.SimpleNamespace`
            Parsed command line.
        butler : `~lsst.daf.butler.Butler`, optional
            Data Butler instance; if not defined then a new instance is made
            using command line options.
        """
        # Make sure that --extend-run always enables --skip-existing.
        if args.extend_run:
            args.skip_existing = True

        # Make a butler instance.
        if butler is None:
            butler = _ButlerFactory.makeWriteButler(args, graph.iterTaskGraph())

        if args.skip_existing:
            args.skip_existing_in += (butler.run,)

        # Enable lsstDebug debugging.  Note that this is done once in the
        # main process before PreExecInit and it is also repeated before
        # running each task in SingleQuantumExecutor (which may not be
        # needed if `multiprocessing` always uses the fork start method).
        if args.enableLsstDebug:
            try:
                _LOG.debug("Will try to import debug.py")
                import debug  # type: ignore # noqa:F401
            except ImportError:
                _LOG.warning("No 'debug' module found.")

        # Save all InitOutputs, configs, etc.
        preExecInit = PreExecInit(butler, taskFactory, extendRun=args.extend_run, mock=args.mock)
        preExecInit.initialize(
            graph,
            saveInitOutputs=not args.skip_init_writes,
            registerDatasetTypes=args.register_dataset_types,
            saveVersions=not args.no_versions,
        )

        if not args.init_only:
            graphFixup = self._importGraphFixup(args)
            quantumExecutor = SingleQuantumExecutor(
                taskFactory,
                skipExistingIn=args.skip_existing_in,
                clobberOutputs=args.clobber_outputs,
                enableLsstDebug=args.enableLsstDebug,
                exitOnKnownError=args.fail_fast,
                mock=args.mock,
                mock_configs=args.mock_configs,
            )
            timeout = self.MP_TIMEOUT if args.timeout is None else args.timeout
            executor = MPGraphExecutor(
                numProc=args.processes,
                timeout=timeout,
                startMethod=args.start_method,
                quantumExecutor=quantumExecutor,
                failFast=args.fail_fast,
                pdb=args.pdb,
                executionGraphFixup=graphFixup,
            )
            try:
                with util.profile(args.profile, _LOG):
                    executor.execute(graph, butler)
            finally:
                if args.summary:
                    report = executor.getReport()
                    if report:
                        with open(args.summary, "w") as out:
                            # Do not save fields that are not set.
                            out.write(report.json(exclude_none=True, indent=2))
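
    # End-to-end sketch (a hypothetical driver; the pipetask command
    # normally performs these steps):
    #
    #     fwk = CmdLineFwk()
    #     qgraph = fwk.makeGraph(pipeline, args)
    #     if qgraph is not None:
    #         fwk.runPipeline(qgraph, taskFactory, args)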

    def showInfo(
        self, args: SimpleNamespace, pipeline: Pipeline, graph: Optional[QuantumGraph] = None
    ) -> None:
        """Display useful info about pipeline and environment.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.
        pipeline : `Pipeline`
            Pipeline definition.
        graph : `QuantumGraph`, optional
            Execution graph.
        """
        showOpts = args.show
        for what in showOpts:
            showCommand, _, showArgs = what.partition("=")

            if showCommand in ["pipeline", "config", "history", "tasks"]:
                if not pipeline:
                    _LOG.warning("Pipeline is required for --show=%s", showCommand)
                    continue

            if showCommand in ["graph", "workflow", "uri"]:
                if not graph:
                    _LOG.warning("QuantumGraph is required for --show=%s", showCommand)
                    continue

            if showCommand == "pipeline":
                print(pipeline)
            elif showCommand == "config":
                self._showConfig(pipeline, showArgs, False)
            elif showCommand == "dump-config":
                self._showConfig(pipeline, showArgs, True)
            elif showCommand == "history":
                self._showConfigHistory(pipeline, showArgs)
            elif showCommand == "tasks":
                self._showTaskHierarchy(pipeline)
            elif showCommand == "graph":
                if graph:
                    self._showGraph(graph)
            elif showCommand == "uri":
                if graph:
                    self._showUri(graph, args)
            elif showCommand == "workflow":
                if graph:
                    self._showWorkflow(graph, args)
            else:
                allowed = "pipeline config[=XXX] dump-config[=Task] history=XXX tasks graph workflow uri"
                print(
                    "Unknown value for show: %s (choose from '%s')" % (what, "', '".join(allowed.split())),
                    file=sys.stderr,
                )
                sys.exit(1)

    def _showConfig(self, pipeline: Pipeline, showArgs: str, dumpFullConfig: bool) -> None:
        """Show task configuration.

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition.
        showArgs : `str`
            Defines what to show.
        dumpFullConfig : `bool`
            If `True` then dump the complete task configuration with all
            imports.
        """
        stream: Any = sys.stdout
        if dumpFullConfig:
            # A task label can be given with this option.
            taskName = showArgs
        else:
            # The argument can have the form [TaskLabel::][pattern:NOIGNORECASE]
            matConfig = re.search(r"^(?:(\w+)::)?(?:config.)?(.+)?", showArgs)
            assert matConfig is not None, "regex always matches"
            taskName = matConfig.group(1)
            pattern = matConfig.group(2)
            if pattern:
                stream = _FilteredStream(pattern)

        tasks = util.filterTasks(pipeline, taskName)
        if not tasks:
            print("Pipeline has no tasks named {}".format(taskName), file=sys.stderr)
            sys.exit(1)

        for taskDef in tasks:
            print("### Configuration for task `{}'".format(taskDef.label))
            taskDef.config.saveToStream(stream, root="config", skipImports=not dumpFullConfig)
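
    # Examples of --show=config arguments accepted by the parsing above
    # (task labels and field names are hypothetical):
    #
    #     config                            all fields for every task
    #     config=isr::doWrite*              fields matching "doWrite*" in "isr"
    #     config=isr::doWrite:NOIGNORECASE  the same, but case-sensitive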

    def _showConfigHistory(self, pipeline: Pipeline, showArgs: str) -> None:
        """Show history for task configuration.

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition.
        showArgs : `str`
            Defines what to show.
        """
        taskName = None
        pattern = None
        matHistory = re.search(r"^(?:(\w+)::)?(?:config[.])?(.+)", showArgs)
        if matHistory:
            taskName = matHistory.group(1)
            pattern = matHistory.group(2)
        if not pattern:
            print("Please provide a value with --show history (e.g. history=Task::param)", file=sys.stderr)
            sys.exit(1)

        tasks = util.filterTasks(pipeline, taskName)
        if not tasks:
            print(f"Pipeline has no tasks named {taskName}", file=sys.stderr)
            sys.exit(1)

        found = False
        for taskDef in tasks:

            config = taskDef.config

            # Look for any matches in the config hierarchy for this name.
            for nmatch, thisName in enumerate(fnmatch.filter(config.names(), pattern)):
                if nmatch > 0:
                    print("")

                cpath, _, cname = thisName.rpartition(".")
                try:
                    if not cpath:
                        # Looking for a top-level field.
                        hconfig = taskDef.config
                    else:
                        hconfig = eval("config." + cpath, {}, {"config": config})
                except AttributeError:
                    print(
                        f"Error: Unable to extract attribute {cpath} from task {taskDef.label}",
                        file=sys.stderr,
                    )
                    hconfig = None

                # Sometimes we end up with a non-Config, so skip those.
                if isinstance(hconfig, (pexConfig.Config, pexConfig.ConfigurableInstance)) and hasattr(
                    hconfig, cname
                ):
                    print(f"### Configuration field for task `{taskDef.label}'")
                    print(pexConfigHistory.format(hconfig, cname))
                    found = True

        if not found:
            print(f"None of the tasks has a field matching {pattern}", file=sys.stderr)
            sys.exit(1)

    def _showTaskHierarchy(self, pipeline: Pipeline) -> None:
        """Print task hierarchy to stdout.

        Parameters
        ----------
        pipeline : `Pipeline`
            Pipeline definition.
        """
        for taskDef in pipeline.toExpandedPipeline():
            print("### Subtasks for task `{}'".format(taskDef.taskName))

            for configName, taskName in util.subTaskIter(taskDef.config):
                print("{}: {}".format(configName, taskName))

    def _showGraph(self, graph: QuantumGraph) -> None:
        """Print quanta information to stdout.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        """
        for taskNode in graph.taskGraph:
            print(taskNode)

            for iq, quantum in enumerate(graph.getQuantaForTask(taskNode)):
                print("  Quantum {}:".format(iq))
                print("    inputs:")
                for key, refs in quantum.inputs.items():
                    dataIds = ["DataId({})".format(ref.dataId) for ref in refs]
                    print("      {}: [{}]".format(key, ", ".join(dataIds)))
                print("    outputs:")
                for key, refs in quantum.outputs.items():
                    dataIds = ["DataId({})".format(ref.dataId) for ref in refs]
                    print("      {}: [{}]".format(key, ", ".join(dataIds)))

    def _showWorkflow(self, graph: QuantumGraph, args: SimpleNamespace) -> None:
        """Print quanta information and dependencies to stdout.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        args : `types.SimpleNamespace`
            Parsed command line.
        """
        for node in graph:
            print(f"Quantum {node.nodeId}: {node.taskDef.taskName}")
            for parent in graph.determineInputsToQuantumNode(node):
                print(f"Parent Quantum {parent.nodeId} - Child Quantum {node.nodeId}")

    def _showUri(self, graph: QuantumGraph, args: SimpleNamespace) -> None:
        """Print input and predicted output URIs to stdout.

        Parameters
        ----------
        graph : `QuantumGraph`
            Execution graph.
        args : `types.SimpleNamespace`
            Parsed command line.
        """

        def dumpURIs(thisRef: DatasetRef) -> None:
            primary, components = butler.getURIs(thisRef, predict=True, run="TBD")
            if primary:
                print(f"    {primary}")
            else:
                print("    (disassembled artifact)")
                for compName, compUri in components.items():
                    print(f"        {compName}: {compUri}")

        butler = _ButlerFactory.makeReadButler(args)
        for node in graph:
            print(f"Quantum {node.nodeId}: {node.taskDef.taskName}")
            print("  inputs:")
            for key, refs in node.quantum.inputs.items():
                for ref in refs:
                    dumpURIs(ref)
            print("  outputs:")
            for key, refs in node.quantum.outputs.items():
                for ref in refs:
                    dumpURIs(ref)

    def _importGraphFixup(self, args: SimpleNamespace) -> Optional[ExecutionGraphFixup]:
        """Import/instantiate graph fixup object.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        fixup : `ExecutionGraphFixup` or `None`
            Instantiated fixup object, or `None` if ``args.graph_fixup`` is
            not set.

        Raises
        ------
        ValueError
            Raised if the import fails, the factory call raises an exception,
            or the returned instance has an unexpected type.
        """
        if args.graph_fixup:
            try:
                factory = doImportType(args.graph_fixup)
            except Exception as exc:
                raise ValueError("Failed to import graph fixup class/method") from exc
            try:
                fixup = factory()
            except Exception as exc:
                raise ValueError("Failed to make instance of graph fixup") from exc
            if not isinstance(fixup, ExecutionGraphFixup):
                raise ValueError("Graph fixup is not an instance of ExecutionGraphFixup class")
            return fixup
        return None
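
# A hypothetical fixup accepted by --graph-fixup: the string must name an
# importable class (or no-argument factory) whose instance is an
# ExecutionGraphFixup, per the checks above.  A sketch, assuming the
# interface exposes a single fixupQuanta method:
#
#     class MyFixup(ExecutionGraphFixup):
#         def fixupQuanta(self, graph):
#             # Reorder or add dependencies between quanta here.
#             return graph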