# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Module defining CmdLineFwk class and related methods.
"""

from __future__ import annotations

__all__ = ["CmdLineFwk"]

import atexit
import contextlib
import copy
import datetime
import getpass
import logging
import shutil
from collections.abc import Iterable, Mapping, Sequence
from types import SimpleNamespace
from typing import TYPE_CHECKING

import astropy.units as u
from astropy.table import Table
from lsst.daf.butler import (
    Butler,
    CollectionType,
    DatasetId,
    DatasetRef,
    DatastoreCacheManager,
    DatastoreRecordData,
    QuantumBackedButler,
)
from lsst.daf.butler.registry import MissingCollectionError, RegistryDefaults
from lsst.daf.butler.registry.wildcards import CollectionWildcard
from lsst.pipe.base import (
    ExecutionResources,
    GraphBuilder,
    Instrument,
    Pipeline,
    PipelineDatasetTypes,
    QuantumGraph,
    buildExecutionButler,
)
from lsst.utils import doImportType
from lsst.utils.threads import disable_implicit_threading

from . import util
from .dotTools import graph2dot, pipeline2dot
from .executionGraphFixup import ExecutionGraphFixup
from .mpGraphExecutor import MPGraphExecutor
from .preExecInit import PreExecInit, PreExecInitLimited
from .singleQuantumExecutor import SingleQuantumExecutor

if TYPE_CHECKING:
    from lsst.daf.butler import (
        Config,
        DatasetType,
        DimensionUniverse,
        LimitedButler,
        Quantum,
        Registry,
    )
    from lsst.pipe.base import TaskDef, TaskFactory


# ----------------------------------
# Local non-exported definitions --
# ----------------------------------

_LOG = logging.getLogger(__name__)


class _OutputChainedCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.CHAINED` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """

    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            self.chain = tuple(registry.getCollectionChain(name))
            self.exists = True
        except MissingCollectionError:
            self.chain = ()
            self.exists = False

    def __str__(self) -> str:
        return self.name

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """

    chain: tuple[str, ...]
    """The definition of the collection, if it already exists
    (`tuple` [ `str` ]).

    Empty if the collection does not already exist.
    """


class _OutputRunCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.RUN` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """

    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            actualType = registry.getCollectionType(name)
            if actualType is not CollectionType.RUN:
                raise TypeError(f"Collection '{name}' exists but has type {actualType.name}, not RUN.")
            self.exists = True
        except MissingCollectionError:
            self.exists = False

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """


class _ButlerFactory:
    """A helper class for processing command-line arguments related to input
    and output collections.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.

    args : `types.SimpleNamespace`
        Parsed command-line arguments. The following attributes are used,
        either at construction or in later methods.

        ``output``
            The name of a `~lsst.daf.butler.CollectionType.CHAINED`
            input/output collection.

        ``output_run``
            The name of a `~lsst.daf.butler.CollectionType.RUN` input/output
            collection.

        ``extend_run``
            A boolean indicating whether ``output_run`` should already exist
            and be extended.

        ``replace_run``
            A boolean indicating that (if `True`) ``output_run`` should
            already exist but will be removed from the output chained
            collection and replaced with a new one.

        ``prune_replaced``
            A boolean indicating whether to prune the replaced run (requires
            ``replace_run``).

        ``inputs``
            Input collections of any type; see
            :ref:`daf_butler_ordered_collection_searches` for details.

        ``butler_config``
            Path to a data repository root or configuration file.

    writeable : `bool`
        If `True`, a `~lsst.daf.butler.Butler` is being initialized in a
        context where actual writes should happen, and hence an output run
        is necessary.

    Raises
    ------
    ValueError
        Raised if ``writeable is True`` but there are no output collections.
    """

    def __init__(self, registry: Registry, args: SimpleNamespace, writeable: bool):
        if args.output is not None:
            self.output = _OutputChainedCollectionInfo(registry, args.output)
        else:
            self.output = None
        if args.output_run is not None:
            self.outputRun = _OutputRunCollectionInfo(registry, args.output_run)
        elif self.output is not None:
            if args.extend_run:
                if not self.output.chain:
                    raise ValueError("Cannot use --extend-run option with non-existing or empty output chain")
                runName = self.output.chain[0]
            else:
                runName = f"{self.output}/{Instrument.makeCollectionTimestamp()}"
            self.outputRun = _OutputRunCollectionInfo(registry, runName)
        elif not writeable:
            # If we're not writing yet, ok to have no output run.
            self.outputRun = None
        else:
            raise ValueError("Cannot write without at least one of (--output, --output-run).")
        # Recursively flatten any input CHAINED collections. We do this up
        # front so we can tell if the user passes the same inputs on
        # subsequent calls, even though we also flatten when we define the
        # output CHAINED collection.
        self.inputs = tuple(registry.queryCollections(args.input, flattenChains=True)) if args.input else ()

    def check(self, args: SimpleNamespace) -> None:
        """Check command-line options for consistency with each other and the
        data repository.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.
        """
        assert not (args.extend_run and args.replace_run), "In mutually-exclusive group in ArgumentParser."
        if self.inputs and self.output is not None and self.output.exists:
            # Passing the same inputs that were used to initialize the output
            # collection is allowed; this means they must _end_ with the same
            # collections, because we push new runs to the front of the chain.
            for c1, c2 in zip(self.inputs[::-1], self.output.chain[::-1], strict=False):
                if c1 != c2:
                    raise ValueError(
                        f"Output CHAINED collection {self.output.name!r} exists, but it ends with "
                        "a different sequence of input collections than those given: "
                        f"{c1!r} != {c2!r} in inputs={self.inputs} vs "
                        f"{self.output.name}={self.output.chain}."
                    )
            if len(self.inputs) > len(self.output.chain):
                nNew = len(self.inputs) - len(self.output.chain)
                raise ValueError(
                    f"Cannot add new input collections {self.inputs[:nNew]} after "
                    "output collection is first created."
                )
        if args.extend_run:
            if self.outputRun is None:
                raise ValueError("Cannot --extend-run when no output collection is given.")
            elif not self.outputRun.exists:
                raise ValueError(
                    f"Cannot --extend-run; output collection '{self.outputRun.name}' does not exist."
                )
        if not args.extend_run and self.outputRun is not None and self.outputRun.exists:
            raise ValueError(
                f"Output run '{self.outputRun.name}' already exists, but --extend-run was not given."
            )
        if args.prune_replaced and not args.replace_run:
            raise ValueError("--prune-replaced requires --replace-run.")
        if args.replace_run and (self.output is None or not self.output.exists):
            raise ValueError("--output must point to an existing CHAINED collection for --replace-run.")

    @classmethod
    def _makeReadParts(cls, args: SimpleNamespace) -> tuple[Butler, Sequence[str], _ButlerFactory]:
        """Parse arguments to support implementations of `makeReadButler` and
        `makeButlerAndCollections`.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler constructed from the repo at
            ``args.butler_config``, but with no default collections.
        inputs : `~collections.abc.Sequence` [ `str` ]
            A collection search path constructed according to ``args``.
        self : `_ButlerFactory`
            A new `_ButlerFactory` instance representing the processed version
            of ``args``.
        """
        butler = Butler(args.butler_config, writeable=False)
        self = cls(butler.registry, args, writeable=False)
        self.check(args)
        if self.output and self.output.exists:
            if args.replace_run:
                replaced = self.output.chain[0]
                inputs = list(self.output.chain[1:])
                _LOG.debug(
                    "Simulating collection search in '%s' after removing '%s'.", self.output.name, replaced
                )
            else:
                inputs = [self.output.name]
        else:
            inputs = list(self.inputs)
        if args.extend_run:
            assert self.outputRun is not None, "Output collection has to be specified."
            inputs.insert(0, self.outputRun.name)
        collSearch = CollectionWildcard.from_expression(inputs).require_ordered()
        return butler, collSearch, self

    @classmethod
    def makeReadButler(cls, args: SimpleNamespace) -> Butler:
        """Construct a read-only butler according to the given command-line
        arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler initialized with the collections specified by
            ``args``.
        """
        cls.defineDatastoreCache()  # Ensure that this butler can use a shared cache.
        butler, inputs, _ = cls._makeReadParts(args)
        _LOG.debug("Preparing butler to read from %s.", inputs)
        return Butler(butler=butler, collections=inputs)
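
    # Example usage (a sketch; the repository path and collection names are
    # hypothetical, and only the attributes consumed above are shown):
    #
    #     args = SimpleNamespace(
    #         butler_config="/repo/main",
    #         input=("refcats", "HSC/raw/all"),
    #         output="u/alice/demo",
    #         output_run=None,
    #         extend_run=False,
    #         replace_run=False,
    #         prune_replaced=None,
    #     )
    #     butler = _ButlerFactory.makeReadButler(args)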

    @classmethod
    def makeButlerAndCollections(cls, args: SimpleNamespace) -> tuple[Butler, Sequence[str], str | None]:
        """Return a read-only butler, a collection search path, and the name
        of the run to be used for future writes.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler that collections will be added to and/or
            queried from.
        inputs : `~collections.abc.Sequence` [ `str` ]
            Collections to search for datasets.
        run : `str` or `None`
            Name of the output `~lsst.daf.butler.CollectionType.RUN`
            collection if one is defined, or `None` if it is not.
        """
        butler, inputs, self = cls._makeReadParts(args)
        run: str | None = None
        if args.extend_run:
            assert self.outputRun is not None, "Output collection has to be specified."
        if self.outputRun is not None:
            run = self.outputRun.name
        _LOG.debug("Preparing registry to read from %s and expect future writes to '%s'.", inputs, run)
        return butler, inputs, run

    @staticmethod
    def defineDatastoreCache() -> None:
        """Define where datastore cache directories should be found.

        Notes
        -----
        All the jobs should share a datastore cache if applicable. This
        method asks for a shared fallback cache to be defined and then
        configures an exit handler to clean it up.
        """
        defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset()
        if defined:
            atexit.register(shutil.rmtree, cache_dir, ignore_errors=True)
            _LOG.debug("Defining shared datastore cache directory to %s", cache_dir)

    @classmethod
    def makeWriteButler(cls, args: SimpleNamespace, taskDefs: Iterable[TaskDef] | None = None) -> Butler:
        """Return a read-write butler initialized to write to and read from
        the collections specified by the given command-line arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.
        taskDefs : iterable of `TaskDef`, optional
            Definitions for tasks in a pipeline. This argument is only needed
            if ``args.replace_run`` is `True` and ``args.prune_replaced`` is
            "unstore".

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-write butler initialized according to the given arguments.
        """
        cls.defineDatastoreCache()  # Ensure that this butler can use a shared cache.
        butler = Butler(args.butler_config, writeable=True)
        self = cls(butler.registry, args, writeable=True)
        self.check(args)
        assert self.outputRun is not None, "Output collection has to be specified."  # for mypy
        if self.output is not None:
            chainDefinition = list(self.output.chain if self.output.exists else self.inputs)
            if args.replace_run:
                replaced = chainDefinition.pop(0)
                if args.prune_replaced == "unstore":
                    # Remove datasets from the datastore.
                    with butler.transaction():
                        refs: Iterable[DatasetRef] = butler.registry.queryDatasets(..., collections=replaced)
                        # We want to remove regular outputs but keep
                        # initOutputs, configs, and versions.
                        if taskDefs is not None:
                            initDatasetNames = set(PipelineDatasetTypes.initOutputNames(taskDefs))
                            refs = [ref for ref in refs if ref.datasetType.name not in initDatasetNames]
                        butler.pruneDatasets(refs, unstore=True, disassociate=False)
                elif args.prune_replaced == "purge":
                    # Erase the entire collection and all its datasets; the
                    # collection must be removed from its chain first.
                    with butler.transaction():
                        butler.registry.setCollectionChain(self.output.name, chainDefinition, flatten=True)
                        butler.removeRuns([replaced], unstore=True)
                elif args.prune_replaced is not None:
                    raise NotImplementedError(f"Unsupported --prune-replaced option '{args.prune_replaced}'.")
            if not self.output.exists:
                butler.registry.registerCollection(self.output.name, CollectionType.CHAINED)
            if not args.extend_run:
                butler.registry.registerCollection(self.outputRun.name, CollectionType.RUN)
                chainDefinition.insert(0, self.outputRun.name)
                butler.registry.setCollectionChain(self.output.name, chainDefinition, flatten=True)
            _LOG.debug(
                "Preparing butler to write to '%s' and read from '%s'=%s",
                self.outputRun.name,
                self.output.name,
                chainDefinition,
            )
            butler.registry.defaults = RegistryDefaults(run=self.outputRun.name, collections=self.output.name)
        else:
            inputs = (self.outputRun.name,) + self.inputs
            _LOG.debug("Preparing butler to write to '%s' and read from %s.", self.outputRun.name, inputs)
            butler.registry.defaults = RegistryDefaults(run=self.outputRun.name, collections=inputs)
        return butler
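
    # Example outcome (a sketch; names and timestamp are hypothetical): after
    # makeWriteButler runs with --output u/alice/demo on a fresh repository,
    # the collections look like
    #
    #     u/alice/demo (CHAINED)
    #         1. u/alice/demo/20230806T023000Z (RUN, the new output run)
    #         2. ... input collections given on the command line ...
    #
    # and butler.registry.defaults directs future writes to the new RUN.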

    output: _OutputChainedCollectionInfo | None
    """Information about the output chained collection, if there is or will be
    one (`_OutputChainedCollectionInfo` or `None`).
    """

    outputRun: _OutputRunCollectionInfo | None
    """Information about the output run collection, if there is or will be
    one (`_OutputRunCollectionInfo` or `None`).
    """

    inputs: tuple[str, ...]
    """Input collections provided directly by the user (`tuple` [ `str` ]).
    """


class _QBBFactory:
    """Class which is a callable for making QBB instances."""

    def __init__(
        self, butler_config: Config, dimensions: DimensionUniverse, dataset_types: Mapping[str, DatasetType]
    ):
        self.butler_config = butler_config
        self.dimensions = dimensions
        self.dataset_types = dataset_types

    def __call__(self, quantum: Quantum) -> LimitedButler:
        """Return a freshly initialized `~lsst.daf.butler.QuantumBackedButler`.

        Factory method to create QuantumBackedButler instances.
        """
        return QuantumBackedButler.initialize(
            config=self.butler_config,
            quantum=quantum,
            dimensions=self.dimensions,
            dataset_types=self.dataset_types,
        )
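
# Example usage (a sketch; the variable names are hypothetical): executors call
# a _QBBFactory instance once per quantum to obtain a butler limited to that
# quantum's inputs and outputs, as runGraphQBB below does via
# ``limited_butler_factory``:
#
#     qbb_factory = _QBBFactory(args.butler_config, qgraph.universe, dataset_types)
#     limited_butler = qbb_factory(quantum_node.quantum)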


# ------------------------
# Exported definitions --
# ------------------------


class CmdLineFwk:
    """PipelineTask framework which executes tasks from the command line.

    In addition to executing tasks this activator provides additional methods
    for task management like dumping configuration or execution chain.
    """

    MP_TIMEOUT = 3600 * 24 * 30  # Default timeout (sec) for multiprocessing

    def __init__(self) -> None:
        pass

    def makePipeline(self, args: SimpleNamespace) -> Pipeline:
        """Build a pipeline from command line arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        pipeline : `~lsst.pipe.base.Pipeline`
            The constructed pipeline.
        """
        if args.pipeline:
            pipeline = Pipeline.from_uri(args.pipeline)
        else:
            pipeline = Pipeline("anonymous")

        # Loop over all pipeline actions and apply them in order.
        for action in args.pipeline_actions:
            if action.action == "add_instrument":
                pipeline.addInstrument(action.value)

            elif action.action == "new_task":
                pipeline.addTask(action.value, action.label)

            elif action.action == "delete_task":
                pipeline.removeTask(action.label)

            elif action.action == "config":
                # Action value string is "field=value"; split it at '='.
                field, _, value = action.value.partition("=")
                pipeline.addConfigOverride(action.label, field, value)

            elif action.action == "configfile":
                pipeline.addConfigFile(action.label, action.value)

            else:
                raise ValueError(f"Unexpected pipeline action: {action.action}")

        if args.save_pipeline:
            pipeline.write_to_uri(args.save_pipeline)

        if args.pipeline_dot:
            pipeline2dot(pipeline, args.pipeline_dot)

        return pipeline
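
    # Example mapping (a sketch; labels and values are hypothetical): the
    # pipeline actions consumed above are produced by pipetask options such as
    #
    #     -t mypkg.tasks.MyTask:mylabel    -> new_task
    #     --delete mylabel                 -> delete_task
    #     -c mylabel:doFoo=False           -> config
    #     -C mylabel:overrides.py          -> configfile
    #     --instrument mypkg.MyInstrument  -> add_instrument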

    def makeGraph(self, pipeline: Pipeline, args: SimpleNamespace) -> QuantumGraph | None:
        """Build a graph from command line arguments.

        Parameters
        ----------
        pipeline : `~lsst.pipe.base.Pipeline`
            Pipeline, can be empty or `None` if graph is read from a file.
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        graph : `~lsst.pipe.base.QuantumGraph` or `None`
            If the resulting graph is empty then `None` is returned.
        """
        # Make sure that --extend-run always enables --skip-existing.
        if args.extend_run:
            args.skip_existing = True

        butler, collections, run = _ButlerFactory.makeButlerAndCollections(args)

        if args.skip_existing and run:
            args.skip_existing_in += (run,)

        if args.qgraph:
            # click passes an empty tuple as the default value for
            # qgraph_node_id.
            nodes = args.qgraph_node_id or None
            qgraph = QuantumGraph.loadUri(args.qgraph, butler.dimensions, nodes=nodes, graphID=args.qgraph_id)

            # A pipeline cannot be provided in this case.
            if pipeline:
                raise ValueError("Pipeline must not be given when quantum graph is read from file.")
            if args.show_qgraph_header:
                print(QuantumGraph.readHeader(args.qgraph))
        else:
            task_defs = list(pipeline.toExpandedPipeline())
            if args.mock:
                from lsst.pipe.base.tests.mocks import mock_task_defs

                task_defs = mock_task_defs(
                    task_defs,
                    unmocked_dataset_types=args.unmocked_dataset_types,
                    force_failures=args.mock_failure,
                )
            # Make execution plan (a.k.a. DAG) for pipeline.
            graphBuilder = GraphBuilder(
                butler.registry,
                skipExistingIn=args.skip_existing_in,
                clobberOutputs=args.clobber_outputs,
                datastore=butler._datastore if args.qgraph_datastore_records else None,
            )
            # Accumulate metadata.
            metadata = {
                "input": args.input,
                "output": args.output,
                "butler_argument": args.butler_config,
                "output_run": run,
                "extend_run": args.extend_run,
                "skip_existing_in": args.skip_existing_in,
                "skip_existing": args.skip_existing,
                "data_query": args.data_query,
                "user": getpass.getuser(),
                "time": f"{datetime.datetime.now()}",
            }
            assert run is not None, "Butler output run collection must be defined"
            qgraph = graphBuilder.makeGraph(
                task_defs,
                collections,
                run,
                args.data_query,
                metadata=metadata,
                datasetQueryConstraint=args.dataset_query_constraint,
                dataId=pipeline.get_data_id(butler.dimensions),
            )
            if args.show_qgraph_header:
                qgraph.buildAndPrintHeader()

        # Count quanta in the graph; if it is empty, return None.
        nQuanta = len(qgraph)
        if nQuanta == 0:
            return None
        else:
            if _LOG.isEnabledFor(logging.INFO):
                qg_task_table = self._generateTaskTable(qgraph)
                qg_task_table_formatted = "\n".join(qg_task_table.pformat_all())
                _LOG.info(
                    "QuantumGraph contains %d quanta for %d tasks, graph ID: %r\n%s",
                    nQuanta,
                    len(qgraph.taskGraph),
                    qgraph.graphID,
                    qg_task_table_formatted,
                )

        if args.save_qgraph:
            qgraph.saveUri(args.save_qgraph)

        if args.save_single_quanta:
            for quantumNode in qgraph:
                sqgraph = qgraph.subset(quantumNode)
                uri = args.save_single_quanta.format(quantumNode)
                sqgraph.saveUri(uri)

        if args.qgraph_dot:
            graph2dot(qgraph, args.qgraph_dot)

        if args.execution_butler_location:
            butler = Butler(args.butler_config)
            newArgs = copy.deepcopy(args)

            def builderShim(butler: Butler) -> Butler:
                newArgs.butler_config = butler._config
                # Calling makeWriteButler is done for its side effects, mainly
                # parsing all the args into collection names, creating
                # collections, etc.
                newButler = _ButlerFactory.makeWriteButler(newArgs)
                return newButler

            # Include the output collection in the input collections for the
            # execution butler if it exists in the repo.
            all_inputs = args.input
            if args.output is not None:
                with contextlib.suppress(MissingCollectionError):
                    all_inputs += (next(iter(butler.registry.queryCollections(args.output))),)

            _LOG.debug("Calling buildExecutionButler with collections=%s", all_inputs)
            buildExecutionButler(
                butler,
                qgraph,
                args.execution_butler_location,
                run,
                butlerModifier=builderShim,
                collections=all_inputs,
                clobber=args.clobber_execution_butler,
                datastoreRoot=args.target_datastore_root,
                transfer=args.transfer,
            )

        return qgraph

    def _make_execution_resources(self, args: SimpleNamespace) -> ExecutionResources:
        """Construct the execution resource class from arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        resources : `~lsst.pipe.base.ExecutionResources`
            The resources available to each quantum.
        """
        return ExecutionResources(
            num_cores=args.cores_per_quantum, max_mem=args.memory_per_quantum, default_mem_units=u.MB
        )
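
    # A minimal sketch of the resulting object (values hypothetical): a bare
    # number for the memory limit is interpreted in the default units (MB):
    #
    #     resources = ExecutionResources(num_cores=4, max_mem=512, default_mem_units=u.MB)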

    def runPipeline(
        self,
        graph: QuantumGraph,
        taskFactory: TaskFactory,
        args: SimpleNamespace,
        butler: Butler | None = None,
    ) -> None:
        """Execute complete QuantumGraph.

        Parameters
        ----------
        graph : `~lsst.pipe.base.QuantumGraph`
            Execution graph.
        taskFactory : `~lsst.pipe.base.TaskFactory`
            Task factory.
        args : `types.SimpleNamespace`
            Parsed command line.
        butler : `~lsst.daf.butler.Butler`, optional
            Data butler instance; if not defined then a new instance is made
            using command line options.
        """
        # Check that the output run defined on the command line is consistent
        # with the quantum graph.
        if args.output_run and graph.metadata:
            graph_output_run = graph.metadata.get("output_run", args.output_run)
            if graph_output_run != args.output_run:
                raise ValueError(
                    f"Output run defined on command line ({args.output_run}) has to be "
                    f"identical to graph metadata ({graph_output_run}). "
                    "To update graph metadata run `pipetask update-graph-run` command."
                )

        # Make sure that --extend-run always enables --skip-existing;
        # clobbering should be disabled if --extend-run is not specified.
        if args.extend_run:
            args.skip_existing = True
        else:
            args.clobber_outputs = False

        if not args.enable_implicit_threading:
            disable_implicit_threading()

        # Make butler instance. QuantumGraph should have an output run
        # defined, but we ignore it here and let the command line decide the
        # actual output run.
        if butler is None:
            butler = _ButlerFactory.makeWriteButler(args, graph.iterTaskGraph())

        if args.skip_existing:
            args.skip_existing_in += (butler.run,)

        # Enable lsstDebug debugging. Note that this is done once in the
        # main process before PreExecInit and it is also repeated before
        # running each task in SingleQuantumExecutor (which may not be
        # needed if `multiprocessing` always uses fork start method).
        if args.enableLsstDebug:
            try:
                _LOG.debug("Will try to import debug.py")
                import debug  # type: ignore # noqa:F401
            except ImportError:
                _LOG.warning("No 'debug' module found.")

        # Save all InitOutputs, configs, etc.
        preExecInit = PreExecInit(butler, taskFactory, extendRun=args.extend_run)
        preExecInit.initialize(
            graph,
            saveInitOutputs=not args.skip_init_writes,
            registerDatasetTypes=args.register_dataset_types,
            saveVersions=not args.no_versions,
        )

        if not args.init_only:
            graphFixup = self._importGraphFixup(args)
            resources = self._make_execution_resources(args)
            quantumExecutor = SingleQuantumExecutor(
                butler,
                taskFactory,
                skipExistingIn=args.skip_existing_in,
                clobberOutputs=args.clobber_outputs,
                enableLsstDebug=args.enableLsstDebug,
                exitOnKnownError=args.fail_fast,
                resources=resources,
            )

            timeout = self.MP_TIMEOUT if args.timeout is None else args.timeout
            executor = MPGraphExecutor(
                numProc=args.processes,
                timeout=timeout,
                startMethod=args.start_method,
                quantumExecutor=quantumExecutor,
                failFast=args.fail_fast,
                pdb=args.pdb,
                executionGraphFixup=graphFixup,
            )
            # Have to reset the connection pool to avoid sharing connections
            # with forked processes.
            butler.registry.resetConnectionPool()
            try:
                with util.profile(args.profile, _LOG):
                    executor.execute(graph)
            finally:
                if args.summary:
                    report = executor.getReport()
                    if report:
                        with open(args.summary, "w") as out:
                            # Do not save fields that are not set.
                            out.write(report.json(exclude_none=True, indent=2))

    def _generateTaskTable(self, qgraph: QuantumGraph) -> Table:
        """Generate an astropy table listing the number of quanta per task for
        a given quantum graph.

        Parameters
        ----------
        qgraph : `lsst.pipe.base.graph.graph.QuantumGraph`
            A QuantumGraph object.

        Returns
        -------
        qg_task_table : `astropy.table.table.Table`
            An astropy table containing columns: Quanta and Tasks.
        """
        qg_quanta, qg_tasks = [], []
        for task_def in qgraph.iterTaskGraph():
            num_qnodes = qgraph.getNumberOfQuantaForTask(task_def)
            qg_quanta.append(num_qnodes)
            qg_tasks.append(task_def.label)
        qg_task_table = Table(dict(Quanta=qg_quanta, Tasks=qg_tasks))
        return qg_task_table
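
    # Example output (a sketch; task labels and counts are hypothetical): when
    # logged by makeGraph above, the table renders roughly as
    #
    #     Quanta       Tasks
    #     ------ -----------------
    #        100               isr
    #        100 characterizeImage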

    def _importGraphFixup(self, args: SimpleNamespace) -> ExecutionGraphFixup | None:
        """Import and instantiate a graph fixup object.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        fixup : `ExecutionGraphFixup` or `None`
            Instance of the fixup class, or `None` if no fixup was requested.

        Raises
        ------
        ValueError
            Raised if import fails, method call raises exception, or returned
            instance has unexpected type.
        """
        if args.graph_fixup:
            try:
                factory = doImportType(args.graph_fixup)
            except Exception as exc:
                raise ValueError("Failed to import graph fixup class/method") from exc
            try:
                fixup = factory()
            except Exception as exc:
                raise ValueError("Failed to make instance of graph fixup") from exc
            if not isinstance(fixup, ExecutionGraphFixup):
                raise ValueError("Graph fixup is not an instance of ExecutionGraphFixup class")
            return fixup
        return None
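
    # Example (a sketch; the module and class names are hypothetical):
    # ``--graph-fixup`` takes the fully qualified name of a no-argument
    # callable, typically an ExecutionGraphFixup subclass, e.g.
    # ``--graph-fixup mypkg.fixups.MyFixup`` where
    #
    #     class MyFixup(ExecutionGraphFixup):
    #         def fixupQuanta(self, graph):
    #             return graph  # adjust execution dependencies here
    #
    # doImportType imports the callable, it is invoked with no arguments, and
    # the result must be an ExecutionGraphFixup instance.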

    def preExecInitQBB(self, task_factory: TaskFactory, args: SimpleNamespace) -> None:
        """Run pre-execution initialization using a quantum-backed butler.

        Parameters
        ----------
        task_factory : `~lsst.pipe.base.TaskFactory`
            Task factory.
        args : `types.SimpleNamespace`
            Parsed command line.
        """
        # Load quantum graph. We do not really need individual Quanta here,
        # but we need datastore records for initInputs, and those are only
        # available from Quanta, so load the whole thing.
        qgraph = QuantumGraph.loadUri(args.qgraph, graphID=args.qgraph_id)
        universe = qgraph.universe

        # Collect all init input/output dataset IDs.
        predicted_inputs: set[DatasetId] = set()
        predicted_outputs: set[DatasetId] = set()
        for taskDef in qgraph.iterTaskGraph():
            if (refs := qgraph.initInputRefs(taskDef)) is not None:
                predicted_inputs.update(ref.id for ref in refs)
            if (refs := qgraph.initOutputRefs(taskDef)) is not None:
                predicted_outputs.update(ref.id for ref in refs)
        predicted_outputs.update(ref.id for ref in qgraph.globalInitOutputRefs())
        # Remove intermediates from inputs.
        predicted_inputs -= predicted_outputs

        # Very inefficient way to extract datastore records from the quantum
        # graph: we have to scan all quanta and look at their datastore
        # records.
        datastore_records: dict[str, DatastoreRecordData] = {}
        for quantum_node in qgraph:
            for store_name, records in quantum_node.quantum.datastore_records.items():
                subset = records.subset(predicted_inputs)
                if subset is not None:
                    datastore_records.setdefault(store_name, DatastoreRecordData()).update(subset)

        dataset_types = {dstype.name: dstype for dstype in qgraph.registryDatasetTypes()}

        # Make butler from everything.
        butler = QuantumBackedButler.from_predicted(
            config=args.butler_config,
            predicted_inputs=predicted_inputs,
            predicted_outputs=predicted_outputs,
            dimensions=universe,
            datastore_records=datastore_records,
            search_paths=args.config_search_path,
            dataset_types=dataset_types,
        )

        # Save all InitOutputs, configs, etc.
        preExecInit = PreExecInitLimited(butler, task_factory)
        preExecInit.initialize(qgraph)

    def runGraphQBB(self, task_factory: TaskFactory, args: SimpleNamespace) -> None:
        """Execute a quantum graph using a quantum-backed butler.

        Parameters
        ----------
        task_factory : `~lsst.pipe.base.TaskFactory`
            Task factory.
        args : `types.SimpleNamespace`
            Parsed command line.
        """
        # Load quantum graph.
        nodes = args.qgraph_node_id or None
        qgraph = QuantumGraph.loadUri(args.qgraph, nodes=nodes, graphID=args.qgraph_id)

        if qgraph.metadata is None:
            raise ValueError("QuantumGraph is missing metadata, cannot continue.")

        dataset_types = {dstype.name: dstype for dstype in qgraph.registryDatasetTypes()}

        _butler_factory = _QBBFactory(
            butler_config=args.butler_config,
            dimensions=qgraph.universe,
            dataset_types=dataset_types,
        )

        # Make a special quantum executor.
        resources = self._make_execution_resources(args)
        quantumExecutor = SingleQuantumExecutor(
            butler=None,
            taskFactory=task_factory,
            enableLsstDebug=args.enableLsstDebug,
            exitOnKnownError=args.fail_fast,
            limited_butler_factory=_butler_factory,
            resources=resources,
        )

        timeout = self.MP_TIMEOUT if args.timeout is None else args.timeout
        executor = MPGraphExecutor(
            numProc=args.processes,
            timeout=timeout,
            startMethod=args.start_method,
            quantumExecutor=quantumExecutor,
            failFast=args.fail_fast,
            pdb=args.pdb,
        )
        try:
            with util.profile(args.profile, _LOG):
                executor.execute(qgraph)
        finally:
            if args.summary:
                report = executor.getReport()
                if report:
                    with open(args.summary, "w") as out:
                        # Do not save fields that are not set.
                        out.write(report.json(exclude_none=True, indent=2))