# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Module defining CmdLineFwk class and related methods."""

from __future__ import annotations

__all__ = ["CmdLineFwk"]

import atexit
import contextlib
import copy
import datetime
import getpass
import logging
import shutil
from collections.abc import Mapping, Sequence
from types import SimpleNamespace

import astropy.units as u
import lsst.utils.timer
from astropy.table import Table
from lsst.daf.butler import (
    Butler,
    CollectionType,
    Config,
    DatasetId,
    DatasetType,
    DimensionUniverse,
    LimitedButler,
    Quantum,
    QuantumBackedButler,
    Registry,
)
from lsst.daf.butler.datastore.cache_manager import DatastoreCacheManager
from lsst.daf.butler.datastore.record_data import DatastoreRecordData
from lsst.daf.butler.direct_butler import DirectButler
from lsst.daf.butler.registry import MissingCollectionError, RegistryDefaults
from lsst.daf.butler.registry.wildcards import CollectionWildcard
from lsst.pipe.base import (
    ExecutionResources,
    Instrument,
    Pipeline,
    PipelineGraph,
    QuantumGraph,
    TaskFactory,
    buildExecutionButler,
)
from lsst.pipe.base.all_dimensions_quantum_graph_builder import AllDimensionsQuantumGraphBuilder
from lsst.pipe.base.pipeline_graph import NodeType
from lsst.utils import doImportType
from lsst.utils.logging import getLogger
from lsst.utils.threads import disable_implicit_threading

from .dotTools import graph2dot, pipeline2dot
from .executionGraphFixup import ExecutionGraphFixup
from .mpGraphExecutor import MPGraphExecutor
from .preExecInit import PreExecInit, PreExecInitLimited
from .singleQuantumExecutor import SingleQuantumExecutor

# ----------------------------------
# Local non-exported definitions --
# ----------------------------------

_LOG = getLogger(__name__)


class _OutputChainedCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.CHAINED` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """

    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            self.chain = tuple(registry.getCollectionChain(name))
            self.exists = True
        except MissingCollectionError:
            self.chain = ()
            self.exists = False

    def __str__(self) -> str:
        return self.name

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """

    chain: tuple[str, ...]
    """The definition of the collection, if it already exists (`tuple`[`str`]).

    Empty if the collection does not already exist.
    """


class _OutputRunCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.RUN` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """

    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            actualType = registry.getCollectionType(name)
            if actualType is not CollectionType.RUN:
                raise TypeError(f"Collection '{name}' exists but has type {actualType.name}, not RUN.")
            self.exists = True
        except MissingCollectionError:
            self.exists = False

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """


class _ButlerFactory:
    """A helper class for processing command-line arguments related to input
    and output collections.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.

    args : `types.SimpleNamespace`
        Parsed command-line arguments. The following attributes are used,
        either at construction or in later methods.

        ``output``
            The name of a `~lsst.daf.butler.CollectionType.CHAINED`
            input/output collection.

        ``output_run``
            The name of a `~lsst.daf.butler.CollectionType.RUN` input/output
            collection.

        ``extend_run``
            A boolean indicating whether ``output_run`` should already exist
            and be extended.

        ``replace_run``
            A boolean indicating that (if `True`) ``output_run`` should
            already exist but will be removed from the output chained
            collection and replaced with a new one.

        ``prune_replaced``
            A string ("unstore" or "purge") indicating how to prune the
            replaced run, or `None` to leave it alone (requires
            ``replace_run``).

        ``rebase``
            A boolean indicating whether to force the ``output`` collection
            to be consistent with ``inputs`` and ``output_run``, such that
            the ``output`` collection has output run collections first (i.e.
            those that start with the same prefix), then the new inputs, then
            any original inputs not included in the new inputs.

        ``inputs``
            Input collections of any type; see
            :ref:`daf_butler_ordered_collection_searches` for details.

        ``butler_config``
            Path to a data repository root or configuration file.

    writeable : `bool`
        If `True`, a `~lsst.daf.butler.Butler` is being initialized in a
        context where actual writes should happen, and hence an output run
        is necessary.

    Raises
    ------
    ValueError
        Raised if ``writeable is True`` but there are no output collections.
    """

    def __init__(self, registry: Registry, args: SimpleNamespace, writeable: bool):
        if args.output is not None:
            self.output = _OutputChainedCollectionInfo(registry, args.output)
        else:
            self.output = None
        if args.output_run is not None:
            if args.rebase and self.output and not args.output_run.startswith(self.output.name):
                raise ValueError("Cannot rebase if output run does not start with output collection name.")
            self.outputRun = _OutputRunCollectionInfo(registry, args.output_run)
        elif self.output is not None:
            if args.extend_run:
                if not self.output.chain:
                    raise ValueError("Cannot use --extend-run option with non-existing or empty output chain")
                runName = self.output.chain[0]
            else:
                runName = f"{self.output}/{Instrument.makeCollectionTimestamp()}"
            self.outputRun = _OutputRunCollectionInfo(registry, runName)
        elif not writeable:
            # If we're not writing yet, ok to have no output run.
            self.outputRun = None
        else:
            raise ValueError("Cannot write without at least one of (--output, --output-run).")
        # Recursively flatten any input CHAINED collections. We do this up
        # front so we can tell if the user passes the same inputs on subsequent
        # calls, even though we also flatten when we define the output CHAINED
        # collection.
        self.inputs = tuple(registry.queryCollections(args.input, flattenChains=True)) if args.input else ()

        # If things are inconsistent and the user has asked for a rebase,
        # then construct the new output chain.
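        # A hypothetical illustration: with output.name == "u/me/out",
        # output.chain == ("u/me/out/run1", "old_input"), and
        # inputs == ("new_input",), the rebased chain below becomes
        # ("u/me/out/run1", "new_input", "old_input"): run collections first,
        # then the new inputs, then surviving original entries.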
        if args.rebase and self._checkOutputInputConsistency():
            assert self.output is not None
            newOutputChain = [item for item in self.output.chain if item.startswith(self.output.name)]
            newOutputChain.extend([item for item in self.inputs if item not in newOutputChain])
            newOutputChain.extend([item for item in self.output.chain if item not in newOutputChain])
            self.output.chain = tuple(newOutputChain)

    def check(self, args: SimpleNamespace) -> None:
        """Check command-line options for consistency with each other and the
        data repository.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.
        """
        assert not (args.extend_run and args.replace_run), "In mutually-exclusive group in ArgumentParser."
        if consistencyError := self._checkOutputInputConsistency():
            raise ValueError(consistencyError)

        if args.extend_run:
            if self.outputRun is None:
                raise ValueError("Cannot --extend-run when no output collection is given.")
            elif not self.outputRun.exists:
                raise ValueError(
                    f"Cannot --extend-run; output collection '{self.outputRun.name}' does not exist."
                )
        if not args.extend_run and self.outputRun is not None and self.outputRun.exists:
            raise ValueError(
                f"Output run '{self.outputRun.name}' already exists, but --extend-run was not given."
            )
        if args.prune_replaced and not args.replace_run:
            raise ValueError("--prune-replaced requires --replace-run.")
        if args.replace_run and (self.output is None or not self.output.exists):
            raise ValueError("--output must point to an existing CHAINED collection for --replace-run.")

    def _checkOutputInputConsistency(self) -> str | None:
        if self.inputs and self.output is not None and self.output.exists:
            # Passing the same inputs that were used to initialize the output
            # collection is allowed; this means the inputs must appear as a
            # contiguous subsequence of outputs (normally they're also at the
            # end, but --rebase will in general put them in the middle).
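            # Hypothetical example: inputs == ("b", "c") matches
            # chain == ("run2", "b", "c", "a") via the window chain[1:3], so
            # the loop below returns None; chain == ("run2", "c", "b", "a")
            # has no matching window, and a diagnostic string is returned.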
            for n in reversed(range(1 + len(self.output.chain) - len(self.inputs))):
                if self.inputs == self.output.chain[n : n + len(self.inputs)]:
                    return None
            return (
                f"Output CHAINED collection {self.output.name!r} exists and does not include the "
                f"same sequence of (flattened) input collections {self.inputs} as a contiguous "
                "subsequence. "
                "Use --rebase to ignore this problem and reset the output collection, but note that "
                "this may obfuscate what inputs were actually used to produce these outputs."
            )
        return None

    @classmethod
    def _makeReadParts(cls, args: SimpleNamespace) -> tuple[Butler, Sequence[str], _ButlerFactory]:
        """Parse arguments to support implementations of `makeReadButler` and
        `makeButlerAndCollections`.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler constructed from the repo at
            ``args.butler_config``, but with no default collections.
        inputs : `~collections.abc.Sequence` [ `str` ]
            A collection search path constructed according to ``args``.
        self : `_ButlerFactory`
            A new `_ButlerFactory` instance representing the processed version
            of ``args``.
        """
        butler = Butler.from_config(args.butler_config, writeable=False)
        self = cls(butler.registry, args, writeable=False)
        self.check(args)
        if self.output and self.output.exists:
            if args.replace_run:
                replaced = self.output.chain[0]
                inputs = list(self.output.chain[1:])
                _LOG.debug(
                    "Simulating collection search in '%s' after removing '%s'.", self.output.name, replaced
                )
            else:
                inputs = [self.output.name]
        else:
            inputs = list(self.inputs)
        if args.extend_run:
            assert self.outputRun is not None, "Output collection has to be specified."
            inputs.insert(0, self.outputRun.name)
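        # Collection search paths must be explicitly ordered; require_ordered
        # normalizes the expression and raises if it cannot be interpreted as
        # an ordered sequence of collection names.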
        collSearch = CollectionWildcard.from_expression(inputs).require_ordered()
        return butler, collSearch, self

    @classmethod
    def makeReadButler(cls, args: SimpleNamespace) -> Butler:
        """Construct a read-only butler according to the given command-line
        arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler initialized with the collections specified by
            ``args``.
        """
        cls.defineDatastoreCache()  # Ensure that this butler can use a shared cache.
        butler, inputs, _ = cls._makeReadParts(args)
        _LOG.debug("Preparing butler to read from %s.", inputs)
        return Butler.from_config(butler=butler, collections=inputs)

    @classmethod
    def makeButlerAndCollections(cls, args: SimpleNamespace) -> tuple[Butler, Sequence[str], str | None]:
        """Return a read-only butler, a collection search path, and the name
        of the run to be used for future writes.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler that collections will be added to and/or
            queried from.
        inputs : `~collections.abc.Sequence` [ `str` ]
            Collections to search for datasets.
        run : `str` or `None`
            Name of the output `~lsst.daf.butler.CollectionType.RUN`
            collection if it already exists, or `None` if it does not.
        """
        butler, inputs, self = cls._makeReadParts(args)
        run: str | None = None
        if args.extend_run:
            assert self.outputRun is not None, "Output collection has to be specified."
        if self.outputRun is not None:
            run = self.outputRun.name
        _LOG.debug("Preparing registry to read from %s and expect future writes to '%s'.", inputs, run)
        return butler, inputs, run

    @staticmethod
    def defineDatastoreCache() -> None:
        """Define where datastore cache directories should be found.

        Notes
        -----
        All the jobs should share a datastore cache if applicable. This
        method asks for a shared fallback cache to be defined and then
        configures an exit handler to clean it up.
        """
        defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset()
        if defined:
            atexit.register(shutil.rmtree, cache_dir, ignore_errors=True)
            _LOG.debug("Defining shared datastore cache directory to %s", cache_dir)

    @classmethod
    def makeWriteButler(cls, args: SimpleNamespace, pipeline_graph: PipelineGraph | None = None) -> Butler:
        """Return a read-write butler initialized to write to and read from
        the collections specified by the given command-line arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.
        pipeline_graph : `lsst.pipe.base.PipelineGraph`, optional
            Definitions for tasks in a pipeline. This argument is only needed
            if ``args.replace_run`` is `True` and ``args.prune_replaced`` is
            "unstore".

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-write butler initialized according to the given arguments.
        """
        cls.defineDatastoreCache()  # Ensure that this butler can use a shared cache.
        butler = Butler.from_config(args.butler_config, writeable=True)
        self = cls(butler.registry, args, writeable=True)
        self.check(args)
        assert self.outputRun is not None, "Output collection has to be specified."  # for mypy
        if self.output is not None:
            chainDefinition = list(self.output.chain if self.output.exists else self.inputs)
            if args.replace_run:
                replaced = chainDefinition.pop(0)
                if args.prune_replaced == "unstore":
                    # Remove datasets from datastore
                    with butler.transaction():
                        # we want to remove regular outputs from this pipeline,
                        # but keep initOutputs, configs, and versions.
                        if pipeline_graph is not None:
                            refs = [
                                ref
                                for ref in butler.registry.queryDatasets(..., collections=replaced)
                                if (
                                    (producer := pipeline_graph.producer_of(ref.datasetType.name)) is not None
                                    and producer.key.node_type is NodeType.TASK  # i.e. not TASK_INIT
                                )
                            ]
                            butler.pruneDatasets(refs, unstore=True, disassociate=False)
                elif args.prune_replaced == "purge":
                    # Erase entire collection and all datasets; need to remove
                    # collection from its chain collection first.
                    with butler.transaction():
                        butler.registry.setCollectionChain(self.output.name, chainDefinition, flatten=True)
                        butler.removeRuns([replaced], unstore=True)
                elif args.prune_replaced is not None:
                    raise NotImplementedError(f"Unsupported --prune-replaced option '{args.prune_replaced}'.")
            if not self.output.exists:
                butler.registry.registerCollection(self.output.name, CollectionType.CHAINED)
            if not args.extend_run:
                butler.registry.registerCollection(self.outputRun.name, CollectionType.RUN)
                chainDefinition.insert(0, self.outputRun.name)
                butler.registry.setCollectionChain(self.output.name, chainDefinition, flatten=True)
            _LOG.debug(
                "Preparing butler to write to '%s' and read from '%s'=%s",
                self.outputRun.name,
                self.output.name,
                chainDefinition,
            )
            butler.registry.defaults = RegistryDefaults(run=self.outputRun.name, collections=self.output.name)
        else:
            inputs = (self.outputRun.name,) + self.inputs
            _LOG.debug("Preparing butler to write to '%s' and read from %s.", self.outputRun.name, inputs)
            butler.registry.defaults = RegistryDefaults(run=self.outputRun.name, collections=inputs)
        return butler

    output: _OutputChainedCollectionInfo | None
    """Information about the output chained collection, if there is or will be
    one (`_OutputChainedCollectionInfo` or `None`).
    """

    outputRun: _OutputRunCollectionInfo | None
    """Information about the output run collection, if there is or will be
    one (`_OutputRunCollectionInfo` or `None`).
    """

    inputs: tuple[str, ...]
    """Input collections provided directly by the user (`tuple` [ `str` ]).
    """


class _QBBFactory:
    """Class which is a callable for making QuantumBackedButler (QBB)
    instances."""

    def __init__(
        self, butler_config: Config, dimensions: DimensionUniverse, dataset_types: Mapping[str, DatasetType]
    ):
        self.butler_config = butler_config
        self.dimensions = dimensions
        self.dataset_types = dataset_types

    def __call__(self, quantum: Quantum) -> LimitedButler:
        """Return a freshly initialized `~lsst.daf.butler.QuantumBackedButler`.

        Factory method to create QuantumBackedButler instances.
        """
        return QuantumBackedButler.initialize(
            config=self.butler_config,
            quantum=quantum,
            dimensions=self.dimensions,
            dataset_types=self.dataset_types,
        )


# ------------------------
# Exported definitions --
# ------------------------


class CmdLineFwk:
    """PipelineTask framework which executes tasks from the command line.

    In addition to executing tasks, this activator provides additional
    methods for task management, such as dumping configuration or the
    execution chain.
    """

    MP_TIMEOUT = 3600 * 24 * 30  # Default timeout (sec) for multiprocessing
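    # (3600 s/hour * 24 hours/day * 30 days = one month of wall-clock time,
    # i.e. effectively no timeout for typical runs.)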

    def __init__(self) -> None:
        pass

    def makePipeline(self, args: SimpleNamespace) -> Pipeline:
        """Build a pipeline from command line arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        pipeline : `~lsst.pipe.base.Pipeline`
            Newly-constructed pipeline.
        """
        if args.pipeline:
            pipeline = Pipeline.from_uri(args.pipeline)
        else:
            pipeline = Pipeline("anonymous")

        # loop over all pipeline actions and apply them in order
        for action in args.pipeline_actions:
            if action.action == "add_instrument":
                pipeline.addInstrument(action.value)

            elif action.action == "new_task":
                pipeline.addTask(action.value, action.label)

            elif action.action == "delete_task":
                pipeline.removeTask(action.label)

            elif action.action == "config":
                # action value string is "field=value", split it at '='
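                # (Hypothetical example: "task.threshold=5.0" yields
                # field="task.threshold", value="5.0"; str.partition splits
                # only at the first '=', so the value may itself contain '='.)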
                field, _, value = action.value.partition("=")
                pipeline.addConfigOverride(action.label, field, value)

            elif action.action == "configfile":
                pipeline.addConfigFile(action.label, action.value)

            else:
                raise ValueError(f"Unexpected pipeline action: {action.action}")

        if args.save_pipeline:
            pipeline.write_to_uri(args.save_pipeline)

        if args.pipeline_dot:
            pipeline2dot(pipeline, args.pipeline_dot)

        return pipeline

    def makeGraph(self, pipeline: Pipeline, args: SimpleNamespace) -> QuantumGraph | None:
        """Build a graph from command line arguments.

        Parameters
        ----------
        pipeline : `~lsst.pipe.base.Pipeline`
            Pipeline; can be empty or `None` if the graph is read from a
            file.
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        graph : `~lsst.pipe.base.QuantumGraph` or `None`
            The constructed graph; `None` is returned if the resulting graph
            is empty.
        """
        # make sure that --extend-run always enables --skip-existing
        if args.extend_run:
            args.skip_existing = True

        butler, collections, run = _ButlerFactory.makeButlerAndCollections(args)

        if args.skip_existing and run:
            args.skip_existing_in += (run,)

        if args.qgraph:
            # click passes empty tuple as default value for qgraph_node_id
            nodes = args.qgraph_node_id or None
            qgraph = QuantumGraph.loadUri(args.qgraph, butler.dimensions, nodes=nodes, graphID=args.qgraph_id)

            # pipeline cannot be provided in this case
            if pipeline:
                raise ValueError("Pipeline must not be given when quantum graph is read from file.")
            if args.show_qgraph_header:
                print(QuantumGraph.readHeader(args.qgraph))
        else:
            pipeline_graph = pipeline.to_graph()
            if args.mock:
                from lsst.pipe.base.tests.mocks import mock_pipeline_graph

                pipeline_graph = mock_pipeline_graph(
                    pipeline_graph,
                    unmocked_dataset_types=args.unmocked_dataset_types,
                    force_failures=args.mock_failure,
                )
            # make execution plan (a.k.a. DAG) for pipeline
            graph_builder = AllDimensionsQuantumGraphBuilder(
                pipeline_graph,
                butler,
                where=args.data_query,
                skip_existing_in=args.skip_existing_in if args.skip_existing_in is not None else (),
                clobber=args.clobber_outputs,
                dataset_query_constraint=args.dataset_query_constraint,
                input_collections=collections,
                output_run=run,
            )
            # accumulate metadata
            metadata = {
                "input": args.input,
                "output": args.output,
                "butler_argument": args.butler_config,
                "output_run": run,
                "extend_run": args.extend_run,
                "skip_existing_in": args.skip_existing_in,
                "skip_existing": args.skip_existing,
                "data_query": args.data_query,
                "user": getpass.getuser(),
                "time": f"{datetime.datetime.now()}",
            }
            assert run is not None, "Butler output run collection must be defined"
            qgraph = graph_builder.build(metadata, attach_datastore_records=args.qgraph_datastore_records)
            if args.show_qgraph_header:
                qgraph.buildAndPrintHeader()

        if len(qgraph) == 0:
            # Nothing to do.
            return None
        self._summarize_qgraph(qgraph)

        if args.save_qgraph:
            _LOG.verbose("Writing QuantumGraph to %r.", args.save_qgraph)
            qgraph.saveUri(args.save_qgraph)

        if args.save_single_quanta:
            for quantumNode in qgraph:
                sqgraph = qgraph.subset(quantumNode)
                uri = args.save_single_quanta.format(quantumNode)
                sqgraph.saveUri(uri)

        if args.qgraph_dot:
            _LOG.verbose("Writing quantum graph DOT visualization to %r.", args.qgraph_dot)
            graph2dot(qgraph, args.qgraph_dot)

        if args.execution_butler_location:
            _LOG.verbose("Writing execution butler to %r.", args.execution_butler_location)
            butler = Butler.from_config(args.butler_config)
            assert isinstance(butler, DirectButler), "Execution butler needs DirectButler"
            newArgs = copy.deepcopy(args)
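
            # buildExecutionButler (below) calls builderShim with the
            # execution butler it constructs; the shim mutates newArgs so
            # that makeWriteButler registers the output collections there.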
            def builderShim(butler: Butler) -> Butler:
                assert isinstance(butler, DirectButler), "Execution butler needs DirectButler"
                newArgs.butler_config = butler._config
                # Calling makeWriteButler is done for the side effects of
                # calling that method, namely parsing all the args into
                # collection names, creating collections, etc.
                newButler = _ButlerFactory.makeWriteButler(newArgs)
                return newButler

            # Include output collection in collections for input
            # files if it exists in the repo.
            all_inputs = args.input
            if args.output is not None:
                with contextlib.suppress(MissingCollectionError):
                    all_inputs += (next(iter(butler.registry.queryCollections(args.output))),)

            _LOG.debug("Calling buildExecutionButler with collections=%s", all_inputs)
            buildExecutionButler(
                butler,
                qgraph,
                args.execution_butler_location,
                run,
                butlerModifier=builderShim,
                collections=all_inputs,
                clobber=args.clobber_execution_butler,
                datastoreRoot=args.target_datastore_root,
                transfer=args.transfer,
            )

        return qgraph

    def _make_execution_resources(self, args: SimpleNamespace) -> ExecutionResources:
        """Construct the execution resource class from arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        resources : `~lsst.pipe.base.ExecutionResources`
            The resources available to each quantum.
        """
        return ExecutionResources(
            num_cores=args.cores_per_quantum, max_mem=args.memory_per_quantum, default_mem_units=u.MB
        )

    def runPipeline(
        self,
        graph: QuantumGraph,
        taskFactory: TaskFactory,
        args: SimpleNamespace,
        butler: Butler | None = None,
    ) -> None:
        """Execute complete QuantumGraph.

        Parameters
        ----------
        graph : `~lsst.pipe.base.QuantumGraph`
            Execution graph.
        taskFactory : `~lsst.pipe.base.TaskFactory`
            Task factory.
        args : `types.SimpleNamespace`
            Parsed command line.
        butler : `~lsst.daf.butler.Butler`, optional
            Data Butler instance; if not provided, a new instance is made
            using command line options.
        """
        if not args.enable_implicit_threading:
            disable_implicit_threading()

        # Check that output run defined on command line is consistent with
        # quantum graph.
        if args.output_run and graph.metadata:
            graph_output_run = graph.metadata.get("output_run", args.output_run)
            if graph_output_run != args.output_run:
                raise ValueError(
                    f"Output run defined on command line ({args.output_run}) has to be "
                    f"identical to graph metadata ({graph_output_run}). "
                    "To update graph metadata run `pipetask update-graph-run` command."
                )

        # Make sure that --extend-run always enables --skip-existing;
        # clobbering should be disabled if --extend-run is not specified.
        if args.extend_run:
            args.skip_existing = True
        else:
            args.clobber_outputs = False

        # Make butler instance. QuantumGraph should have an output run
        # defined, but we ignore it here and let the command line decide the
        # actual output run.
        if butler is None:
            butler = _ButlerFactory.makeWriteButler(args, graph.pipeline_graph)

        if args.skip_existing:
            args.skip_existing_in += (butler.run,)

        # Enable lsstDebug debugging. Note that this is done once in the
        # main process before PreExecInit and it is also repeated before
        # running each task in SingleQuantumExecutor (which may not be
        # needed if `multiprocessing` always uses fork start method).
        if args.enableLsstDebug:
            try:
                _LOG.debug("Will try to import debug.py")
                import debug  # type: ignore # noqa:F401
            except ImportError:
                _LOG.warning("No 'debug' module found.")

        # Save all InitOutputs, configs, etc.
        preExecInit = PreExecInit(butler, taskFactory, extendRun=args.extend_run)
        preExecInit.initialize(
            graph,
            saveInitOutputs=not args.skip_init_writes,
            registerDatasetTypes=args.register_dataset_types,
            saveVersions=not args.no_versions,
        )

        if not args.init_only:
            graphFixup = self._importGraphFixup(args)
            resources = self._make_execution_resources(args)
            quantumExecutor = SingleQuantumExecutor(
                butler,
                taskFactory,
                skipExistingIn=args.skip_existing_in,
                clobberOutputs=args.clobber_outputs,
                enableLsstDebug=args.enableLsstDebug,
                exitOnKnownError=args.fail_fast,
                resources=resources,
            )

            timeout = self.MP_TIMEOUT if args.timeout is None else args.timeout
            executor = MPGraphExecutor(
                numProc=args.processes,
                timeout=timeout,
                startMethod=args.start_method,
                quantumExecutor=quantumExecutor,
                failFast=args.fail_fast,
                pdb=args.pdb,
                executionGraphFixup=graphFixup,
            )
            # Have to reset connection pool to avoid sharing connections with
            # forked processes.
            butler.registry.resetConnectionPool()
            try:
                with lsst.utils.timer.profile(args.profile, _LOG):
                    executor.execute(graph)
            finally:
                if args.summary:
                    report = executor.getReport()
                    if report:
                        with open(args.summary, "w") as out:
                            # Do not save fields that are not set.
                            out.write(report.model_dump_json(exclude_none=True, indent=2))

    def _generateTaskTable(self, qgraph: QuantumGraph) -> Table:
        """Generate an astropy table listing the number of quanta per task
        for a given quantum graph.

        Parameters
        ----------
        qgraph : `lsst.pipe.base.QuantumGraph`
            A QuantumGraph object.

        Returns
        -------
        qg_task_table : `astropy.table.Table`
            An astropy table containing columns: Quanta and Tasks.
        """
        qg_quanta, qg_tasks = [], []
        for task_def in qgraph.iterTaskGraph():
            num_qnodes = qgraph.getNumberOfQuantaForTask(task_def)
            qg_quanta.append(num_qnodes)
            qg_tasks.append(task_def.label)
        qg_task_table = Table(dict(Quanta=qg_quanta, Tasks=qg_tasks))
        return qg_task_table

    def _summarize_qgraph(self, qgraph: QuantumGraph) -> int:
        """Report a summary of the quanta in the graph.

        Parameters
        ----------
        qgraph : `lsst.pipe.base.QuantumGraph`
            The graph to be summarized.

        Returns
        -------
        n_quanta : `int`
            The number of quanta in the graph.
        """
        n_quanta = len(qgraph)
        if n_quanta == 0:
            _LOG.info("QuantumGraph contains no quanta.")
        else:
            if _LOG.isEnabledFor(logging.INFO):
                qg_task_table = self._generateTaskTable(qgraph)
                qg_task_table_formatted = "\n".join(qg_task_table.pformat_all())
                quanta_str = "quantum" if n_quanta == 1 else "quanta"
                n_tasks = len(qgraph.taskGraph)
                n_tasks_plural = "" if n_tasks == 1 else "s"
                _LOG.info(
                    "QuantumGraph contains %d %s for %d task%s, graph ID: %r\n%s",
                    n_quanta,
                    quanta_str,
                    n_tasks,
                    n_tasks_plural,
                    qgraph.graphID,
                    qg_task_table_formatted,
                )
        return n_quanta

    def _importGraphFixup(self, args: SimpleNamespace) -> ExecutionGraphFixup | None:
        """Import/instantiate graph fixup object.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        fixup : `ExecutionGraphFixup` or `None`
            The fixup instance, or `None` if ``args.graph_fixup`` is not set.

        Raises
        ------
        ValueError
            Raised if import fails, method call raises exception, or returned
            instance has unexpected type.
        """
        if args.graph_fixup:
            try:
                factory = doImportType(args.graph_fixup)
            except Exception as exc:
                raise ValueError("Failed to import graph fixup class/method") from exc
            try:
                fixup = factory()
            except Exception as exc:
                raise ValueError("Failed to make instance of graph fixup") from exc
            if not isinstance(fixup, ExecutionGraphFixup):
                raise ValueError("Graph fixup is not an instance of ExecutionGraphFixup class")
            return fixup
        return None

    def preExecInitQBB(self, task_factory: TaskFactory, args: SimpleNamespace) -> None:
        # Load quantum graph. We do not really need individual Quanta here,
        # but we need datastore records for initInputs, and those are only
        # available from Quanta, so load the whole thing.
        qgraph = QuantumGraph.loadUri(args.qgraph, graphID=args.qgraph_id)
        universe = qgraph.universe

        # Collect all init input/output dataset IDs.
        predicted_inputs: set[DatasetId] = set()
        predicted_outputs: set[DatasetId] = set()
        for taskDef in qgraph.iterTaskGraph():
            if (refs := qgraph.initInputRefs(taskDef)) is not None:
                predicted_inputs.update(ref.id for ref in refs)
            if (refs := qgraph.initOutputRefs(taskDef)) is not None:
                predicted_outputs.update(ref.id for ref in refs)
        predicted_outputs.update(ref.id for ref in qgraph.globalInitOutputRefs())
        # remove intermediates from inputs
        predicted_inputs -= predicted_outputs
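        # (An ID appearing in both sets is an init-intermediate, produced and
        # consumed within this graph, so it must not be treated as a
        # pre-existing input.)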

        # Very inefficient way to extract datastore records from quantum
        # graph: we have to scan all quanta and look at their datastore
        # records.
        datastore_records: dict[str, DatastoreRecordData] = {}
        for quantum_node in qgraph:
            for store_name, records in quantum_node.quantum.datastore_records.items():
                subset = records.subset(predicted_inputs)
                if subset is not None:
                    datastore_records.setdefault(store_name, DatastoreRecordData()).update(subset)

        dataset_types = {dstype.name: dstype for dstype in qgraph.registryDatasetTypes()}

        # Make butler from everything.
        butler = QuantumBackedButler.from_predicted(
            config=args.butler_config,
            predicted_inputs=predicted_inputs,
            predicted_outputs=predicted_outputs,
            dimensions=universe,
            datastore_records=datastore_records,
            search_paths=args.config_search_path,
            dataset_types=dataset_types,
        )

        # Save all InitOutputs, configs, etc.
        preExecInit = PreExecInitLimited(butler, task_factory)
        preExecInit.initialize(qgraph)

    def runGraphQBB(self, task_factory: TaskFactory, args: SimpleNamespace) -> None:
        if not args.enable_implicit_threading:
            disable_implicit_threading()

        # Load quantum graph.
        nodes = args.qgraph_node_id or None
        qgraph = QuantumGraph.loadUri(args.qgraph, nodes=nodes, graphID=args.qgraph_id)

        if qgraph.metadata is None:
            raise ValueError("QuantumGraph is missing metadata, cannot continue.")

        self._summarize_qgraph(qgraph)

        dataset_types = {dstype.name: dstype for dstype in qgraph.registryDatasetTypes()}

        _butler_factory = _QBBFactory(
            butler_config=args.butler_config,
            dimensions=qgraph.universe,
            dataset_types=dataset_types,
        )

        # make special quantum executor
        resources = self._make_execution_resources(args)
        quantumExecutor = SingleQuantumExecutor(
            butler=None,
            taskFactory=task_factory,
            enableLsstDebug=args.enableLsstDebug,
            exitOnKnownError=args.fail_fast,
            limited_butler_factory=_butler_factory,
            resources=resources,
            clobberOutputs=True,
            skipExisting=True,
        )

        timeout = self.MP_TIMEOUT if args.timeout is None else args.timeout
        executor = MPGraphExecutor(
            numProc=args.processes,
            timeout=timeout,
            startMethod=args.start_method,
            quantumExecutor=quantumExecutor,
            failFast=args.fail_fast,
            pdb=args.pdb,
        )
        try:
            with lsst.utils.timer.profile(args.profile, _LOG):
                executor.execute(qgraph)
        finally:
            if args.summary:
                report = executor.getReport()
                if report:
                    with open(args.summary, "w") as out:
                        # Do not save fields that are not set.
                        out.write(report.model_dump_json(exclude_none=True, indent=2))