Coverage for python/lsst/ctrl/mpexec/cmdLineFwk.py: 15% (388 statements)
# This file is part of ctrl_mpexec.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Module defining CmdLineFwk class and related methods.
"""

from __future__ import annotations

__all__ = ["CmdLineFwk"]

import atexit
import contextlib
import copy
import datetime
import getpass
import logging
import shutil
from collections.abc import Iterable, Mapping, Sequence
from types import SimpleNamespace

import astropy.units as u
from astropy.table import Table
from lsst.daf.butler import (
    Butler,
    CollectionType,
    Config,
    DatasetId,
    DatasetRef,
    DatasetType,
    DimensionUniverse,
    LimitedButler,
    Quantum,
    QuantumBackedButler,
    Registry,
)
from lsst.daf.butler.datastore.cache_manager import DatastoreCacheManager
from lsst.daf.butler.datastore.record_data import DatastoreRecordData
from lsst.daf.butler.direct_butler import DirectButler
from lsst.daf.butler.registry import MissingCollectionError, RegistryDefaults
from lsst.daf.butler.registry.wildcards import CollectionWildcard
from lsst.pipe.base import (
    ExecutionResources,
    GraphBuilder,
    Instrument,
    Pipeline,
    PipelineDatasetTypes,
    QuantumGraph,
    TaskDef,
    TaskFactory,
    buildExecutionButler,
)
from lsst.utils import doImportType
from lsst.utils.threads import disable_implicit_threading

from . import util
from .dotTools import graph2dot, pipeline2dot
from .executionGraphFixup import ExecutionGraphFixup
from .mpGraphExecutor import MPGraphExecutor
from .preExecInit import PreExecInit, PreExecInitLimited
from .singleQuantumExecutor import SingleQuantumExecutor

# ----------------------------------
#  Local non-exported definitions --
# ----------------------------------

_LOG = logging.getLogger(__name__)


class _OutputChainedCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.CHAINED` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """

    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            self.chain = tuple(registry.getCollectionChain(name))
            self.exists = True
        except MissingCollectionError:
            self.chain = ()
            self.exists = False

    def __str__(self) -> str:
        return self.name

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """

    chain: tuple[str, ...]
    """The definition of the collection, if it already exists (`tuple` [`str`]).

    Empty if the collection does not already exist.
    """


class _OutputRunCollectionInfo:
    """A helper class for handling command-line arguments related to an output
    `~lsst.daf.butler.CollectionType.RUN` collection.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.
    name : `str`
        Name of the collection given on the command line.
    """

    def __init__(self, registry: Registry, name: str):
        self.name = name
        try:
            actualType = registry.getCollectionType(name)
            if actualType is not CollectionType.RUN:
                raise TypeError(f"Collection '{name}' exists but has type {actualType.name}, not RUN.")
            self.exists = True
        except MissingCollectionError:
            self.exists = False

    name: str
    """Name of the collection provided on the command line (`str`).
    """

    exists: bool
    """Whether this collection already exists in the registry (`bool`).
    """


class _ButlerFactory:
    """A helper class for processing command-line arguments related to input
    and output collections.

    Parameters
    ----------
    registry : `lsst.daf.butler.Registry`
        Butler registry that collections will be added to and/or queried from.

    args : `types.SimpleNamespace`
        Parsed command-line arguments. The following attributes are used,
        either at construction or in later methods.

        ``output``
            The name of a `~lsst.daf.butler.CollectionType.CHAINED`
            input/output collection.

        ``output_run``
            The name of a `~lsst.daf.butler.CollectionType.RUN` input/output
            collection.

        ``extend_run``
            A boolean indicating whether ``output_run`` should already exist
            and be extended.

        ``replace_run``
            A boolean indicating that (if `True`) ``output_run`` should
            already exist but will be removed from the output chained
            collection and replaced with a new one.

        ``prune_replaced``
            A boolean indicating whether to prune the replaced run (requires
            ``replace_run``).

        ``rebase``
            A boolean indicating whether to force the ``output`` collection
            to be consistent with ``inputs`` and ``output_run``, such that
            the ``output`` collection lists output run collections first
            (i.e. those whose names start with the ``output`` prefix), then
            the new inputs, then any original inputs not included in the new
            inputs.

        ``inputs``
            Input collections of any type; see
            :ref:`daf_butler_ordered_collection_searches` for details.

        ``butler_config``
            Path to a data repository root or configuration file.

    writeable : `bool`
        If `True`, a `~lsst.daf.butler.Butler` is being initialized in a
        context where actual writes should happen, and hence an output run is
        necessary.

    Raises
    ------
    ValueError
        Raised if ``writeable is True`` but there are no output collections.
    """

    def __init__(self, registry: Registry, args: SimpleNamespace, writeable: bool):
        if args.output is not None:
            self.output = _OutputChainedCollectionInfo(registry, args.output)
        else:
            self.output = None
        if args.output_run is not None:
            if args.rebase and self.output and not args.output_run.startswith(self.output.name):
                raise ValueError("Cannot rebase if output run does not start with output collection name.")
            self.outputRun = _OutputRunCollectionInfo(registry, args.output_run)
        elif self.output is not None:
            if args.extend_run:
                if not self.output.chain:
                    raise ValueError("Cannot use --extend-run option with non-existing or empty output chain")
                runName = self.output.chain[0]
            else:
                runName = f"{self.output}/{Instrument.makeCollectionTimestamp()}"
            self.outputRun = _OutputRunCollectionInfo(registry, runName)
        elif not writeable:
            # If we're not writing yet, ok to have no output run.
            self.outputRun = None
        else:
            raise ValueError("Cannot write without at least one of (--output, --output-run).")
        # Recursively flatten any input CHAINED collections. We do this up
        # front so we can tell if the user passes the same inputs on
        # subsequent calls, even though we also flatten when we define the
        # output CHAINED collection.
        self.inputs = tuple(registry.queryCollections(args.input, flattenChains=True)) if args.input else ()

        # If things are inconsistent and user has asked for a rebase then
        # construct the new output chain.
        if args.rebase and self._checkOutputInputConsistency():
            assert self.output is not None
            newOutputChain = [item for item in self.output.chain if item.startswith(self.output.name)]
            newOutputChain.extend([item for item in self.inputs if item not in newOutputChain])
            newOutputChain.extend([item for item in self.output.chain if item not in newOutputChain])
            self.output.chain = tuple(newOutputChain)
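
    # Illustrative example of the rebase ordering above (hypothetical names):
    # with output "u/me/out" whose existing chain is
    # ("u/me/out/run1", "old/input") and new inputs ("new/input",), the
    # rebased chain becomes ("u/me/out/run1", "new/input", "old/input"):
    # run collections sharing the output prefix first, then new inputs, then
    # the remaining original entries.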

    def check(self, args: SimpleNamespace) -> None:
        """Check command-line options for consistency with each other and the
        data repository.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.
        """
        assert not (args.extend_run and args.replace_run), "In mutually-exclusive group in ArgumentParser."
        if consistencyError := self._checkOutputInputConsistency():
            raise ValueError(consistencyError)

        if args.extend_run:
            if self.outputRun is None:
                raise ValueError("Cannot --extend-run when no output collection is given.")
            elif not self.outputRun.exists:
                raise ValueError(
                    f"Cannot --extend-run; output collection '{self.outputRun.name}' does not exist."
                )
        if not args.extend_run and self.outputRun is not None and self.outputRun.exists:
            raise ValueError(
                f"Output run '{self.outputRun.name}' already exists, but --extend-run was not given."
            )
        if args.prune_replaced and not args.replace_run:
            raise ValueError("--prune-replaced requires --replace-run.")
        if args.replace_run and (self.output is None or not self.output.exists):
            raise ValueError("--output must point to an existing CHAINED collection for --replace-run.")

    def _checkOutputInputConsistency(self) -> str | None:
        if self.inputs and self.output is not None and self.output.exists:
            # Passing the same inputs that were used to initialize the output
            # collection is allowed; this means the inputs must appear as a
            # contiguous subsequence of outputs (normally they're also at the
            # end, but --rebase will in general put them in the middle).
            for n in reversed(range(1 + len(self.output.chain) - len(self.inputs))):
                if self.inputs == self.output.chain[n : n + len(self.inputs)]:
                    return None
            return (
                f"Output CHAINED collection {self.output.name!r} exists and does not include the "
                f"same sequence of (flattened) input collections {self.inputs} as a contiguous "
                "subsequence. "
                "Use --rebase to ignore this problem and reset the output collection, but note that "
                "this may obfuscate what inputs were actually used to produce these outputs."
            )
        return None
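
    # Worked example of the contiguous-subsequence test above (hypothetical
    # names): inputs ("a", "b") are consistent with an output chain
    # ("run2", "a", "b", "run1"), because chain[1:3] == ("a", "b"); they are
    # not consistent with ("a", "run1", "b"), which has no contiguous
    # ("a", "b") slice and therefore triggers the error suggesting --rebase.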

    @classmethod
    def _makeReadParts(cls, args: SimpleNamespace) -> tuple[Butler, Sequence[str], _ButlerFactory]:
        """Parse arguments to support implementations of `makeReadButler` and
        `makeButlerAndCollections`.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler constructed from the repo at
            ``args.butler_config``, but with no default collections.
        inputs : `~collections.abc.Sequence` [ `str` ]
            A collection search path constructed according to ``args``.
        self : `_ButlerFactory`
            A new `_ButlerFactory` instance representing the processed version
            of ``args``.
        """
        butler = Butler.from_config(args.butler_config, writeable=False)
        self = cls(butler.registry, args, writeable=False)
        self.check(args)
        if self.output and self.output.exists:
            if args.replace_run:
                replaced = self.output.chain[0]
                inputs = list(self.output.chain[1:])
                _LOG.debug(
                    "Simulating collection search in '%s' after removing '%s'.", self.output.name, replaced
                )
            else:
                inputs = [self.output.name]
        else:
            inputs = list(self.inputs)
        if args.extend_run:
            assert self.outputRun is not None, "Output collection has to be specified."
            inputs.insert(0, self.outputRun.name)
        collSearch = CollectionWildcard.from_expression(inputs).require_ordered()
        return butler, collSearch, self

    @classmethod
    def makeReadButler(cls, args: SimpleNamespace) -> Butler:
        """Construct a read-only butler according to the given command-line
        arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler initialized with the collections specified by
            ``args``.
        """
        cls.defineDatastoreCache()  # Ensure that this butler can use a shared cache.
        butler, inputs, _ = cls._makeReadParts(args)
        _LOG.debug("Preparing butler to read from %s.", inputs)
        return Butler.from_config(butler=butler, collections=inputs)

    @classmethod
    def makeButlerAndCollections(cls, args: SimpleNamespace) -> tuple[Butler, Sequence[str], str | None]:
        """Return a read-only registry, a collection search path, and the name
        of the run to be used for future writes.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-only butler that collections will be added to and/or
            queried from.
        inputs : `~collections.abc.Sequence` [ `str` ]
            Collections to search for datasets.
        run : `str` or `None`
            Name of the output `~lsst.daf.butler.CollectionType.RUN`
            collection if it already exists, or `None` if it does not.
        """
        butler, inputs, self = cls._makeReadParts(args)
        run: str | None = None
        if args.extend_run:
            assert self.outputRun is not None, "Output collection has to be specified."
        if self.outputRun is not None:
            run = self.outputRun.name
        _LOG.debug("Preparing registry to read from %s and expect future writes to '%s'.", inputs, run)
        return butler, inputs, run

    @staticmethod
    def defineDatastoreCache() -> None:
        """Define where datastore cache directories should be found.

        Notes
        -----
        All the jobs should share a datastore cache if applicable. This
        method asks for a shared fallback cache to be defined and then
        configures an exit handler to clean it up.
        """
        defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset()
        if defined:
            atexit.register(shutil.rmtree, cache_dir, ignore_errors=True)
            _LOG.debug("Defining shared datastore cache directory to %s", cache_dir)

    @classmethod
    def makeWriteButler(cls, args: SimpleNamespace, taskDefs: Iterable[TaskDef] | None = None) -> Butler:
        """Return a read-write butler initialized to write to and read from
        the collections specified by the given command-line arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command-line arguments. See class documentation for the
            construction parameter of the same name.
        taskDefs : iterable of `TaskDef`, optional
            Definitions for tasks in a pipeline. This argument is only needed
            if ``args.replace_run`` is `True` and ``args.prune_replaced`` is
            "unstore".

        Returns
        -------
        butler : `lsst.daf.butler.Butler`
            A read-write butler initialized according to the given arguments.
        """
        cls.defineDatastoreCache()  # Ensure that this butler can use a shared cache.
        butler = Butler.from_config(args.butler_config, writeable=True)
        self = cls(butler.registry, args, writeable=True)
        self.check(args)
        assert self.outputRun is not None, "Output collection has to be specified."  # for mypy
        if self.output is not None:
            chainDefinition = list(self.output.chain if self.output.exists else self.inputs)
            if args.replace_run:
                replaced = chainDefinition.pop(0)
                if args.prune_replaced == "unstore":
                    # Remove datasets from the datastore.
                    with butler.transaction():
                        refs: Iterable[DatasetRef] = butler.registry.queryDatasets(..., collections=replaced)
                        # We want to remove regular outputs but keep
                        # initOutputs, configs, and versions.
                        if taskDefs is not None:
                            initDatasetNames = set(PipelineDatasetTypes.initOutputNames(taskDefs))
                            refs = [ref for ref in refs if ref.datasetType.name not in initDatasetNames]
                        butler.pruneDatasets(refs, unstore=True, disassociate=False)
                elif args.prune_replaced == "purge":
                    # Erase the entire collection and all its datasets; the
                    # collection must first be removed from its parent chain.
                    with butler.transaction():
                        butler.registry.setCollectionChain(self.output.name, chainDefinition, flatten=True)
                        butler.removeRuns([replaced], unstore=True)
                elif args.prune_replaced is not None:
                    raise NotImplementedError(f"Unsupported --prune-replaced option '{args.prune_replaced}'.")
            if not self.output.exists:
                butler.registry.registerCollection(self.output.name, CollectionType.CHAINED)
            if not args.extend_run:
                butler.registry.registerCollection(self.outputRun.name, CollectionType.RUN)
                chainDefinition.insert(0, self.outputRun.name)
                butler.registry.setCollectionChain(self.output.name, chainDefinition, flatten=True)
            _LOG.debug(
                "Preparing butler to write to '%s' and read from '%s'=%s",
                self.outputRun.name,
                self.output.name,
                chainDefinition,
            )
            butler.registry.defaults = RegistryDefaults(run=self.outputRun.name, collections=self.output.name)
        else:
            inputs = (self.outputRun.name,) + self.inputs
            _LOG.debug("Preparing butler to write to '%s' and read from %s.", self.outputRun.name, inputs)
            butler.registry.defaults = RegistryDefaults(run=self.outputRun.name, collections=inputs)
        return butler
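
    # Illustrative outcome of makeWriteButler (hypothetical names): running
    # with --input raw --output u/me/out and no --output-run creates RUN
    # collection "u/me/out/<timestamp>" and CHAINED collection
    # "u/me/out" = ["u/me/out/<timestamp>", "raw"], then sets them as the
    # butler's default run and read collections.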

    output: _OutputChainedCollectionInfo | None
    """Information about the output chained collection, if there is or will be
    one (`_OutputChainedCollectionInfo` or `None`).
    """

    outputRun: _OutputRunCollectionInfo | None
    """Information about the output run collection, if there is or will be
    one (`_OutputRunCollectionInfo` or `None`).
    """

    inputs: tuple[str, ...]
    """Input collections provided directly by the user (`tuple` [ `str` ]).
    """


class _QBBFactory:
    """Callable that makes QuantumBackedButler (QBB) instances."""

    def __init__(
        self, butler_config: Config, dimensions: DimensionUniverse, dataset_types: Mapping[str, DatasetType]
    ):
        self.butler_config = butler_config
        self.dimensions = dimensions
        self.dataset_types = dataset_types

    def __call__(self, quantum: Quantum) -> LimitedButler:
        """Return freshly initialized `~lsst.daf.butler.QuantumBackedButler`.

        Factory method to create QuantumBackedButler instances.
        """
        return QuantumBackedButler.initialize(
            config=self.butler_config,
            quantum=quantum,
            dimensions=self.dimensions,
            dataset_types=self.dataset_types,
        )
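
# Illustrative sketch (hypothetical objects): the quantum executor calls this
# factory once per quantum, so each quantum gets a limited butler backed only
# by its own predicted inputs and outputs:
#
#     qbb_factory = _QBBFactory(butler_config, qgraph.universe, dataset_types)
#     limited_butler = qbb_factory(quantum_node.quantum)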


# ------------------------
#  Exported definitions --
# ------------------------


class CmdLineFwk:
    """PipelineTask framework which executes tasks from command line.

    In addition to executing tasks this activator provides additional methods
    for task management like dumping configuration or execution chain.
    """

    MP_TIMEOUT = 3600 * 24 * 30  # Default timeout (sec) for multiprocessing
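    # The default works out to 30 days; a --timeout value on the command line
    # overrides it (see runPipeline and runGraphQBB).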

    def __init__(self) -> None:
        pass

    def makePipeline(self, args: SimpleNamespace) -> Pipeline:
        """Build a pipeline from command line arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        pipeline : `~lsst.pipe.base.Pipeline`
        """
        if args.pipeline:
            pipeline = Pipeline.from_uri(args.pipeline)
        else:
            pipeline = Pipeline("anonymous")

        # loop over all pipeline actions and apply them in order
        for action in args.pipeline_actions:
            if action.action == "add_instrument":
                pipeline.addInstrument(action.value)

            elif action.action == "new_task":
                pipeline.addTask(action.value, action.label)

            elif action.action == "delete_task":
                pipeline.removeTask(action.label)

            elif action.action == "config":
                # action value string is "field=value", split it at '='
                field, _, value = action.value.partition("=")
                pipeline.addConfigOverride(action.label, field, value)

            elif action.action == "configfile":
                pipeline.addConfigFile(action.label, action.value)

            else:
                raise ValueError(f"Unexpected pipeline action: {action.action}")

        if args.save_pipeline:
            pipeline.write_to_uri(args.save_pipeline)

        if args.pipeline_dot:
            pipeline2dot(pipeline, args.pipeline_dot)

        return pipeline
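
    # Illustrative mapping (hypothetical command line): `pipetask run
    # -p my.yaml -c task1:threshold=5.0 -C task2:overrides.py` arrives here
    # as pipeline_actions roughly equivalent to
    #     [<action="config", label="task1", value="threshold=5.0">,
    #      <action="configfile", label="task2", value="overrides.py">]
    # and is applied, in order, to the pipeline loaded from my.yaml.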

    def makeGraph(self, pipeline: Pipeline, args: SimpleNamespace) -> QuantumGraph | None:
        """Build a graph from command line arguments.

        Parameters
        ----------
        pipeline : `~lsst.pipe.base.Pipeline`
            Pipeline, can be empty or `None` if graph is read from a file.
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        graph : `~lsst.pipe.base.QuantumGraph` or `None`
            If the resulting graph is empty then `None` is returned.
        """
        # make sure that --extend-run always enables --skip-existing
        if args.extend_run:
            args.skip_existing = True

        butler, collections, run = _ButlerFactory.makeButlerAndCollections(args)

        if args.skip_existing and run:
            args.skip_existing_in += (run,)

        if args.qgraph:
            # click passes an empty tuple as the default value for
            # qgraph_node_id
            nodes = args.qgraph_node_id or None
            qgraph = QuantumGraph.loadUri(args.qgraph, butler.dimensions, nodes=nodes, graphID=args.qgraph_id)

            # a pipeline cannot be provided in this case
            if pipeline:
                raise ValueError("Pipeline must not be given when quantum graph is read from file.")
            if args.show_qgraph_header:
                print(QuantumGraph.readHeader(args.qgraph))
        else:
            task_defs = list(pipeline.toExpandedPipeline())
            if args.mock:
                from lsst.pipe.base.tests.mocks import mock_task_defs

                task_defs = mock_task_defs(
                    task_defs,
                    unmocked_dataset_types=args.unmocked_dataset_types,
                    force_failures=args.mock_failure,
                )
            # make execution plan (a.k.a. DAG) for pipeline
            graphBuilder = GraphBuilder(
                butler.registry,
                skipExistingIn=args.skip_existing_in,
                clobberOutputs=args.clobber_outputs,
                datastore=butler._datastore if args.qgraph_datastore_records else None,
            )
            # accumulate metadata
            metadata = {
                "input": args.input,
                "output": args.output,
                "butler_argument": args.butler_config,
                "output_run": run,
                "extend_run": args.extend_run,
                "skip_existing_in": args.skip_existing_in,
                "skip_existing": args.skip_existing,
                "data_query": args.data_query,
                "user": getpass.getuser(),
                "time": f"{datetime.datetime.now()}",
            }
            assert run is not None, "Butler output run collection must be defined"
            qgraph = graphBuilder.makeGraph(
                task_defs,
                collections,
                run,
                args.data_query,
                metadata=metadata,
                datasetQueryConstraint=args.dataset_query_constraint,
                dataId=pipeline.get_data_id(butler.dimensions),
            )
            if args.show_qgraph_header:
                qgraph.buildAndPrintHeader()

        if len(qgraph) == 0:
            # Nothing to do.
            return None
        self._summarize_qgraph(qgraph)

        if args.save_qgraph:
            qgraph.saveUri(args.save_qgraph)

        if args.save_single_quanta:
            for quantumNode in qgraph:
                sqgraph = qgraph.subset(quantumNode)
                uri = args.save_single_quanta.format(quantumNode)
                sqgraph.saveUri(uri)

        if args.qgraph_dot:
            graph2dot(qgraph, args.qgraph_dot)

        if args.execution_butler_location:
            butler = Butler.from_config(args.butler_config)
            assert isinstance(butler, DirectButler), "Execution butler needs DirectButler"
            newArgs = copy.deepcopy(args)

            def builderShim(butler: Butler) -> Butler:
                assert isinstance(butler, DirectButler), "Execution butler needs DirectButler"
                newArgs.butler_config = butler._config
                # makeWriteButler is called for its side effects, mainly
                # parsing all the args into collection names and creating
                # collections.
                newButler = _ButlerFactory.makeWriteButler(newArgs)
                return newButler

            # Include the output collection in the collections for input
            # files if it exists in the repo.
            all_inputs = args.input
            if args.output is not None:
                with contextlib.suppress(MissingCollectionError):
                    all_inputs += (next(iter(butler.registry.queryCollections(args.output))),)

            _LOG.debug("Calling buildExecutionButler with collections=%s", all_inputs)
            buildExecutionButler(
                butler,
                qgraph,
                args.execution_butler_location,
                run,
                butlerModifier=builderShim,
                collections=all_inputs,
                clobber=args.clobber_execution_butler,
                datastoreRoot=args.target_datastore_root,
                transfer=args.transfer,
            )

        return qgraph

    def _make_execution_resources(self, args: SimpleNamespace) -> ExecutionResources:
        """Construct the execution resource class from arguments.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        resources : `~lsst.pipe.base.ExecutionResources`
            The resources available to each quantum.
        """
        return ExecutionResources(
            num_cores=args.cores_per_quantum, max_mem=args.memory_per_quantum, default_mem_units=u.MB
        )
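
    # Illustrative values (hypothetical): --cores-per-quantum 4
    # --memory-per-quantum 2048 yields
    # ExecutionResources(num_cores=4, max_mem=2048, default_mem_units=u.MB),
    # i.e. a unitless memory value is interpreted as megabytes.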

    def runPipeline(
        self,
        graph: QuantumGraph,
        taskFactory: TaskFactory,
        args: SimpleNamespace,
        butler: Butler | None = None,
    ) -> None:
        """Execute complete QuantumGraph.

        Parameters
        ----------
        graph : `~lsst.pipe.base.QuantumGraph`
            Execution graph.
        taskFactory : `~lsst.pipe.base.TaskFactory`
            Task factory.
        args : `types.SimpleNamespace`
            Parsed command line.
        butler : `~lsst.daf.butler.Butler`, optional
            Data Butler instance; if not defined then a new instance is made
            using command line options.
        """
        # Check that the output run defined on the command line is consistent
        # with the quantum graph.
        if args.output_run and graph.metadata:
            graph_output_run = graph.metadata.get("output_run", args.output_run)
            if graph_output_run != args.output_run:
                raise ValueError(
                    f"Output run defined on command line ({args.output_run}) has to be "
                    f"identical to graph metadata ({graph_output_run}). "
                    "To update graph metadata run `pipetask update-graph-run` command."
                )

        # Make sure that --extend-run always enables --skip-existing;
        # clobbering should be disabled if --extend-run is not specified.
        if args.extend_run:
            args.skip_existing = True
        else:
            args.clobber_outputs = False

        if not args.enable_implicit_threading:
            disable_implicit_threading()

        # Make butler instance. QuantumGraph should have an output run
        # defined, but we ignore it here and let the command line decide the
        # actual output run.
        if butler is None:
            butler = _ButlerFactory.makeWriteButler(args, graph.iterTaskGraph())

        if args.skip_existing:
            args.skip_existing_in += (butler.run,)

        # Enable lsstDebug debugging. Note that this is done once in the
        # main process before PreExecInit and it is also repeated before
        # running each task in SingleQuantumExecutor (which may not be
        # needed if `multiprocessing` always uses fork start method).
        if args.enableLsstDebug:
            try:
                _LOG.debug("Will try to import debug.py")
                import debug  # type: ignore # noqa:F401
            except ImportError:
                _LOG.warning("No 'debug' module found.")

        # Save all InitOutputs, configs, etc.
        preExecInit = PreExecInit(butler, taskFactory, extendRun=args.extend_run)
        preExecInit.initialize(
            graph,
            saveInitOutputs=not args.skip_init_writes,
            registerDatasetTypes=args.register_dataset_types,
            saveVersions=not args.no_versions,
        )

        if not args.init_only:
            graphFixup = self._importGraphFixup(args)
            resources = self._make_execution_resources(args)
            quantumExecutor = SingleQuantumExecutor(
                butler,
                taskFactory,
                skipExistingIn=args.skip_existing_in,
                clobberOutputs=args.clobber_outputs,
                enableLsstDebug=args.enableLsstDebug,
                exitOnKnownError=args.fail_fast,
                resources=resources,
            )

            timeout = self.MP_TIMEOUT if args.timeout is None else args.timeout
            executor = MPGraphExecutor(
                numProc=args.processes,
                timeout=timeout,
                startMethod=args.start_method,
                quantumExecutor=quantumExecutor,
                failFast=args.fail_fast,
                pdb=args.pdb,
                executionGraphFixup=graphFixup,
            )
            # Have to reset the connection pool to avoid sharing connections
            # with forked processes.
            butler.registry.resetConnectionPool()
            try:
                with util.profile(args.profile, _LOG):
                    executor.execute(graph)
            finally:
                if args.summary:
                    report = executor.getReport()
                    if report:
                        with open(args.summary, "w") as out:
                            # Do not save fields that are not set.
                            out.write(report.json(exclude_none=True, indent=2))

    def _generateTaskTable(self, qgraph: QuantumGraph) -> Table:
        """Generate an astropy table listing the number of quanta per task
        for a given quantum graph.

        Parameters
        ----------
        qgraph : `lsst.pipe.base.QuantumGraph`
            A QuantumGraph object.

        Returns
        -------
        qg_task_table : `astropy.table.Table`
            An astropy table containing columns: Quanta and Tasks.
        """
        qg_quanta, qg_tasks = [], []
        for task_def in qgraph.iterTaskGraph():
            num_qnodes = qgraph.getNumberOfQuantaForTask(task_def)
            qg_quanta.append(num_qnodes)
            qg_tasks.append(task_def.label)
        qg_task_table = Table(dict(Quanta=qg_quanta, Tasks=qg_tasks))
        return qg_task_table
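
    # Illustrative rendering of the table as logged by _summarize_qgraph
    # (hypothetical labels and counts; alignment approximate):
    #
    #     Quanta       Tasks
    #     ------ -----------
    #        100         isr
    #        100   calibrate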

    def _summarize_qgraph(self, qgraph: QuantumGraph) -> int:
        """Report a summary of the quanta in the graph.

        Parameters
        ----------
        qgraph : `lsst.pipe.base.QuantumGraph`
            The graph to be summarized.

        Returns
        -------
        n_quanta : `int`
            The number of quanta in the graph.
        """
        n_quanta = len(qgraph)
        if n_quanta == 0:
            _LOG.info("QuantumGraph contains no quanta.")
        else:
            if _LOG.isEnabledFor(logging.INFO):
                qg_task_table = self._generateTaskTable(qgraph)
                qg_task_table_formatted = "\n".join(qg_task_table.pformat_all())
                quanta_str = "quantum" if n_quanta == 1 else "quanta"
                n_tasks = len(qgraph.taskGraph)
                n_tasks_plural = "" if n_tasks == 1 else "s"
                _LOG.info(
                    "QuantumGraph contains %d %s for %d task%s, graph ID: %r\n%s",
                    n_quanta,
                    quanta_str,
                    n_tasks,
                    n_tasks_plural,
                    qgraph.graphID,
                    qg_task_table_formatted,
                )
        return n_quanta

    def _importGraphFixup(self, args: SimpleNamespace) -> ExecutionGraphFixup | None:
        """Import and instantiate a graph fixup object.

        Parameters
        ----------
        args : `types.SimpleNamespace`
            Parsed command line.

        Returns
        -------
        fixup : `ExecutionGraphFixup` or `None`

        Raises
        ------
        ValueError
            Raised if the import fails, the factory call raises an exception,
            or the returned instance has an unexpected type.
        """
        if args.graph_fixup:
            try:
                factory = doImportType(args.graph_fixup)
            except Exception as exc:
                raise ValueError("Failed to import graph fixup class/method") from exc
            try:
                fixup = factory()
            except Exception as exc:
                raise ValueError("Failed to make instance of graph fixup") from exc
            if not isinstance(fixup, ExecutionGraphFixup):
                raise ValueError("Graph fixup is not an instance of ExecutionGraphFixup class")
            return fixup
        return None
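
    # Minimal sketch of a user-provided fixup (hypothetical; see
    # .executionGraphFixup for the actual abstract interface to override):
    #
    #     class MyFixup(ExecutionGraphFixup):
    #         def fixupQuanta(self, graph):
    #             # reorder or add dependencies between quanta here
    #             return graph
    #
    # passed on the command line as --graph-fixup mymodule.MyFixup.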

    def preExecInitQBB(self, task_factory: TaskFactory, args: SimpleNamespace) -> None:
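        """Save all InitOutputs, configs, and package versions for a quantum
        graph using a `~lsst.daf.butler.QuantumBackedButler`.

        Parameters
        ----------
        task_factory : `~lsst.pipe.base.TaskFactory`
            Task factory.
        args : `types.SimpleNamespace`
            Parsed command line.
        """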
        # Load quantum graph. We do not really need individual Quanta here,
        # but we need datastore records for initInputs, and those are only
        # available from Quanta, so load the whole thing.
        qgraph = QuantumGraph.loadUri(args.qgraph, graphID=args.qgraph_id)
        universe = qgraph.universe

        # Collect all init input/output dataset IDs.
        predicted_inputs: set[DatasetId] = set()
        predicted_outputs: set[DatasetId] = set()
        for taskDef in qgraph.iterTaskGraph():
            if (refs := qgraph.initInputRefs(taskDef)) is not None:
                predicted_inputs.update(ref.id for ref in refs)
            if (refs := qgraph.initOutputRefs(taskDef)) is not None:
                predicted_outputs.update(ref.id for ref in refs)
        predicted_outputs.update(ref.id for ref in qgraph.globalInitOutputRefs())
        # remove intermediates from inputs
        predicted_inputs -= predicted_outputs

        # Very inefficient way to extract datastore records from quantum
        # graph, we have to scan all quanta and look at their datastore
        # records.
        datastore_records: dict[str, DatastoreRecordData] = {}
        for quantum_node in qgraph:
            for store_name, records in quantum_node.quantum.datastore_records.items():
                subset = records.subset(predicted_inputs)
                if subset is not None:
                    datastore_records.setdefault(store_name, DatastoreRecordData()).update(subset)

        dataset_types = {dstype.name: dstype for dstype in qgraph.registryDatasetTypes()}

        # Make butler from everything.
        butler = QuantumBackedButler.from_predicted(
            config=args.butler_config,
            predicted_inputs=predicted_inputs,
            predicted_outputs=predicted_outputs,
            dimensions=universe,
            datastore_records=datastore_records,
            search_paths=args.config_search_path,
            dataset_types=dataset_types,
        )

        # Save all InitOutputs, configs, etc.
        preExecInit = PreExecInitLimited(butler, task_factory)
        preExecInit.initialize(qgraph)

    def runGraphQBB(self, task_factory: TaskFactory, args: SimpleNamespace) -> None:
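        """Execute a quantum graph using quantum-backed butlers, without a
        full registry.

        Parameters
        ----------
        task_factory : `~lsst.pipe.base.TaskFactory`
            Task factory.
        args : `types.SimpleNamespace`
            Parsed command line.
        """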
        # Load quantum graph.
        nodes = args.qgraph_node_id or None
        qgraph = QuantumGraph.loadUri(args.qgraph, nodes=nodes, graphID=args.qgraph_id)

        if qgraph.metadata is None:
            raise ValueError("QuantumGraph is missing metadata, cannot continue.")

        self._summarize_qgraph(qgraph)

        dataset_types = {dstype.name: dstype for dstype in qgraph.registryDatasetTypes()}

        _butler_factory = _QBBFactory(
            butler_config=args.butler_config,
            dimensions=qgraph.universe,
            dataset_types=dataset_types,
        )

        # make special quantum executor
        resources = self._make_execution_resources(args)
        quantumExecutor = SingleQuantumExecutor(
            butler=None,
            taskFactory=task_factory,
            enableLsstDebug=args.enableLsstDebug,
            exitOnKnownError=args.fail_fast,
            limited_butler_factory=_butler_factory,
            resources=resources,
        )

        timeout = self.MP_TIMEOUT if args.timeout is None else args.timeout
        executor = MPGraphExecutor(
            numProc=args.processes,
            timeout=timeout,
            startMethod=args.start_method,
            quantumExecutor=quantumExecutor,
            failFast=args.fail_fast,
            pdb=args.pdb,
        )
        try:
            with util.profile(args.profile, _LOG):
                executor.execute(qgraph)
        finally:
            if args.summary:
                report = executor.getReport()
                if report:
                    with open(args.summary, "w") as out:
                        # Do not save fields that are not set.
                        out.write(report.json(exclude_none=True, indent=2))
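
# Illustrative end-to-end flow (hypothetical driver; the real `pipetask` CLI
# assembles `args` and a concrete TaskFactory before calling these methods):
#
#     fwk = CmdLineFwk()
#     pipeline = fwk.makePipeline(args)       # apply -p/-t/-c/-C actions
#     qgraph = fwk.makeGraph(pipeline, args)  # build or load the quantum graph
#     if qgraph is not None:
#         fwk.runPipeline(qgraph, task_factory, args)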