Coverage for python/lsst/pipe/base/graphBuilder.py: 50%
32 statements
# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
28"""Module defining GraphBuilder class and related methods.
29"""
31from __future__ import annotations
33__all__ = ["GraphBuilder"]
36from collections.abc import Iterable, Mapping
37from typing import Any
39from lsst.daf.butler import Butler, DataCoordinate, Datastore, Registry
40from lsst.daf.butler.registry.wildcards import CollectionWildcard
42from ._datasetQueryConstraints import DatasetQueryConstraintVariant
43from .all_dimensions_quantum_graph_builder import AllDimensionsQuantumGraphBuilder
44from .graph import QuantumGraph
45from .pipeline import Pipeline, TaskDef
46from .pipeline_graph import PipelineGraph
48# Re-exports for backwards-compatibility.
49from .quantum_graph_builder import GraphBuilderError # noqa: F401
50from .quantum_graph_builder import OutputExistsError # noqa: F401
51from .quantum_graph_builder import PrerequisiteMissingError # noqa: F401


class GraphBuilder:
    """GraphBuilder is responsible for building a task execution graph from
    a Pipeline.

    Parameters
    ----------
    registry : `~lsst.daf.butler.Registry`
        Data butler registry instance.
    skipExistingIn : `~typing.Any`
        Expressions representing the collections to search for existing
        output datasets that should be skipped. See
        :ref:`daf_butler_ordered_collection_searches`.
    clobberOutputs : `bool`, optional
        If `True` (default), allow quanta to be created even if partial
        outputs exist; this requires the same behavior to be enabled when
        executing.
    datastore : `~lsst.daf.butler.Datastore`, optional
        If not `None`, fill datastore records in each generated quantum.
    """

    def __init__(
        self,
        registry: Registry,
        skipExistingIn: Any = None,
        clobberOutputs: bool = True,
        datastore: Datastore | None = None,
    ):
        self.registry = registry
        self.dimensions = registry.dimensions
        self.skipExistingIn = skipExistingIn
        self.clobberOutputs = clobberOutputs
        self.datastore = datastore

    def makeGraph(
        self,
        pipeline: Pipeline | Iterable[TaskDef],
        collections: Any,
        run: str,
        userQuery: str | None,
        datasetQueryConstraint: DatasetQueryConstraintVariant = DatasetQueryConstraintVariant.ALL,
        metadata: Mapping[str, Any] | None = None,
        bind: Mapping[str, Any] | None = None,
        dataId: DataCoordinate | None = None,
    ) -> QuantumGraph:
98 """Create execution graph for a pipeline.
100 Parameters
101 ----------
102 pipeline : `Pipeline` or `~collections.abc.Iterable` [ `TaskDef` ]
103 Pipeline definition, task names/classes and their configs.
104 collections : `~typing.Any`
105 Expressions representing the collections to search for input
106 datasets. See :ref:`daf_butler_ordered_collection_searches`.
107 run : `str`
108 Name of the `~lsst.daf.butler.CollectionType.RUN` collection for
109 output datasets. Collection does not have to exist and it will be
110 created when graph is executed.
111 userQuery : `str`
112 String which defines user-defined selection for registry, should be
113 empty or `None` if there is no restrictions on data selection.
114 datasetQueryConstraint : `DatasetQueryConstraintVariant`, optional
115 The query constraint variant that should be used to constraint the
116 query based on dataset existance, defaults to
117 `DatasetQueryConstraintVariant.ALL`.
118 metadata : Optional Mapping of `str` to primitives
119 This is an optional parameter of extra data to carry with the
120 graph. Entries in this mapping should be able to be serialized in
121 JSON.
122 bind : `~collections.abc.Mapping`, optional
123 Mapping containing literal values that should be injected into the
124 ``userQuery`` expression, keyed by the identifiers they replace.
125 dataId : `lsst.daf.butler.DataCoordinate`, optional
126 Data ID that should also be included in the query constraint.
127 Ignored if ``pipeline`` is a `Pipeline` instance (which has its own
128 data ID).
130 Returns
131 -------
132 graph : `QuantumGraph`
133 The constructed graph.
135 Raises
136 ------
137 UserExpressionError
138 Raised when user expression cannot be parsed.
139 OutputExistsError
140 Raised when output datasets already exist.
141 Exception
142 Other exceptions types may be raised by underlying registry
143 classes.
144 """
        if isinstance(pipeline, Pipeline):
            pipeline_graph = pipeline.to_graph()
        else:
            pipeline_graph = PipelineGraph(data_id=dataId)
            for task_def in pipeline:
                pipeline_graph.add_task(
                    task_def.label,
                    task_def.taskClass,
                    config=task_def.config,
                    connections=task_def.connections,
                )
        # We assume `registry` is actually a RegistryShim that has a butler
        # inside it, since that's now the only kind of Registry code outside
        # Butler should be able to get, and we assert that the datastore came
        # from the same place.  Soon this interface will be deprecated in
        # favor of QuantumGraphBuilder (which takes a Butler directly, as all
        # new code should) anyway.
        butler: Butler = self.registry._butler  # type: ignore
        assert butler._datastore is self.datastore or self.datastore is None
        qgb = AllDimensionsQuantumGraphBuilder(
            pipeline_graph,
            butler,
            input_collections=CollectionWildcard.from_expression(collections).require_ordered(),
            output_run=run,
            skip_existing_in=self.skipExistingIn if self.skipExistingIn is not None else (),
            clobber=self.clobberOutputs,
            where=userQuery if userQuery is not None else "",
            dataset_query_constraint=datasetQueryConstraint,
            bind=bind,
        )
        return qgb.build(metadata)
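
# Illustrative usage sketch (not part of the original module): how GraphBuilder
# and makeGraph are typically driven.  The repository path, pipeline file,
# collection names, and query string below are hypothetical placeholders.
#
#     from lsst.daf.butler import Butler
#     from lsst.pipe.base import Pipeline
#     from lsst.pipe.base.graphBuilder import GraphBuilder
#
#     butler = Butler("/path/to/repo", writeable=False)
#     pipeline = Pipeline.fromFile("my_pipeline.yaml")
#     builder = GraphBuilder(butler.registry)
#     qgraph = builder.makeGraph(
#         pipeline,
#         collections=["HSC/defaults"],
#         run="u/someone/example_run",
#         userQuery="instrument = 'HSC' AND visit = 12345",
#     )
#     qgraph.saveUri("example.qgraph")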