Coverage for python/lsst/pipe/base/graphBuilder.py: 58%

37 statements  

# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Module defining GraphBuilder class and related methods.
"""

from __future__ import annotations

__all__ = ["GraphBuilder"]


import warnings
from collections.abc import Iterable, Mapping
from typing import Any

from deprecated.sphinx import deprecated
from lsst.daf.butler import Butler, DataCoordinate, Datastore, Registry
from lsst.daf.butler.registry.wildcards import CollectionWildcard
from lsst.utils.introspection import find_outside_stacklevel

from ._datasetQueryConstraints import DatasetQueryConstraintVariant
from .all_dimensions_quantum_graph_builder import AllDimensionsQuantumGraphBuilder
from .graph import QuantumGraph
from .pipeline import Pipeline, TaskDef
from .pipeline_graph import PipelineGraph

# Re-exports for backwards-compatibility.
from .quantum_graph_builder import GraphBuilderError  # noqa: F401
from .quantum_graph_builder import OutputExistsError  # noqa: F401
from .quantum_graph_builder import PrerequisiteMissingError  # noqa: F401

# TODO: remove this module on DM-40443.
warnings.warn(
    "The graphBuilder module is deprecated in favor of quantum_graph_builder, and will be removed after v27.",
    category=FutureWarning,
    stacklevel=find_outside_stacklevel("lsst.pipe.base"),
)
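# For code migrating off this module, a rough sketch of the replacement
# builder that ``GraphBuilder.makeGraph`` below forwards to.  The repository
# path, pipeline file, collection, run, and query values are purely
# illustrative::
#
#     from lsst.daf.butler import Butler
#     from lsst.pipe.base import Pipeline
#     from lsst.pipe.base.all_dimensions_quantum_graph_builder import (
#         AllDimensionsQuantumGraphBuilder,
#     )
#
#     butler = Butler("/path/to/repo")
#     pipeline = Pipeline.from_uri("my_pipeline.yaml")
#     qgb = AllDimensionsQuantumGraphBuilder(
#         pipeline.to_graph(),
#         butler,
#         input_collections=["HSC/defaults"],
#         output_run="u/someone/demo",
#         where="instrument = 'HSC' AND visit = 12345",
#     )
#     qgraph = qgb.build(metadata=None, attach_datastore_records=True)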

@deprecated(
    "Deprecated in favor of QuantumGraphBuilder and will be removed after v27.",
    version="v27.0",
    category=FutureWarning,
)
class GraphBuilder:
70 """GraphBuilder class is responsible for building task execution graph from 

71 a Pipeline. 

72 

73 Parameters 

74 ---------- 

75 registry : `~lsst.daf.butler.Registry` 

76 Data butler instance. 

77 skipExistingIn : `~typing.Any` 

78 Expressions representing the collections to search for existing 

79 output datasets that should be skipped. See 

80 :ref:`daf_butler_ordered_collection_searches`. 

81 clobberOutputs : `bool`, optional 

82 If `True` (default), allow quanta to created even if partial outputs 

83 exist; this requires the same behavior behavior to be enabled when 

84 executing. 

85 datastore : `~lsst.daf.butler.Datastore`, optional 

86 If not `None` then fill datastore records in each generated Quantum. 
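
    Examples
    --------
    A minimal usage sketch, assuming an existing butler repository (the
    repository path below is hypothetical)::

        from lsst.daf.butler import Butler
        from lsst.pipe.base.graphBuilder import GraphBuilder

        butler = Butler("/path/to/repo")
        builder = GraphBuilder(butler.registry)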

87 """ 

    def __init__(
        self,
        registry: Registry,
        skipExistingIn: Any = None,
        clobberOutputs: bool = True,
        datastore: Datastore | None = None,
    ):
        self.registry = registry
        self.dimensions = registry.dimensions
        self.skipExistingIn = skipExistingIn
        self.clobberOutputs = clobberOutputs
        self.datastore = datastore

    def makeGraph(
        self,
        pipeline: Pipeline | Iterable[TaskDef],
        collections: Any,
        run: str,
        userQuery: str | None,
        datasetQueryConstraint: DatasetQueryConstraintVariant = DatasetQueryConstraintVariant.ALL,
        metadata: Mapping[str, Any] | None = None,
        bind: Mapping[str, Any] | None = None,
        dataId: DataCoordinate | None = None,
    ) -> QuantumGraph:
113 """Create execution graph for a pipeline. 

114 

115 Parameters 

116 ---------- 

117 pipeline : `Pipeline` or `~collections.abc.Iterable` [ `TaskDef` ] 

118 Pipeline definition, task names/classes and their configs. 

119 collections : `~typing.Any` 

120 Expressions representing the collections to search for input 

121 datasets. See :ref:`daf_butler_ordered_collection_searches`. 

122 run : `str` 

123 Name of the `~lsst.daf.butler.CollectionType.RUN` collection for 

124 output datasets. Collection does not have to exist and it will be 

125 created when graph is executed. 

126 userQuery : `str` 

127 String which defines user-defined selection for registry, should be 

128 empty or `None` if there is no restrictions on data selection. 

129 datasetQueryConstraint : `DatasetQueryConstraintVariant`, optional 

130 The query constraint variant that should be used to constraint the 

131 query based on dataset existance, defaults to 

132 `DatasetQueryConstraintVariant.ALL`. 

133 metadata : Optional Mapping of `str` to primitives 

134 This is an optional parameter of extra data to carry with the 

135 graph. Entries in this mapping should be able to be serialized in 

136 JSON. 

137 bind : `~collections.abc.Mapping`, optional 

138 Mapping containing literal values that should be injected into the 

139 ``userQuery`` expression, keyed by the identifiers they replace. 

140 dataId : `lsst.daf.butler.DataCoordinate`, optional 

141 Data ID that should also be included in the query constraint. 

142 Ignored if ``pipeline`` is a `Pipeline` instance (which has its own 

143 data ID). 

144 

145 Returns 

146 ------- 

147 graph : `QuantumGraph` 

148 The constructed graph. 

149 

150 Raises 

151 ------ 

152 UserExpressionError 

153 Raised when user expression cannot be parsed. 

154 OutputExistsError 

155 Raised when output datasets already exist. 

156 Exception 

157 Other exceptions types may be raised by underlying registry 

158 classes. 
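
        Examples
        --------
        An illustrative call, assuming ``builder`` is a `GraphBuilder`
        instance; the pipeline file, collection, run, and query values below
        are hypothetical::

            from lsst.pipe.base import Pipeline

            pipeline = Pipeline.from_uri("my_pipeline.yaml")
            qgraph = builder.makeGraph(
                pipeline,
                collections=["HSC/defaults"],
                run="u/someone/demo",
                userQuery="instrument = 'HSC' AND visit = 12345",
            )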

159 """ 

        if isinstance(pipeline, Pipeline):
            pipeline_graph = pipeline.to_graph()
        else:
            pipeline_graph = PipelineGraph(data_id=dataId)
            for task_def in pipeline:
                pipeline_graph.add_task(
                    task_def.label,
                    task_def.taskClass,
                    config=task_def.config,
                    connections=task_def.connections,
                )
        # We assume `registry` is actually a RegistryShim that has a butler
        # inside it, since that's now the only kind of Registry code outside
        # Butler should be able to get, and we assert that the datastore came
        # from the same place. Soon this interface will be deprecated in favor
        # of QuantumGraphBuilder (which takes a Butler directly, as all new
        # code should) anyway.
        butler: Butler = self.registry._butler  # type: ignore
        assert butler._datastore is self.datastore or self.datastore is None
        qgb = AllDimensionsQuantumGraphBuilder(
            pipeline_graph,
            butler,
            input_collections=CollectionWildcard.from_expression(collections).require_ordered(),
            output_run=run,
            skip_existing_in=self.skipExistingIn if self.skipExistingIn is not None else (),
            clobber=self.clobberOutputs,
            where=userQuery if userQuery is not None else "",
            dataset_query_constraint=datasetQueryConstraint,
            bind=bind,
        )
        return qgb.build(metadata, attach_datastore_records=(self.datastore is not None))