Coverage for python/lsst/pipe/base/graphBuilder.py: 50%

32 statements  

« prev     ^ index     » next       coverage.py v7.3.0, created at 2023-08-31 09:39 +0000

1# This file is part of pipe_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22"""Module defining GraphBuilder class and related methods. 

23""" 

24 

25from __future__ import annotations 

26 

27__all__ = ["GraphBuilder"] 

28 

29 

30from collections.abc import Iterable, Mapping 

31from typing import Any 

32 

33from lsst.daf.butler import Butler, DataCoordinate, Datastore, Registry 

34from lsst.daf.butler.registry.wildcards import CollectionWildcard 

35 

36from ._datasetQueryConstraints import DatasetQueryConstraintVariant 

37from .all_dimensions_quantum_graph_builder import AllDimensionsQuantumGraphBuilder 

38from .graph import QuantumGraph 

39from .pipeline import Pipeline, TaskDef 

40from .pipeline_graph import PipelineGraph 

41 

42# Re-exports for backwards-compatibility. 

43from .quantum_graph_builder import GraphBuilderError # noqa: F401 

44from .quantum_graph_builder import OutputExistsError # noqa: F401 

45from .quantum_graph_builder import PrerequisiteMissingError # noqa: F401 

46 

47 

class GraphBuilder:
    """Build a task execution graph (`QuantumGraph`) from a pipeline.

    Parameters
    ----------
    registry : `~lsst.daf.butler.Registry`
        Registry of the data repository. `makeGraph` reads the owning butler
        from the registry's private ``_butler`` attribute, so the registry
        is expected to be one handed out by a `~lsst.daf.butler.Butler`.
    skipExistingIn
        Expressions representing the collections to search for existing
        output datasets that should be skipped. See
        :ref:`daf_butler_ordered_collection_searches`. `None` (default) is
        forwarded to the underlying builder as an empty tuple.
    clobberOutputs : `bool`, optional
        If `True` (default), allow quanta to be created even if partial
        outputs exist; this requires the same behavior to be enabled when
        executing.
    datastore : `~lsst.daf.butler.Datastore`, optional
        If not `None` then fill datastore records in each generated Quantum.
        When provided, `makeGraph` asserts that it is the very same object
        as the datastore of the butler behind ``registry``.
    """

    def __init__(
        self,
        registry: Registry,
        skipExistingIn: Any = None,
        clobberOutputs: bool = True,
        datastore: Datastore | None = None,
    ):
        # Plain attribute storage; nothing is validated until makeGraph runs.
        self.datastore = datastore
        self.clobberOutputs = clobberOutputs
        self.skipExistingIn = skipExistingIn
        self.registry = registry
        self.dimensions = registry.dimensions

    def makeGraph(
        self,
        pipeline: Pipeline | Iterable[TaskDef],
        collections: Any,
        run: str,
        userQuery: str | None,
        datasetQueryConstraint: DatasetQueryConstraintVariant = DatasetQueryConstraintVariant.ALL,
        metadata: Mapping[str, Any] | None = None,
        bind: Mapping[str, Any] | None = None,
        dataId: DataCoordinate | None = None,
    ) -> QuantumGraph:
        """Create the execution graph for a pipeline.

        Parameters
        ----------
        pipeline : `Pipeline` or `~collections.abc.Iterable` [ `TaskDef` ]
            Pipeline definition, task names/classes and their configs.
        collections
            Expressions representing the collections to search for input
            datasets. See :ref:`daf_butler_ordered_collection_searches`.
        run : `str`
            Name of the `~lsst.daf.butler.CollectionType.RUN` collection for
            output datasets. The collection does not have to exist; it will
            be created when the graph is executed.
        userQuery : `str` or `None`
            String which defines a user-defined selection for the registry.
            Should be empty or `None` if there are no restrictions on data
            selection; `None` is passed on as an empty string.
        datasetQueryConstraint : `DatasetQueryConstraintVariant`, optional
            The query constraint variant that should be used to constrain
            the query based on dataset existence, defaults to
            `DatasetQueryConstraintVariant.ALL`.
        metadata : `~collections.abc.Mapping` [ `str`, primitives ], optional
            Extra data to carry with the graph. Entries in this mapping
            should be able to be serialized in JSON.
        bind : `~collections.abc.Mapping`, optional
            Mapping containing literal values that should be injected into
            the ``userQuery`` expression, keyed by the identifiers they
            replace.
        dataId : `lsst.daf.butler.DataCoordinate`, optional
            Data ID that should also be included in the query constraint.
            Only used when ``pipeline`` is an iterable of `TaskDef`; ignored
            if ``pipeline`` is a `Pipeline` instance (which has its own
            data ID).

        Returns
        -------
        graph : `QuantumGraph`
            The graph produced by the underlying quantum graph builder.

        Raises
        ------
        UserExpressionError
            Raised when user expression cannot be parsed.
        OutputExistsError
            Raised when output datasets already exist.
        AssertionError
            Raised if a datastore was given at construction and it is not
            the datastore of the butler behind the registry.
        Exception
            Other exception types may be raised by underlying registry
            classes.
        """
        if not isinstance(pipeline, Pipeline):
            # A bare sequence of task definitions carries no data ID of its
            # own, so the caller-supplied one seeds the pipeline graph.
            task_graph = PipelineGraph(data_id=dataId)
            for definition in pipeline:
                task_graph.add_task(
                    definition.label,
                    definition.taskClass,
                    config=definition.config,
                    connections=definition.connections,
                )
        else:
            task_graph = pipeline.to_graph()

        # We assume `registry` is actually a RegistryShim that has a butler
        # inside it, since that's now the only kind of Registry code outside
        # Butler should be able to get, and we assert that the datastore came
        # from the same place.  Soon this interface will be deprecated in
        # favor of QuantumGraphBuilder (which takes a Butler directly, as all
        # new code should) anyway.
        butler: Butler = self.registry._butler  # type: ignore
        assert butler._datastore is self.datastore or self.datastore is None

        # Normalize the legacy arguments into the forms the new builder
        # expects: an ordered collection search path, and empty stand-ins
        # where `None` used to mean "nothing".
        ordered_inputs = CollectionWildcard.from_expression(collections).require_ordered()
        skip_in = self.skipExistingIn
        if skip_in is None:
            skip_in = ()
        where_clause = "" if userQuery is None else userQuery

        builder = AllDimensionsQuantumGraphBuilder(
            task_graph,
            butler,
            input_collections=ordered_inputs,
            output_run=run,
            skip_existing_in=skip_in,
            clobber=self.clobberOutputs,
            where=where_clause,
            dataset_query_constraint=datasetQueryConstraint,
            bind=bind,
        )
        return builder.build(metadata)