Coverage for python/lsst/pipe/base/script/transfer_from_graph.py: 8%

48 statements  

coverage.py v7.4.4, created at 2024-04-04 10:03 +0000

# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__all__ = ["transfer_from_graph"]

from lsst.daf.butler import Butler, CollectionType, DatasetRef, QuantumBackedButler, Registry
from lsst.daf.butler.registry import MissingCollectionError
from lsst.pipe.base import QuantumGraph


def transfer_from_graph(
    graph: str,
    dest: str,
    register_dataset_types: bool,
    transfer_dimensions: bool,
    update_output_chain: bool,
    dry_run: bool,
) -> int:
    """Transfer output datasets from quantum graph to dest.

    Parameters
    ----------
    graph : `str`
        URI string of the quantum graph.
    dest : `str`
        URI string of the destination Butler repo.
    register_dataset_types : `bool`
        Indicate whether missing dataset types should be registered.
    transfer_dimensions : `bool`
        Indicate whether dimensions should be transferred along with datasets.
        It can be more efficient to disable this if it is known that all
        dimensions exist.
    update_output_chain : `bool`
        If the quantum graph metadata includes an output run name and an
        output collection which is a chain, update the chain definition to
        include the run name as the first collection in the chain.
    dry_run : `bool`
        Run the transfer without updating the destination butler.

    Returns
    -------
    count : `int`
        Actual count of transferred datasets.
    """
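    # Overall flow: load the graph, collect every output ref it predicts,
    # convert those refs to the repository's storage classes, build a
    # quantum-backed butler from the same config as the destination, transfer
    # the datasets into the destination butler, and optionally update the
    # output chain definition.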

    # Read whole graph into memory
    qgraph = QuantumGraph.loadUri(graph)

    # Collect output refs that could be created by this graph.
    original_output_refs: set[DatasetRef] = set(qgraph.globalInitOutputRefs())
    for task_def in qgraph.iterTaskGraph():
        if refs := qgraph.initOutputRefs(task_def):
            original_output_refs.update(refs)
    for qnode in qgraph:
        for otherRefs in qnode.quantum.outputs.values():
            original_output_refs.update(otherRefs)

    # Get data repository definitions from the QuantumGraph; these can have
    # different storage classes than those in the quanta.
    dataset_types = {dstype.name: dstype for dstype in qgraph.registryDatasetTypes()}

    # Convert output_refs to the data repository storage classes, too.
    output_refs = set()
    for ref in original_output_refs:
        internal_dataset_type = dataset_types.get(ref.datasetType.name, ref.datasetType)
        if internal_dataset_type.storageClass_name != ref.datasetType.storageClass_name:
            output_refs.add(ref.overrideStorageClass(internal_dataset_type.storageClass_name))
        else:
            output_refs.add(ref)
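    # For example (hypothetical storage class names): a ref predicted with
    # storage class "DataFrame" whose registry dataset type uses "ArrowAstropy"
    # is re-created above with "ArrowAstropy" so that it matches the repository
    # definition.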

    # Make a QuantumBackedButler; its config is the same as the output Butler's.
    qbb = QuantumBackedButler.from_predicted(
        config=dest,
        predicted_inputs=[ref.id for ref in output_refs],
        predicted_outputs=[],
        dimensions=qgraph.universe,
        datastore_records={},
        dataset_types=dataset_types,
    )

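    # Transfer step: open the destination repository as a writeable Butler and
    # copy the collected refs into it from the QBB-backed datastore.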

    dest_butler = Butler.from_config(dest, writeable=True)

    transferred = dest_butler.transfer_from(
        qbb,
        output_refs,
        transfer="auto",
        register_dataset_types=register_dataset_types,
        transfer_dimensions=transfer_dimensions,
        dry_run=dry_run,
    )
    count = len(transferred)
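    # `transferred` holds the refs reported by Butler.transfer_from; its length
    # is both the return value and the gate for the optional chain update below.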

    # If anything was transferred then update output chain definition if asked.
    if count > 0 and update_output_chain and (metadata := qgraph.metadata) is not None:
        # These are defined in CmdLineFwk.
        output_run = metadata.get("output_run")
        output = metadata.get("output")
        input = metadata.get("input")
        if output_run is not None and output is not None:
            _update_chain(dest_butler.registry, output, output_run, input)

    return count


def _update_chain(registry: Registry, output_chain: str, output_run: str, inputs: list[str] | None) -> None:
    """Update the chain definition, if it exists, so that the output run is
    the first item in the chain. If it does not exist, create it to include
    all inputs and the output run.
    """
    try:
        # If output_chain does not exist, MissingCollectionError is raised.
        chain_definition = list(registry.getCollectionChain(output_chain))
    except MissingCollectionError:
        # We have to create a chained collection that includes the inputs and
        # the output run (this reproduces logic in CmdLineFwk).
        registry.registerCollection(output_chain, type=CollectionType.CHAINED)
        chain_definition = list(registry.queryCollections(inputs, flattenChains=True)) if inputs else []
        chain_definition = [output_run] + [run for run in chain_definition if run != output_run]
        registry.setCollectionChain(output_chain, chain_definition)
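        # Illustration with hypothetical names: output_run "u/jdoe/run1" and
        # inputs ["HSC/defaults"] would yield a chain whose first member is
        # "u/jdoe/run1", followed by the flattened members of "HSC/defaults".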

    else:
        # If the run is in the chain but not the first item, remove it; it
        # will be re-inserted at the front below.
        try:
            index = chain_definition.index(output_run)
            if index == 0:
                # It is already at the top.
                return
            else:
                del chain_definition[index]
        except ValueError:
            pass

        chain_definition.insert(0, output_run)
        registry.setCollectionChain(output_chain, chain_definition)
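
# Usage sketch (illustrative only, not part of this module): the graph URI and
# repository path below are hypothetical placeholders.
#
#     from lsst.pipe.base.script.transfer_from_graph import transfer_from_graph
#
#     count = transfer_from_graph(
#         graph="/path/to/pipeline.qgraph",  # hypothetical quantum graph URI
#         dest="/repo/main",                 # hypothetical destination repo
#         register_dataset_types=True,
#         transfer_dimensions=False,
#         update_output_chain=True,
#         dry_run=False,  # set True to preview without modifying the destination
#     )
#     print(f"Transferred {count} datasets")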