Coverage for python/lsst/pipe/base/script/transfer_from_graph.py: 8%

48 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-06 02:51 -0700

1# This file is part of pipe_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

# Public API of this module.
__all__ = ["transfer_from_graph"]

23 

24from lsst.daf.butler import Butler, CollectionType, DatasetRef, QuantumBackedButler, Registry 

25from lsst.daf.butler.registry import MissingCollectionError 

26from lsst.pipe.base import QuantumGraph 

27 

28 

def transfer_from_graph(
    graph: str,
    dest: str,
    register_dataset_types: bool,
    transfer_dimensions: bool,
    update_output_chain: bool,
) -> int:
    """Transfer output datasets from quantum graph to dest.

    Parameters
    ----------
    graph : `str`
        URI string of the quantum graph.
    dest : `str`
        URI string of the destination Butler repo.
    register_dataset_types : `bool`
        Indicate whether missing dataset types should be registered.
    transfer_dimensions : `bool`
        Indicate whether dimensions should be transferred along with datasets.
        It can be more efficient to disable this if it is known that all
        dimensions exist.
    update_output_chain : `bool`
        If quantum graph metadata includes output run name and output
        collection which is a chain, update the chain definition to include
        run name as the first collection in the chain.

    Returns
    -------
    count : `int`
        Actual count of transferred datasets.
    """
    # Read whole graph into memory.
    qgraph = QuantumGraph.loadUri(graph)

    # Collect output refs that could be created by this graph: global init
    # outputs, per-task init outputs, and per-quantum outputs.
    original_output_refs: set[DatasetRef] = set(qgraph.globalInitOutputRefs())
    for task_def in qgraph.iterTaskGraph():
        if refs := qgraph.initOutputRefs(task_def):
            original_output_refs.update(refs)
    for qnode in qgraph:
        for refs in qnode.quantum.outputs.values():
            original_output_refs.update(refs)

    # Get data repository definitions from the QuantumGraph; these can have
    # different storage classes than those in the quanta.
    dataset_types = {dstype.name: dstype for dstype in qgraph.registryDatasetTypes()}

    # Convert output_refs to the data repository storage classes, too.
    output_refs = set()
    for ref in original_output_refs:
        internal_dataset_type = dataset_types.get(ref.datasetType.name, ref.datasetType)
        if internal_dataset_type.storageClass_name != ref.datasetType.storageClass_name:
            output_refs.add(ref.overrideStorageClass(internal_dataset_type.storageClass_name))
        else:
            output_refs.add(ref)

    # Make a quantum-backed butler to read the graph's outputs from; its
    # config is the same as the output Butler's.
    qbb = QuantumBackedButler.from_predicted(
        config=dest,
        predicted_inputs=[ref.id for ref in output_refs],
        predicted_outputs=[],
        dimensions=qgraph.universe,
        datastore_records={},
        dataset_types=dataset_types,
    )

    dest_butler = Butler(dest, writeable=True)

    transferred = dest_butler.transfer_from(
        qbb,
        output_refs,
        transfer="auto",
        register_dataset_types=register_dataset_types,
        transfer_dimensions=transfer_dimensions,
    )
    count = len(transferred)

    # If anything was transferred then update output chain definition if asked.
    if count > 0 and update_output_chain and (metadata := qgraph.metadata) is not None:
        # These metadata keys are defined in CmdLineFwk.
        output_run = metadata.get("output_run")
        output = metadata.get("output")
        # Renamed from `input` to avoid shadowing the builtin.
        inputs = metadata.get("input")
        if output_run is not None and output is not None:
            _update_chain(dest_butler.registry, output, output_run, inputs)

    return count

117 

118 

def _update_chain(registry: Registry, output_chain: str, output_run: str, inputs: list[str] | None) -> None:
    """Update chain definition if it exists to include run as the first item
    in a chain. If it does not exist then create it to include all inputs and
    output.
    """
    try:
        # Raises if output_chain exists but is not a chained collection.
        current_chain = list(registry.getCollectionChain(output_chain))
    except MissingCollectionError:
        # The chain does not exist yet, so build it from the flattened inputs
        # with the output run at the front (this reproduces logic in
        # CmdLineFwk).
        registry.registerCollection(output_chain, type=CollectionType.CHAINED)
        flattened = list(registry.queryCollections(inputs, flattenChains=True)) if inputs else []
        new_chain = [output_run] + [name for name in flattened if name != output_run]
        registry.setCollectionChain(output_chain, new_chain)
        return

    # The chain exists; make output_run its first element, removing any
    # duplicate occurrence further down.
    if output_run in current_chain:
        position = current_chain.index(output_run)
        if position == 0:
            # Already at the front; nothing to do.
            return
        del current_chain[position]

    current_chain.insert(0, output_run)
    registry.setCollectionChain(output_chain, current_chain)