Coverage for python/lsst/pipe/base/script/transfer_from_graph.py: 10%

41 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-04-13 10:09 +0000

1# This file is part of pipe_base. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22__all__ = ["transfer_from_graph"] 

23 

24from lsst.daf.butler import Butler, CollectionType, DatasetRef, QuantumBackedButler, Registry 

25from lsst.daf.butler.registry import MissingCollectionError 

26from lsst.pipe.base import QuantumGraph 

27 

28 

def transfer_from_graph(
    graph: str,
    dest: str,
    register_dataset_types: bool,
    transfer_dimensions: bool,
    update_output_chain: bool,
) -> int:
    """Transfer output datasets from quantum graph to dest.

    Parameters
    ----------
    graph : `str`
        URI string of the quantum graph.
    dest : `str`
        URI string of the destination Butler repo.
    register_dataset_types : `bool`
        Indicate whether missing dataset types should be registered.
    transfer_dimensions : `bool`
        Indicate whether dimensions should be transferred along with datasets.
        It can be more efficient to disable this if it is known that all
        dimensions exist.
    update_output_chain : `bool`
        If quantum graph metadata includes output run name and output
        collection which is a chain, update the chain definition to include
        the run name as the first collection in the chain.

    Returns
    -------
    count : `int`
        Actual count of transferred datasets.
    """
    # Read whole graph into memory
    qgraph = QuantumGraph.loadUri(graph)

    # Collect output refs that could be created by this graph: the global
    # init-outputs, each task's init-outputs, and every quantum's outputs.
    output_refs: set[DatasetRef] = set(qgraph.globalInitOutputRefs())
    for task_def in qgraph.iterTaskGraph():
        if refs := qgraph.initOutputRefs(task_def):
            output_refs.update(refs)
    for qnode in qgraph:
        for refs in qnode.quantum.outputs.values():
            output_refs.update(refs)

    # Make QBB, its config is the same as output Butler. The graph's outputs
    # are declared as the QBB's predicted inputs (with no predicted outputs)
    # because the QBB is used below as the source of the transfer.
    qbb = QuantumBackedButler.from_predicted(
        config=dest,
        predicted_inputs=[ref.getCheckedId() for ref in output_refs],
        predicted_outputs=[],
        dimensions=qgraph.universe,
        datastore_records={},
    )

    dest_butler = Butler(dest, writeable=True)

    transferred = dest_butler.transfer_from(
        qbb,
        output_refs,
        transfer="auto",
        register_dataset_types=register_dataset_types,
        transfer_dimensions=transfer_dimensions,
    )
    count = len(transferred)

    # If anything was transferred then update output chain definition if asked.
    if count > 0 and update_output_chain and (metadata := qgraph.metadata) is not None:
        # These are defined in CmdLineFwk.
        output_run = metadata.get("output_run")
        output = metadata.get("output")
        # Named ``inputs`` rather than ``input`` so the builtin is not shadowed.
        inputs = metadata.get("input")
        # Without both the run and the chain name there is nothing to update.
        if output_run is not None and output is not None:
            _update_chain(dest_butler.registry, output, output_run, inputs)

    return count

103 

104 

def _update_chain(registry: Registry, output_chain: str, output_run: str, inputs: list[str] | None) -> None:
    """Make ``output_run`` the first member of the ``output_chain`` collection.

    If the chain already exists its definition is rewritten only when the run
    is not already at the front. If the chain does not exist it is registered
    as a CHAINED collection and defined as the run followed by the flattened
    input collections.

    Parameters
    ----------
    registry : `Registry`
        Registry whose collection chain is read and updated.
    output_chain : `str`
        Name of the chained collection.
    output_run : `str`
        Name of the run to place first in the chain.
    inputs : `list` [`str`] or `None`
        Input collections used to seed a newly created chain; not consulted
        when the chain already exists.
    """
    try:
        # Only a missing collection is handled here; if output_chain is not
        # a chain the exception raised by the registry propagates.
        members = list(registry.getCollectionChain(output_chain))
    except MissingCollectionError:
        # The chained collection has to be created from the inputs and the
        # output run (this reproduces logic in CmdLineFwk).
        registry.registerCollection(output_chain, type=CollectionType.CHAINED)
        seeds = list(registry.queryCollections(inputs, flattenChains=True)) if inputs else []
        fresh_definition = [output_run]
        fresh_definition.extend(name for name in seeds if name != output_run)
        registry.setCollectionChain(output_chain, fresh_definition)
        return

    # The chain exists: find where (if anywhere) the run currently sits.
    position = members.index(output_run) if output_run in members else None
    if position == 0:
        # It is already at the top.
        return
    if position is not None:
        # Present but not first: drop it so it can be re-inserted at the front.
        members.pop(position)

    registry.setCollectionChain(output_chain, [output_run, *members])

134 registry.setCollectionChain(output_chain, chain_definition)