Coverage for python/lsst/pipe/base/script/transfer_from_graph.py: 10%
41 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-04-12 09:12 +0000
1# This file is part of pipe_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22__all__ = ["transfer_from_graph"]
24from lsst.daf.butler import Butler, CollectionType, DatasetRef, QuantumBackedButler, Registry
25from lsst.daf.butler.registry import MissingCollectionError
26from lsst.pipe.base import QuantumGraph
def transfer_from_graph(
    graph: str,
    dest: str,
    register_dataset_types: bool,
    transfer_dimensions: bool,
    update_output_chain: bool,
) -> int:
    """Copy the output datasets of a quantum graph into a destination repo.

    Parameters
    ----------
    graph : `str`
        URI string of the quantum graph.
    dest : `str`
        URI string of the destination Butler repo.
    register_dataset_types : `bool`
        Indicate whether missing dataset types should be registered.
    transfer_dimensions : `bool`
        Indicate whether dimensions should be transferred along with
        datasets. It can be more efficient to disable this if it is known
        that all dimensions exist.
    update_output_chain : `bool`
        If quantum graph metadata includes output run name and output
        collection which is a chain, update the chain definition to include
        run name as the first collection in the chain.

    Returns
    -------
    count : `int`
        Actual count of transferred datasets.
    """
    # The whole graph is loaded into memory.
    qgraph = QuantumGraph.loadUri(graph)

    # Gather every ref this graph could produce: global init-outputs,
    # per-task init-outputs, and the outputs of each quantum.
    candidate_refs: set[DatasetRef] = set(qgraph.globalInitOutputRefs())
    for task_def in qgraph.iterTaskGraph():
        init_refs = qgraph.initOutputRefs(task_def)
        if init_refs:
            candidate_refs.update(init_refs)
    for node in qgraph:
        for produced in node.quantum.outputs.values():
            candidate_refs.update(produced)

    # The quantum-backed butler shares its config with the output Butler;
    # the graph outputs are handed to it as its predicted inputs.
    predicted_ids = [ref.getCheckedId() for ref in candidate_refs]
    source_butler = QuantumBackedButler.from_predicted(
        config=dest,
        predicted_inputs=predicted_ids,
        predicted_outputs=[],
        dimensions=qgraph.universe,
        datastore_records={},
    )

    dest_butler = Butler(dest, writeable=True)

    transferred = dest_butler.transfer_from(
        source_butler,
        candidate_refs,
        transfer="auto",
        register_dataset_types=register_dataset_types,
        transfer_dimensions=transfer_dimensions,
    )
    count = len(transferred)

    # The output chain is only touched when something was transferred and
    # the caller asked for the update.
    if count <= 0 or not update_output_chain:
        return count

    metadata = qgraph.metadata
    if metadata is None:
        return count

    # These keys are defined in CmdLineFwk.
    run_name = metadata.get("output_run")
    chain_name = metadata.get("output")
    input_collections = metadata.get("input")
    if run_name is not None and chain_name is not None:
        _update_chain(dest_butler.registry, chain_name, run_name, input_collections)

    return count
def _update_chain(registry: Registry, output_chain: str, output_run: str, inputs: list[str] | None) -> None:
    """Make ``output_run`` the first member of the ``output_chain`` collection.

    If the chain already exists, the run is moved (or inserted) to the front
    of its definition; nothing is written when the run is already first. If
    the chain does not exist, it is registered as a CHAINED collection whose
    definition is the run followed by the flattened ``inputs``.
    """
    try:
        # A MissingCollectionError here sends us to the creation branch.
        members = list(registry.getCollectionChain(output_chain))
    except MissingCollectionError:
        # Create the chained collection from the inputs and the output run
        # (this reproduces logic in CmdLineFwk).
        registry.registerCollection(output_chain, type=CollectionType.CHAINED)
        if inputs:
            upstream = list(registry.queryCollections(inputs, flattenChains=True))
        else:
            upstream = []
        new_chain = [output_run]
        new_chain.extend(name for name in upstream if name != output_run)
        registry.setCollectionChain(output_chain, new_chain)
        return

    # Already at the top of the chain: leave the definition untouched.
    if members and members[0] == output_run:
        return

    # Drop the run from its current position (if any) so it can be re-added
    # at the front.
    if output_run in members:
        members.remove(output_run)
    members.insert(0, output_run)
    registry.setCollectionChain(output_chain, members)