Coverage for python/lsst/pipe/base/script/transfer_from_graph.py: 8%
48 statements
« prev ^ index » next coverage.py v7.5.0, created at 2024-04-26 02:49 -0700
1# This file is part of pipe_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28__all__ = ["transfer_from_graph"]
30from lsst.daf.butler import Butler, CollectionType, DatasetRef, QuantumBackedButler, Registry
31from lsst.daf.butler.registry import MissingCollectionError
32from lsst.pipe.base import QuantumGraph
def transfer_from_graph(
    graph: str,
    dest: str,
    register_dataset_types: bool,
    transfer_dimensions: bool,
    update_output_chain: bool,
    dry_run: bool,
) -> int:
    """Transfer output datasets from quantum graph to dest.

    Parameters
    ----------
    graph : `str`
        URI string of the quantum graph.
    dest : `str`
        URI string of the destination Butler repo.
    register_dataset_types : `bool`
        Indicate whether missing dataset types should be registered.
    transfer_dimensions : `bool`
        Indicate whether dimensions should be transferred along with datasets.
        It can be more efficient to disable this if it is known that all
        dimensions exist.
    update_output_chain : `bool`
        If quantum graph metadata includes output run name and output
        collection which is a chain, update the chain definition to include
        run name as the first collection in the chain.
    dry_run : `bool`
        Run the transfer without updating the destination butler.

    Returns
    -------
    count : `int`
        Actual count of transferred datasets.
    """
    # Read whole graph into memory.
    qgraph = QuantumGraph.loadUri(graph)

    # Collect output refs that could be created by this graph: global init
    # outputs, per-task init outputs, and every quantum's regular outputs.
    original_output_refs: set[DatasetRef] = set(qgraph.globalInitOutputRefs())
    for task_def in qgraph.iterTaskGraph():
        if refs := qgraph.initOutputRefs(task_def):
            original_output_refs.update(refs)
    for qnode in qgraph:
        for otherRefs in qnode.quantum.outputs.values():
            original_output_refs.update(otherRefs)

    # Get data repository definitions from the QuantumGraph; these can have
    # different storage classes than those in the quanta.
    dataset_types = {dstype.name: dstype for dstype in qgraph.registryDatasetTypes()}

    # Convert output_refs to the data repository storage classes, too.
    output_refs = set()
    for ref in original_output_refs:
        internal_dataset_type = dataset_types.get(ref.datasetType.name, ref.datasetType)
        if internal_dataset_type.storageClass_name != ref.datasetType.storageClass_name:
            output_refs.add(ref.overrideStorageClass(internal_dataset_type.storageClass_name))
        else:
            output_refs.add(ref)

    # Make QBB, its config is the same as output Butler.
    qbb = QuantumBackedButler.from_predicted(
        config=dest,
        predicted_inputs=[ref.id for ref in output_refs],
        predicted_outputs=[],
        dimensions=qgraph.universe,
        datastore_records={},
        dataset_types=dataset_types,
    )

    dest_butler = Butler.from_config(dest, writeable=True)

    transferred = dest_butler.transfer_from(
        qbb,
        output_refs,
        transfer="auto",
        register_dataset_types=register_dataset_types,
        transfer_dimensions=transfer_dimensions,
        dry_run=dry_run,
    )
    count = len(transferred)

    # If anything was transferred then update output chain definition if asked.
    if count > 0 and update_output_chain and (metadata := qgraph.metadata) is not None:
        # These metadata keys are defined in CmdLineFwk.
        output_run = metadata.get("output_run")
        output = metadata.get("output")
        # Named to avoid shadowing the ``input`` builtin.
        input_collections = metadata.get("input")
        if output_run is not None and output is not None:
            _update_chain(dest_butler.registry, output, output_run, input_collections)

    return count
def _update_chain(registry: Registry, output_chain: str, output_run: str, inputs: list[str] | None) -> None:
    """Update chain definition if it exists to include run as the first item
    in a chain. If it does not exist then create it to include all inputs and
    output.
    """
    try:
        # Raises MissingCollectionError if output_chain does not exist;
        # raises as well if it is not a chained collection.
        existing_chain = list(registry.getCollectionChain(output_chain))
    except MissingCollectionError:
        # Chain does not exist yet: create it and populate it with the
        # flattened inputs plus the output run at the front (this reproduces
        # logic in CmdLineFwk).
        registry.registerCollection(output_chain, type=CollectionType.CHAINED)
        flattened = list(registry.queryCollections(inputs, flattenChains=True)) if inputs else []
        new_chain = [output_run] + [run for run in flattened if run != output_run]
        registry.setCollectionChain(output_chain, new_chain)
        return

    # Chain exists; move the run to the front if it is not already there.
    if output_run in existing_chain:
        position = existing_chain.index(output_run)
        if position == 0:
            # Already the first item; nothing to do.
            return
        # Remove the first occurrence so it can be re-inserted at the front.
        del existing_chain[position]
    existing_chain.insert(0, output_run)
    registry.setCollectionChain(output_chain, existing_chain)