Coverage for python/lsst/analysis/tools/bin/verifyToSasquatch.py: 22%
73 statements
coverage.py v7.5.0, created at 2024-04-26 04:07 -0700

# This file is part of analysis_tools.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

__all__ = [
    "main",
]

import argparse
import copy
import datetime
import logging
from collections import defaultdict
from collections.abc import Iterable, Mapping

import lsst.verify
from lsst.analysis.tools.interfaces import MetricMeasurementBundle
from lsst.analysis.tools.interfaces.datastore import SasquatchDispatcher
from lsst.daf.butler import Butler, DataCoordinate, DatasetRef

logging.basicConfig()
_LOG = logging.getLogger(__name__)
_LOG.setLevel(logging.INFO)


_BASE_URL = "https://usdf-rsp-dev.slac.stanford.edu/sasquatch-rest-proxy/"


def makeParser():
    parser = argparse.ArgumentParser(
        description="""Upload Measurement datasets from a Butler repository to Sasquatch.

        This script handles metric values persisted directly using
        lsst.verify tooling. It is neither necessary nor useful for
        MetricMeasurementBundles created using analysis_tools
        tooling, and is provided solely for backwards compatibility
        with the older system.
        """,
        add_help=True,
    )
    parser.add_argument("repo", help="The Butler repository from which to upload metric values.")
    parser.add_argument(
        "collections",
        action="extend",
        nargs="+",
        help="The collection(s) in which to search for metric values. These can "
        "be specified in any notation recognized by Middleware.",
    )
    parser.add_argument("--dataset", required=True, help="The dataset on which the metrics were measured.")
    parser.add_argument(
        "--test",
        action="store_true",
        help="Upload to the lsst.debug test namespace; any --namespace "
        "argument is ignored.",
    )
    parser.add_argument(
        "--where", default="", help="Butler query to select metric values for upload (default: all values)."
    )
    parser.add_argument(
        "--date-created",
        type=datetime.datetime.fromisoformat,
        help="ISO8601 formatted datetime in UTC for the Measurement creation "
        "date, e.g. 2021-06-30T22:28:25Z. If not provided, the run time or "
        "current time is used.",
    )
    parser.add_argument(
        "--extra",
        action=_AppendDict,
        help="Extra field (in the form key=value) to be added to any records "
        "uploaded to Sasquatch. See SasquatchDispatcher.dispatch and "
        ".dispatchRef for more details. The --extra argument can be passed "
        "multiple times.",
    )

    api_group = parser.add_argument_group("Sasquatch API arguments")
    api_group.add_argument(
        "--namespace",
        default="lsst.dm",
        help="The Sasquatch namespace to which to upload the metric values (default: lsst.dm)",
    )
    api_group.add_argument(
        "--url",
        dest="base_url",
        default=_BASE_URL,
        help=f"Root URL of Sasquatch proxy server (default: {_BASE_URL}).",
    )
    api_group.add_argument("--token", default="na", help="Authentication token for the proxy server.")

    return parser
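

# A minimal sketch of how the parser built by makeParser is meant to be
# driven; the repository path, collection, and dataset name below are
# hypothetical:
#
#   args = makeParser().parse_args(
#       ["/repo/main", "u/someone/metrics", "--dataset", "example_data",
#        "--extra", "pipeline=step3", "--test"]
#   )
#   # args.collections == ["u/someone/metrics"]; args.extra == {"pipeline": "step3"}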


class _AppendDict(argparse.Action):
    """An action analogous to the built-in 'append' that appends to a `dict`
    instead of a `list`.

    Inputs are assumed to be strings in the form "key=value"; any input that
    does not contain exactly one "=" character is invalid. If the default
    value is non-empty, the default key-value pairs may be overwritten by
    values from the command line.
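
    Examples
    --------
    A minimal illustration of the parsing behavior (the option and key
    names below are arbitrary):

    >>> parser = argparse.ArgumentParser()
    >>> _ = parser.add_argument("--extra", action=_AppendDict)
    >>> args = parser.parse_args(["--extra", "a=1", "--extra", "b=2"])
    >>> args.extra == {"a": "1", "b": "2"}
    True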
117 """

    def __init__(
        self,
        option_strings,
        dest,
        nargs=None,
        const=None,
        default=None,
        type=None,
        choices=None,
        required=False,
        help=None,
        metavar=None,
    ):
        if default is None:
            default = {}
        if not isinstance(default, Mapping):
            argname = option_strings if option_strings else metavar if metavar else dest
            raise TypeError(f"Default for {argname} must be a mapping or None, got {default!r}.")
        super().__init__(option_strings, dest, nargs, const, default, type, choices, required, help, metavar)

    def __call__(self, parser, namespace, values, option_string=None):
        # argparse doesn't make defensive copies, so namespace.dest may be
        # the same object as self.default. Do the copy ourselves and avoid
        # modifying the object previously in namespace.dest.
        mapping = copy.copy(getattr(namespace, self.dest))

        # argparse sometimes passes (a copy of) the default mapping instead
        # of a command-line string, so accept mappings directly.
        if isinstance(values, Mapping):
            mapping.update(values)
        else:
            # values may be either a string or list of strings, depending on
            # nargs. Unsafe to test for Sequence, because a scalar string
            # passes.
            if not isinstance(values, list):
                values = [values]
            for value in values:
                key_value = value.split("=")
                if len(key_value) != 2:
                    raise ValueError(f"Argument {value!r} does not match format 'key=value'.")
                mapping[key_value[0]] = key_value[1]

        # Other half of the defensive copy.
        setattr(namespace, self.dest, mapping)


def _bundle_metrics(
    butler: Butler, metricValues: Iterable[DatasetRef]
) -> Mapping[tuple[str, str, DataCoordinate], MetricMeasurementBundle]:
    """Organize free metric values into metric bundles while preserving as
    much information as practical.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The Butler repository containing the metric values.
    metricValues : `~collections.abc.Iterable` [`lsst.daf.butler.DatasetRef`]
        The datasets to bundle. All references must point to ``MetricValue``
        datasets.

    Returns
    -------
    bundles : `~collections.abc.Mapping`
        A collection of
        `lsst.analysis.tools.interfaces.MetricMeasurementBundle`, one for each
        combination of distinct metadata. The mapping key is a tuple of (run,
        dataset type, data ID), and the value is the corresponding bundle.
        To simplify the uploaded schemas, the bundle uses metrics' relative
        (unqualified) names even if the input measurements were
        fully-qualified.
    """
    bundles = defaultdict(MetricMeasurementBundle)
    for ref in metricValues:
        value = butler.get(ref)
        # MetricMeasurementBundle doesn't validate input.
        if not isinstance(value, lsst.verify.Measurement):
            raise ValueError(f"{ref} is not a metric value.")

        # HACK: in general, metric names are fully qualified, and this becomes
        # the InfluxDB field name. lsst.verify-style metrics have unique names
        # already, so remove the package qualification.
        value = lsst.verify.Measurement(
            value.metric_name.metric, value.quantity, value.blobs.values(), value.extras, value.notes
        )
        # These metrics weren't created by actions. Sasquatch requires that
        # each actionId produce the same metrics on every run (see
        # https://sasquatch.lsst.io/user-guide/avro.html), so choose something
        # unique to the metric.
        actionId = value.metric_name.metric

        bundle = bundles[(ref.run, ref.datasetType.name, ref.dataId)]
        bundle.setdefault(actionId, []).append(value)
    return bundles
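

# Sketch of the mapping _bundle_metrics returns (the run and dataset type
# names below are hypothetical): each key is a (run, dataset type, data ID)
# tuple, and each MetricMeasurementBundle value maps an unqualified metric
# name (the actionId) to the list of Measurements with that name, roughly
#
#   {("u/someone/run", "someMetricValue", dataId):
#       {"someMetric": [Measurement("someMetric", ...)]}}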


def main():
    args = makeParser().parse_args()
    if args.test:
        args.namespace = "lsst.debug"

    butler = Butler(args.repo, collections=args.collections, writeable=False)
    metricTypes = {t for t in butler.registry.queryDatasetTypes() if t.storageClass_name == "MetricValue"}
    metricValues = butler.registry.queryDatasets(metricTypes, where=args.where, findFirst=True)
    _LOG.info("Found %d metric values in %s.", metricValues.count(), args.collections)

    bundles = _bundle_metrics(butler, metricValues)
    dispatcher = SasquatchDispatcher(url=args.base_url, token=args.token, namespace=args.namespace)
    _LOG.info("Uploading to %s @ %s...", args.namespace, args.base_url)
    for (run, datasetType, dataId), bundle in bundles.items():
        dispatcher.dispatch(
            bundle,
            run=run,
            datasetType=datasetType,
            timestamp=args.date_created,
            datasetIdentifier=args.dataset,
            identifierFields=dataId,
            extraFields=args.extra,
        )