Coverage for python/lsst/analysis/tools/bin/verifyToSasquatch.py: 22%

73 statements  

« prev     ^ index     » next       coverage.py v7.5.1, created at 2024-05-10 11:04 +0000

1# This file is part of analysis_tools. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22__all__ = [ 

23 "main", 

24] 

25 

26import argparse 

27import copy 

28import datetime 

29import logging 

30from collections import defaultdict 

31from collections.abc import Iterable, Mapping 

32 

33import lsst.verify 

34from lsst.analysis.tools.interfaces import MetricMeasurementBundle 

35from lsst.analysis.tools.interfaces.datastore import SasquatchDispatcher 

36from lsst.daf.butler import Butler, DataCoordinate, DatasetRef 

37 

# Apply the logging module's basic configuration before creating the
# module-level logger.
logging.basicConfig()
_LOG = logging.getLogger(__name__)
# Emit INFO-level messages from this module (main() logs its progress at
# that level).
_LOG.setLevel(logging.INFO)


# Root URL of the Sasquatch REST proxy; used as the default for --url.
_BASE_URL = "https://usdf-rsp-dev.slac.stanford.edu/sasquatch-rest-proxy/"

44 

45 

def makeParser():
    """Build the command-line parser for this script.

    Returns
    -------
    parser : `argparse.ArgumentParser`
        A parser that accepts the Butler repository, the collections to
        search, the upload options, and the Sasquatch API settings.
    """
    cli = argparse.ArgumentParser(
        add_help=True,
        description="""Upload Measurement datasets from a Butler repository to Sasquatch.

        This script handles metric values persisted directly using
        lsst.verify tooling. It is neither necessary nor useful for
        MetricMeasurementBundles created using analysis_tools
        tooling, and is provided solely for backwards compatibility
        with the older system.
        """,
    )

    # Positional arguments: where to look for the metric values.
    cli.add_argument(
        "repo",
        help="The Butler repository from which to upload metric values.",
    )
    collections_help = (
        "The collection(s) in which to search for metric values. These can "
        "be specified in any notation recognized by Middleware."
    )
    cli.add_argument("collections", nargs="+", action="extend", help=collections_help)

    # Options controlling what is uploaded and how it is labeled.
    cli.add_argument(
        "--dataset",
        required=True,
        help="The dataset on which the metrics were measured.",
    )
    test_help = (
        "Run this command while uploading to the lsst.debug test "
        "namespace. Any --namespace argument is ignored."
    )
    cli.add_argument("--test", action="store_true", help=test_help)
    cli.add_argument(
        "--where",
        default="",
        help="Butler query to select metric values for upload (default: all values).",
    )
    date_help = (
        "ISO8601 formatted datetime in UTC for the Measurement creation "
        "date, e.g. 2021-06-30T22:28:25Z. If not provided, the run time or "
        "current time is used."
    )
    cli.add_argument("--date-created", type=datetime.datetime.fromisoformat, help=date_help)
    extra_help = (
        "Extra field (in the form key=value) to be added to any records "
        "uploaded to Sasquatch. See SasquatchDispatcher.dispatch and "
        ".dispatchRef for more details. The --extra argument can be passed "
        "multiple times."
    )
    cli.add_argument("--extra", action=_AppendDict, help=extra_help)

    # Connection settings for the Sasquatch proxy get their own help section.
    sasquatch_opts = cli.add_argument_group("Sasquatch API arguments")
    sasquatch_opts.add_argument(
        "--namespace",
        default="lsst.dm",
        help="The Sasquatch namespace to which to upload the metric values (default: lsst.dm)",
    )
    sasquatch_opts.add_argument(
        "--url",
        dest="base_url",
        default=_BASE_URL,
        help=f"Root URL of Sasquatch proxy server (default: {_BASE_URL}).",
    )
    sasquatch_opts.add_argument(
        "--token",
        default="na",
        help="Authentication token for the proxy server.",
    )

    return cli

107 

108 

class _AppendDict(argparse.Action):
    """An argparse action that collects "key=value" inputs into a `dict`.

    This is the mapping counterpart of the built-in 'append' action, which
    collects into a `list`. Every input must be a string containing exactly
    one "=" character; anything else is rejected as invalid. When the default
    is a non-empty mapping, keys given on the command line replace the
    default's entries for those keys.
    """

    def __init__(
        self,
        option_strings,
        dest,
        nargs=None,
        const=None,
        default=None,
        type=None,
        choices=None,
        required=False,
        help=None,
        metavar=None,
    ):
        if default is None:
            default = {}
        elif not isinstance(default, Mapping):
            # Name the argument by the first non-empty identifier available.
            argname = option_strings or metavar or dest
            raise TypeError(f"Default for {argname} must be a mapping or None, got {default!r}.")
        super().__init__(
            option_strings=option_strings,
            dest=dest,
            nargs=nargs,
            const=const,
            default=default,
            type=type,
            choices=choices,
            required=required,
            help=help,
            metavar=metavar,
        )

    def __call__(self, parser, namespace, values, option_string=None):
        # The object stored in the namespace may be self.default itself,
        # because argparse does not copy defaults. Work on a shallow copy so
        # that the previously stored object is never modified.
        merged = copy.copy(getattr(namespace, self.dest))

        if isinstance(values, Mapping):
            # argparse sometimes hands the action a copy of the default
            # rather than command-line input; fold it in as-is.
            merged.update(values)
        else:
            # Depending on nargs, the input is one string or a list of them.
            # Checking for a generic sequence would not help, since a plain
            # string is itself a sequence.
            entries = values if isinstance(values, list) else [values]
            for entry in entries:
                pieces = entry.split("=")
                if len(pieces) != 2:
                    raise ValueError(f"Argument {entry!r} does not match format 'key=value'.")
                key, val = pieces
                merged[key] = val

        # Publish the copy, leaving the original mapping untouched.
        setattr(namespace, self.dest, merged)

162 

163 

def _bundle_metrics(
    butler: Butler, metricValues: Iterable[DatasetRef]
) -> Mapping[tuple[str, str, DataCoordinate], MetricMeasurementBundle]:
    """Group standalone metric values into metric bundles, keeping as much
    of their metadata as is practical.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The Butler repository from which the metric values are read.
    metricValues : `~collections.abc.Iterable` [`lsst.daf.butler.DatasetRef`]
        References to the datasets to bundle. Each one must point to a
        ``MetricValue`` dataset.

    Returns
    -------
    bundles : `~collections.abc.Mapping`
        One `lsst.analysis.tools.interfaces.MetricMeasurementBundle` per
        distinct combination of metadata, keyed by the tuple (run, dataset
        type, data ID). To keep the uploaded schemas simple, measurements in
        the bundles carry the metrics' relative (unqualified) names, even if
        the inputs were fully qualified.

    Raises
    ------
    ValueError
        Raised if any reference resolves to something other than an
        `lsst.verify.Measurement`.
    """
    grouped = defaultdict(MetricMeasurementBundle)
    for datasetRef in metricValues:
        loaded = butler.get(datasetRef)
        # MetricMeasurementBundle accepts anything, so check the type here.
        if not isinstance(loaded, lsst.verify.Measurement):
            raise ValueError(f"{datasetRef} is not a metric value.")

        # HACK: metric names are normally fully qualified, and the name ends
        # up as the InfluxDB field name. Names of lsst.verify-style metrics
        # are already unique, so rebuild the measurement without the package
        # qualification.
        unqualified = lsst.verify.Measurement(
            loaded.metric_name.metric,
            loaded.quantity,
            loaded.blobs.values(),
            loaded.extras,
            loaded.notes,
        )
        # No action produced these metrics. Sasquatch needs every actionId to
        # yield the same set of metrics on each run (see
        # https://sasquatch.lsst.io/user-guide/avro.html), so use an
        # identifier specific to the metric itself.
        actionId = unqualified.metric_name.metric

        key = (datasetRef.run, datasetRef.datasetType.name, datasetRef.dataId)
        grouped[key].setdefault(actionId, []).append(unqualified)
    return grouped

211 

212 

def main():
    """Run the script: find metric values in a Butler repository and upload
    them to Sasquatch.

    Command-line arguments are parsed with the parser built by `makeParser`.
    """
    args = makeParser().parse_args()
    # --test overrides whatever namespace was requested.
    if args.test:
        args.namespace = "lsst.debug"

    repository = Butler(args.repo, collections=args.collections, writeable=False)
    registry = repository.registry
    # Only dataset types stored as MetricValue are of interest.
    metricTypes = {
        datasetType
        for datasetType in registry.queryDatasetTypes()
        if datasetType.storageClass_name == "MetricValue"
    }
    metricValues = registry.queryDatasets(metricTypes, where=args.where, findFirst=True)
    _LOG.info("Found %d metric values in %s.", metricValues.count(), args.collections)

    bundles = _bundle_metrics(repository, metricValues)
    uploader = SasquatchDispatcher(url=args.base_url, token=args.token, namespace=args.namespace)
    _LOG.info("Uploading to %s @ %s...", args.namespace, args.base_url)
    # Each bundle is dispatched separately, labeled with the key it was
    # grouped under.
    for key, bundle in bundles.items():
        run, datasetType, dataId = key
        uploader.dispatch(
            bundle,
            run=run,
            datasetType=datasetType,
            timestamp=args.date_created,
            datasetIdentifier=args.dataset,
            identifierFields=dataId,
            extraFields=args.extra,
        )