Coverage for python/lsst/verify/bin/jobReporter.py: 14%
58 statements
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-11 03:24 -0700
1import argparse
2import json
4from lsst.verify import Job, MetricSet
5from lsst.daf.butler import Butler, FileTemplate
8__all__ = ["main", "JobReporter", "build_argparser"]
def build_argparser():
    """Construct the command-line argument parser for the job reporter.

    Returns
    -------
    parser : `argparse.ArgumentParser`
        Parser accepting a repository path, a collection name, and the
        optional filtering/tagging flags used by `main`.
    """
    parser = argparse.ArgumentParser(
        description=('Produce a Job object which can either be used '
                     'to build a local report or to ship to SQuaSH.'))
    # Positional arguments: where to read from.
    parser.add_argument('repository', type=str,
                        help='Path to a valid gen3 repository')
    parser.add_argument('collection', type=str,
                        help='Collection to search for metric measurement values')
    # Optional arguments: what to select and how to tag it.
    parser.add_argument('--metrics_package', type=str,
                        help='Metrics namespace to filter by. If omitted, all '
                             'metrics are processed.')
    parser.add_argument('--spec', type=str, default="design",
                        help='Spec level to apply: minimum, design, or stretch')
    parser.add_argument('--dataset_name', type=str, required=True,
                        help='Name of the dataset for which the report is being '
                             'generated. This is the desired ci_dataset tag in '
                             'SQuaSH.')
    return parser
def main(repository, collection, metrics_package, spec, dataset_name):
    """Extract metric values from a Gen 3 repository and rewrite them to disk
    in Job format.

    Parameters
    ----------
    Parameters are the same as for the `JobReporter` class.

    Raises
    ------
    RuntimeError
        Raised if no jobs could be assembled from the repository.
    """
    reporter = JobReporter(repository, collection, metrics_package,
                           spec, dataset_name)
    jobs = reporter.run()
    if not jobs:
        raise RuntimeError('Job reporter returned no jobs.')
    # One output file per job, named after the package filter, spec
    # level, and the job's data-ID key.
    prefix = metrics_package or 'all'
    for key, job in jobs.items():
        with open(f"{prefix}_{spec}_{key}.verify.json", 'w') as out:
            json.dump(job.json, out, indent=2, sort_keys=True)
def make_key(ref):
    """Build a unique string key for a dataset reference.

    The key is formed by joining all of the reference's data-ID dimension
    values (plus the run) with underscores, via a Butler ``FileTemplate``.
    """
    dims = sorted(ref.dataId.names)
    dims.append('run')  # "run" must be in the template
    template = FileTemplate('_'.join(f'{{{dim}}}' for dim in dims))
    return template.format(ref)
class JobReporter:
    """A class for extracting metric values from a Gen 3 repository and
    repackaging them as Job objects.

    Parameters
    ----------
    repository : `str`
        Path to a Butler configuration YAML file or a directory containing one.
    collection : `str`
        Name of the collection to search for metric values.
    metrics_package : `str` or `None`
        If provided, the namespace by which to filter selected metrics.
    spec : `str`
        The level of specification to filter metrics by.
    dataset_name : `str`
        The name of the dataset to report to SQuaSH through the
        ``ci_dataset`` tag.
    """

    def __init__(self, repository, collection, metrics_package, spec,
                 dataset_name):
        # The packager is hard-coded to verify_metrics for now; it could
        # be promoted to a constructor argument if that ever proves useful.
        self.metrics = MetricSet.load_metrics_package(
            package_name_or_path='verify_metrics',
            subset=metrics_package)
        self.butler = Butler(repository)
        self.registry = self.butler.registry
        self.spec = spec
        self.collection = collection
        self.dataset_name = dataset_name

    def run(self):
        """Collate job information.

        Returns
        -------
        jobs : `dict` [`str`, `lsst.verify.Job`]
            A mapping of `~lsst.verify.Job` objects, indexed by a string
            representation of their data ID.
        """
        jobs = {}
        for metric in self.metrics:
            dataset_type = f'metricvalue_{metric.package}_{metric.metric}'
            refs = set(self.registry.queryDatasets(
                dataset_type,
                collections=self.collection,
                findFirst=True))
            for ref in refs:
                # Refs returned by the query are guaranteed to be valid.
                measurement = self.butler.get(ref)
                # Make the name the same as what SQuaSH expects.
                measurement.metric_name = metric

                # queryDatasets guarantees ref.dataId.hasFull()
                data_id = ref.dataId.full.byName()
                key = make_key(ref)

                # For backward-compatibility with Gen 2 SQuaSH uploads.
                band = data_id.get('physical_filter')
                if not band:
                    # Fall back to a physical filter associated with the
                    # abstract filter. In general there may be more than
                    # one; take the shortest assuming it is the most
                    # generic.
                    candidates = [rec.name for rec in
                                  self.registry.queryDimensionRecords(
                                      'physical_filter',
                                      dataId=ref.dataId)]
                    band = min(candidates, key=len)

                if key not in jobs:
                    meta = {
                        'filter': band,
                        'butler_generation': 'Gen3',
                        'ci_dataset': self.dataset_name,
                    }
                    meta.update(data_id)
                    # Get dataset_repo_url from repository somehow?
                    jobs[key] = Job(meta=meta, metrics=self.metrics)
                jobs[key].measurements.insert(measurement)
        return jobs