Coverage for python/lsst/verify/bin/jobReporter.py: 15%

import argparse
import json

from lsst.verify import Job, MetricSet
from lsst.daf.butler import Butler, FileTemplate

__all__ = ["main", "JobReporter", "build_argparser"]


def build_argparser():
    desc = 'Produce a Job object which can either be used ' \
           'to build a local report or to ship to SQuaSH.'
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument(
        'repository', type=str,
        help='Path to a valid gen3 repository')
    parser.add_argument(
        'collection', type=str,
        help='Collection to search for metric measurement values')
    parser.add_argument(
        '--metrics_package', type=str,
        help='Metrics namespace to filter by. If omitted, all metrics '
             'are processed.')
    parser.add_argument(
        '--spec', type=str, default="design",
        help='Spec level to apply: minimum, design, or stretch')
    parser.add_argument(
        '--dataset_name', type=str, required=True,
        help='Name of the dataset for which the report is being generated. '
             'This is the desired ci_dataset tag in SQuaSH.')
    return parser
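
# Example (an illustrative sketch, not part of the original module): parsing
# a hypothetical command line and dispatching to main(). The repository path,
# collection, and dataset name are placeholders.
#
#     args = build_argparser().parse_args(
#         ['/path/to/repo', 'runs/ci_hsc', '--dataset_name', 'ci_hsc'])
#     main(args.repository, args.collection, args.metrics_package,
#          args.spec, args.dataset_name)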

def main(repository, collection, metrics_package, spec, dataset_name):
    """Extract metric values from a Gen 3 repository and rewrite them to disk
    in Job format.

    Parameters
    ----------
    Parameters are the same as for the `JobReporter` class.
    """
    jr = JobReporter(repository,
                     collection,
                     metrics_package,
                     spec,
                     dataset_name)
    jobs = jr.run()
    if len(jobs) == 0:
        raise RuntimeError('Job reporter returned no jobs.')
    for k, v in jobs.items():
        filename = f"{metrics_package or 'all'}_{spec}_{k}.verify.json"
        with open(filename, 'w') as fh:
            json.dump(v.json, fh, indent=2, sort_keys=True)
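
# Example (illustrative, hypothetical values): with metrics_package=None,
# spec='design', and a job keyed 'HSC_903334_run1', the loop above writes
# 'all_design_HSC_903334_run1.verify.json' to the current directory.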

def make_key(ref):
    """Build a unique string key for a dataset ref from its data ID."""
    names = sorted(ref.dataId.names)
    names.append('run')  # "run" must be in the template
    key_tmpl = '_'.join(['{' + el + '}' for el in names])
    file_tmpl = FileTemplate(key_tmpl)
    key = file_tmpl.format(ref)
    return key
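
# Example (illustrative, hypothetical values): for a ref whose data ID has
# dimensions {'instrument', 'visit'}, the template built above is
#
#     '{instrument}_{visit}_{run}'
#
# which FileTemplate.format() fills in from the ref, yielding a key such as
# 'HSC_903334_run1'.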

class JobReporter:
    """A class for extracting metric values from a Gen 3 repository and
    repackaging them as Job objects.

    Parameters
    ----------
    repository : `str`
        Path to a Butler configuration YAML file or a directory containing
        one.
    collection : `str`
        Name of the collection to search for metric values.
    metrics_package : `str` or `None`
        If provided, the namespace by which to filter selected metrics.
    spec : `str`
        The level of specification to filter metrics by.
    dataset_name : `str`
        The name of the dataset to report to SQuaSH through the
        ``ci_dataset`` tag.
    """

    def __init__(self,
                 repository,
                 collection,
                 metrics_package,
                 spec,
                 dataset_name):
        # Hard-coding verify_metrics as the metrics package for now.
        # It would be easy to pass this in as an argument, if necessary.
        self.metrics = MetricSet.load_metrics_package(
            package_name_or_path='verify_metrics',
            subset=metrics_package)
        self.butler = Butler(repository)
        self.registry = self.butler.registry
        self.spec = spec
        self.collection = collection
        self.dataset_name = dataset_name

    def run(self):
        """Collate job information.

        Returns
        -------
        jobs : `dict` [`str`, `lsst.verify.Job`]
            A mapping of `~lsst.verify.Job` objects, indexed by a string
            representation of their data ID.
        """
        jobs = {}
        for metric in self.metrics:
            dataset = f'metricvalue_{metric.package}_{metric.metric}'
            datasetRefs = list(self.registry.queryDatasets(
                dataset, collections=self.collection))
            for ref in datasetRefs:
                m = self.butler.get(ref, collections=self.collection)
                # Make the name the same as what SQuaSH expects.
                m.metric_name = metric

                # queryDatasets guarantees ref.dataId.hasFull()
                dataId = ref.dataId.full.byName()
                key = make_key(ref)

                # For backward-compatibility with Gen 2 SQuaSH uploads
                pfilt = dataId.get('physical_filter')
                if not pfilt:
                    # Grab the physical filter associated with the abstract
                    # filter. In general there may be more than one. Take the
                    # shortest, assuming it is the most generic.
                    pfilts = [el.name for el in
                              self.registry.queryDimensionRecords(
                                  'physical_filter',
                                  dataId=ref.dataId)]
                    pfilt = min(pfilts, key=len)

                if key not in jobs:
                    job_metadata = {
                        'filter': pfilt,
                        'butler_generation': 'Gen3',
                        'ci_dataset': self.dataset_name,
                    }
                    job_metadata.update(dataId)
                    # Get dataset_repo_url from repository somehow?
                    jobs[key] = Job(meta=job_metadata, metrics=self.metrics)
                jobs[key].measurements.insert(m)
        return jobs
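
# Example (an illustrative sketch, not part of the original module): using
# JobReporter directly. The repository path, collection, and dataset name are
# hypothetical.
#
#     reporter = JobReporter('/path/to/repo', 'runs/ci_hsc',
#                            metrics_package=None, spec='design',
#                            dataset_name='ci_hsc')
#     for key, job in reporter.run().items():
#         print(key, len(job.measurements))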