Coverage for python/lsst/verify/bin/jobReporter.py : 16%

import argparse
import json
import time

from lsst.verify import Job, MetricSet
from lsst.daf.butler import Butler

__all__ = ["main", "JobReporter", "build_argparser"]
def build_argparser():
    desc = 'Produce a Job object which can either be used ' \
           'to build a local report or to ship to SQuaSH.'
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument(
        'repository', type=str,
        help='Path to a valid gen3 repository')
    parser.add_argument(
        'collection', type=str,
        help='Collection to search for metric measurement values')
    parser.add_argument(
        '--metrics_package', type=str,
        help='Metrics namespace to filter by. If omitted, all metrics '
             'are processed.')
    parser.add_argument(
        '--spec', type=str, default="design",
        help='Spec level to apply: minimum, design, or stretch')
    parser.add_argument(
        '--dataset_name', type=str, required=True,
        help='Name of the dataset for which the report is being generated. '
             'This is the desired ci_dataset tag in SQuaSH.')
    return parser
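
# A minimal usage sketch for the parser above. The repository path,
# collection, and dataset name are hypothetical, for illustration only:
#
#     parser = build_argparser()
#     args = parser.parse_args(['/repo/main', 'runs/ci',
#                               '--dataset_name', 'validation_data_hsc'])
#     main(**vars(args))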
def main(repository, collection, metrics_package, spec, dataset_name):
    """Extract metric values from a Gen 3 repository and rewrite them to disk
    in Job format.

    Parameters
    ----------
    Parameters are the same as for the `JobReporter` class.
    """
    jr = JobReporter(repository,
                     collection,
                     metrics_package,
                     spec,
                     dataset_name)
    jobs = jr.run()
    if len(jobs) == 0:
        raise RuntimeError('Job reporter returned no jobs.')
    for k, v in jobs.items():
        filename = f"{metrics_package or 'all'}_{spec}_{k}_{time.time()}.json"
        with open(filename, 'w') as fh:
            json.dump(v.json, fh, indent=2, sort_keys=True)
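
# A sketch of reading one of the files written above back into a Job object,
# assuming a hypothetical output filename; `Job.deserialize` is the inverse
# of the `Job.json` serialization used here:
#
#     with open('all_design_HSC-R_903334_22_1650000000.0.json') as fh:
#         job = Job.deserialize(**json.load(fh))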
class JobReporter:
    """A class for extracting metric values from a Gen 3 repository and
    repackaging them as Job objects.

    Parameters
    ----------
    repository : `str`
        Path to a Butler configuration YAML file or a directory containing one.
    collection : `str`
        Name of the collection to search for metric values.
    metrics_package : `str` or `None`
        If provided, the namespace by which to filter selected metrics.
    spec : `str`
        The level of specification to filter metrics by.
    dataset_name : `str`
        The name of the dataset to report to SQuaSH through the
        ``ci_dataset`` tag.
    """
    def __init__(self,
                 repository,
                 collection,
                 metrics_package,
                 spec,
                 dataset_name):
        # Hard-coding verify_metrics as the metrics package for now.
        # It would be easy to pass this in as an argument, if necessary.
        self.metrics = MetricSet.load_metrics_package(
            package_name_or_path='verify_metrics',
            subset=metrics_package)
        self.butler = Butler(repository)
        self.registry = self.butler.registry
        self.spec = spec
        self.collection = collection
        self.dataset_name = dataset_name
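
    # A small sketch of what the MetricSet filtering above does, assuming the
    # standard verify_metrics package is available (the subset name is
    # illustrative):
    #
    #     all_metrics = MetricSet.load_metrics_package('verify_metrics')
    #     drp_metrics = MetricSet.load_metrics_package('verify_metrics',
    #                                                  subset='validate_drp')
    #     # drp_metrics contains only metrics in the validate_drp namespace.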
    def run(self):
        """Collate job information.

        Returns
        -------
        jobs : `dict` [`str`, `lsst.verify.Job`]
            A mapping of `~lsst.verify.Job` objects, indexed by a string
            representation of their data ID.
        """
        jobs = {}
        for metric in self.metrics:
            dataset = f'metricvalue_{metric.package}_{metric.metric}'
            datasetRefs = list(self.registry.queryDatasets(
                dataset, collections=self.collection))
            for ref in datasetRefs:
                m = self.butler.get(ref, collections=self.collection)
                # Make the name the same as what SQuaSH expects.
                m.metric_name = metric

                # queryDatasets guarantees ref.dataId.hasFull()
                dataId = ref.dataId.full.byName()
                # Sort values by key name.
                key = "_".join(str(value)
                               for _, value in sorted(dataId.items()))

                # For backward compatibility with Gen 2 SQuaSH uploads.
                pfilt = dataId.get('physical_filter')
                if not pfilt:
                    # Grab the physical filter associated with the abstract
                    # filter. In general there may be more than one; take the
                    # shortest, assuming it is the most generic.
                    pfilts = [el.name for el in
                              self.registry.queryDimensionRecords(
                                  'physical_filter',
                                  dataId=ref.dataId)]
                    pfilt = min(pfilts, key=len)

                if key not in jobs:
                    job_metadata = {
                        'filter': pfilt,
                        'butler_generation': 'Gen3',
                        'ci_dataset': self.dataset_name,
                    }
                    job_metadata.update(dataId)
                    # Get dataset_repo_url from repository somehow?
                    jobs[key] = Job(meta=job_metadata, metrics=self.metrics)
                jobs[key].measurements.insert(m)
        return jobs
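
# A minimal sketch of driving JobReporter directly instead of through main()
# (the repository path, collection, and dataset name are hypothetical):
#
#     reporter = JobReporter('/repo/main', 'runs/ci',
#                            metrics_package=None, spec='design',
#                            dataset_name='validation_data_hsc')
#     for data_id_key, job in reporter.run().items():
#         print(data_id_key, len(job.measurements))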