Coverage for python/lsst/verify/bin/jobReporter.py: 14%

58 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-04-12 02:15 -0700

1import argparse 

2import json 

3 

4from lsst.verify import Job, MetricSet 

5from lsst.daf.butler import Butler, FileTemplate 

6 

7 

8__all__ = ["main", "JobReporter", "build_argparser"] 

9 

10 

def build_argparser():
    """Build the command-line argument parser for the job reporter.

    Returns
    -------
    parser : `argparse.ArgumentParser`
        Parser accepting a gen3 repository path, a collection name, and
        optional metric-filtering / spec-level / dataset-name options.
    """
    description = ('Produce a Job object which can either be used '
                   'to build a local report or to ship to SQuaSH.')
    parser = argparse.ArgumentParser(description=description)
    # Positional arguments: where to read from.
    parser.add_argument('repository', type=str,
                        help='Path to a valid gen3 repository')
    parser.add_argument('collection', type=str,
                        help='Collection to search for metric measurement '
                             'values')
    # Optional arguments: what to report and how to tag it.
    parser.add_argument('--metrics_package', type=str,
                        help='Metrics namespace to filter by. If omitted, '
                             'all metrics are processed.')
    parser.add_argument('--spec', type=str, default="design",
                        help='Spec level to apply: minimum, design, or '
                             'stretch')
    parser.add_argument('--dataset_name', type=str, required=True,
                        help='Name of the dataset for which the report is '
                             'being generated. This is the desired '
                             'ci_dataset tag in SQuaSH.')
    return parser

33 

34 

def main(repository, collection, metrics_package, spec, dataset_name):
    """Extract metric values from a Gen 3 repository and rewrite them to disk
    in Job format.

    Parameters
    ----------
    Parameters are the same as for the `JobReporter` class.

    Raises
    ------
    RuntimeError
        Raised if no jobs were produced from the given repository and
        collection.
    """
    reporter = JobReporter(repository, collection, metrics_package, spec,
                           dataset_name)
    jobs = reporter.run()
    if not jobs:
        raise RuntimeError('Job reporter returned no jobs.')
    # One output file per job, named after the filter/spec/data-ID key.
    for key, job in jobs.items():
        out_name = f"{metrics_package or 'all'}_{spec}_{key}.verify.json"
        with open(out_name, 'w') as out_file:
            json.dump(job.json, out_file, indent=2, sort_keys=True)

55 

56 

def make_key(ref):
    """Build a string key identifying a dataset reference's data ID and run.

    Parameters
    ----------
    ref : `lsst.daf.butler.DatasetRef`
        Reference whose data-ID dimension values (plus run) are joined
        into the key.

    Returns
    -------
    key : `str`
        Underscore-joined dimension values, formatted via `FileTemplate`.
    """
    dimension_names = sorted(ref.dataId.names)
    dimension_names.append('run')  # "run" must be in the template
    template = '_'.join(f'{{{name}}}' for name in dimension_names)
    return FileTemplate(template).format(ref)

64 

65 

class JobReporter:
    """A class for extracting metric values from a Gen 3 repository and
    repackaging them as Job objects.

    Parameters
    ----------
    repository : `str`
        Path to a Butler configuration YAML file or a directory containing one.
    collection : `str`
        Name of the collection to search for metric values.
    metrics_package : `str` or `None`
        If provided, the namespace by which to filter selected metrics.
    spec : `str`
        The level of specification to filter metrics by.
    dataset_name : `str`
        The name of the dataset to report to SQuaSH through the
        ``ci_dataset`` tag.
    """

    def __init__(self,
                 repository,
                 collection,
                 metrics_package,
                 spec,
                 dataset_name):
        # Hard coding verify_metrics as the packager for now.
        # It would be easy to pass this in as an argument, if necessary.
        self.metrics = MetricSet.load_metrics_package(
            package_name_or_path='verify_metrics',
            subset=metrics_package)
        self.butler = Butler(repository)
        self.registry = self.butler.registry
        self.spec = spec
        self.collection = collection
        self.dataset_name = dataset_name

    def run(self):
        """Collate job information.

        Returns
        -------
        jobs : `dict` [`str`, `lsst.verify.Job`]
            A mapping of `~lsst.verify.Job` objects, indexed by a string
            representation of their data ID.
        """
        jobs = {}
        for metric in self.metrics:
            # Metric-value dataset types follow this naming convention.
            dataset = f'metricvalue_{metric.package}_{metric.metric}'
            datasetRefs = set(self.registry.queryDatasets(
                dataset,
                collections=self.collection,
                findFirst=True))
            for ref in datasetRefs:
                # Ref is guaranteed to be valid.
                m = self.butler.get(ref)
                # make the name the same as what SQuaSH Expects
                m.metric_name = metric

                # queryDatasets guarantees ref.dataId.hasFull()
                dataId = ref.dataId.full.byName()
                key = make_key(ref)

                # For backward-compatibility with Gen 2 SQuaSH uploads
                pfilt = dataId.get('physical_filter')
                if not pfilt:
                    # Grab the physical filter associated with the abstract
                    # filter. In general there may be more than one. Take the
                    # shortest assuming it is the most generic.
                    pfilts = [el.name for el in
                              self.registry.queryDimensionRecords(
                                  'physical_filter',
                                  dataId=ref.dataId)]
                    pfilt = min(pfilts, key=len)

                # Idiomatic membership test (was `key not in jobs.keys()`).
                if key not in jobs:
                    job_metadata = {
                        'filter': pfilt,
                        'butler_generation': 'Gen3',
                        'ci_dataset': self.dataset_name,
                    }
                    job_metadata.update(dataId)
                    # Get dataset_repo_url from repository somehow?
                    jobs[key] = Job(meta=job_metadata, metrics=self.metrics)
                jobs[key].measurements.insert(m)
        return jobs