Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1import argparse 

2import json 

3import time 

4 

5from lsst.verify import Job, MetricSet 

6from lsst.daf.butler import Butler 

7 

8 

9__all__ = ["main", "JobReporter", "build_argparser"] 

10 

11 

def build_argparser():
    """Build the command-line argument parser for the metric-report tool.

    Returns
    -------
    parser : `argparse.ArgumentParser`
        Parser accepting two positional arguments (``repository``,
        ``collection``), a required ``--dataset_name``, and optional
        ``--metrics_package`` and ``--spec`` (default ``"design"``) flags.
    """
    desc = 'Produce a Job object which can either be used ' \
           'to build a local report or to ship to SQuaSH.'
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument(
        'repository', type=str,
        help='Path to a valid gen3 repository')
    parser.add_argument(
        'collection', type=str,
        help='Collection to search for metric measurement values')
    parser.add_argument(
        '--metrics_package', type=str,
        help='Metrics namespace to filter by. If omitted, all metrics '
             'are processed.')
    parser.add_argument(
        '--spec', type=str, default="design",
        help='Spec level to apply: minimum, design, or stretch')
    parser.add_argument(
        '--dataset_name', type=str, required=True,
        # BUGFIX: the two concatenated literals previously lacked a
        # separating space ("...generated.This is...").
        help='Name of the dataset for which the report is being generated. '
             'This is the desired ci_dataset tag in SQuaSH.')
    return parser

34 

35 

def main(repository, collection, metrics_package, spec, dataset_name):
    """Extract metric values from a Gen 3 repository and rewrite them to disk
    in Job format.

    One JSON file is written per job, named
    ``<metrics_package|all>_<spec>_<dataId key>_<timestamp>.json``.

    Parameters
    ----------
    Parameters are the same as for the `JobReporter` class.

    Raises
    ------
    RuntimeError
        Raised if no jobs were produced from the repository/collection.
    """
    reporter = JobReporter(repository, collection, metrics_package, spec,
                           dataset_name)
    jobs = reporter.run()
    if not jobs:
        raise RuntimeError('Job reporter returned no jobs.')
    for key, job in jobs.items():
        # Timestamp in the name keeps successive runs from clobbering
        # each other's output.
        filename = f"{metrics_package or 'all'}_{spec}_{key}_{time.time()}.json"
        with open(filename, 'w') as fh:
            json.dump(job.json, fh, indent=2, sort_keys=True)

56 

57 

class JobReporter:
    """A class for extracting metric values from a Gen 3 repository and
    repackaging them as Job objects.

    Parameters
    ----------
    repository : `str`
        Path to a Butler configuration YAML file or a directory containing one.
    collection : `str`
        Name of the collection to search for metric values.
    metrics_package : `str` or `None`
        If provided, the namespace by which to filter selected metrics.
    spec : `str`
        The level of specification to filter metrics by.
    dataset_name : `str`
        The name of the dataset to report to SQuaSH through the
        ``ci_dataset`` tag.
    """

    def __init__(self,
                 repository,
                 collection,
                 metrics_package,
                 spec,
                 dataset_name):
        # Hard coding verify_metrics as the packager for now.
        # It would be easy to pass this in as an argument, if necessary.
        # self.metrics is the full MetricSet, optionally narrowed to the
        # ``metrics_package`` namespace via ``subset``.
        self.metrics = MetricSet.load_metrics_package(
            package_name_or_path='verify_metrics',
            subset=metrics_package)
        self.butler = Butler(repository)
        # Cache the registry handle; run() queries it repeatedly.
        self.registry = self.butler.registry
        # NOTE(review): self.spec is stored but not read anywhere in this
        # class — presumably consumed by callers or kept for report naming.
        self.spec = spec
        self.collection = collection
        self.dataset_name = dataset_name

    def run(self):
        """Collate job information.

        Returns
        -------
        jobs : `dict` [`str`, `lsst.verify.Job`]
            A mapping of `~lsst.verify.Job` objects, indexed by a string
            representation of their data ID.
        """
        jobs = {}
        for metric in self.metrics:
            # Measurement datasets follow the
            # ``metricvalue_<package>_<metric>`` naming convention.
            dataset = f'metricvalue_{metric.package}_{metric.metric}'
            datasetRefs = list(self.registry.queryDatasets(dataset,
                               collections=self.collection))
            for ref in datasetRefs:
                m = self.butler.get(ref, collections=self.collection)
                # make the name the same as what SQuaSH Expects
                m.metric_name = metric

                # queryDatasets guarantees ref.dataId.hasFull()
                dataId = ref.dataId.full.byName()
                # Sort values by key name
                # so measurements with the same data ID always map to the
                # same job key regardless of dimension ordering.
                key = "_".join(str(id) for _, id in sorted(dataId.items()))

                # For backward-compatibility with Gen 2 SQuaSH uploads
                pfilt = dataId.get('physical_filter')
                if not pfilt:
                    # Grab the physical filter associated with the abstract
                    # filter. In general there may be more than one. Take the
                    # shortest assuming it is the most generic.
                    # NOTE(review): min() raises ValueError if the registry
                    # returns no physical_filter records for this data ID —
                    # assumed non-empty here; confirm against the repository.
                    pfilts = [el.name for el in
                              self.registry.queryDimensionRecords(
                                  'physical_filter',
                                  dataId=ref.dataId)]
                    pfilt = min(pfilts, key=len)

                if key not in jobs.keys():
                    # First measurement seen for this data ID: create the
                    # Job shell with the metadata SQuaSH expects.
                    job_metadata = {
                        'filter': pfilt,
                        'butler_generation': 'Gen3',
                        'ci_dataset': self.dataset_name,
                    }
                    job_metadata.update(dataId)
                    # Get dataset_repo_url from repository somehow?
                    jobs[key] = Job(meta=job_metadata, metrics=self.metrics)
                jobs[key].measurements.insert(m)
        return jobs