Coverage for tests/test_indexing.py: 10%

123 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-03-28 02:59 -0700

1# This file is part of astro_metadata_translator. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the LICENSE file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# Use of this source code is governed by a 3-clause BSD-style 

10# license that can be found in the LICENSE file. 

11 

12import io 

13import json 

14import logging 

15import os 

16import tempfile 

17import unittest 

18 

19from astro_metadata_translator import ObservationGroup, ObservationInfo 

20from astro_metadata_translator.file_helpers import read_file_info 

21from astro_metadata_translator.indexing import ( 

22 index_files, 

23 process_index_data, 

24 process_sidecar_data, 

25 read_index, 

26 read_sidecar, 

27) 

28 

# Absolute path of the directory that contains this test module.
TESTDIR = os.path.abspath(os.path.dirname(__file__))

# The "data" subdirectory next to this module; the tests build their input
# file paths relative to it.
TESTDATA = os.path.join(TESTDIR, "data")

31 

32 

class IndexingTestCase(unittest.TestCase):
    """Test indexing and sidecar functionality.

    All input files are looked up beneath ``TESTDATA``.
    """

    def test_indexing(self):
        """Test that we can index two headers.

        The same two files are indexed in ``"translated"`` mode and then in
        ``"metadata"`` mode, and each index is converted back with
        `process_index_data`.
        """
        files = ["fitsheader-hsc-HSCA04090107.yaml", "fitsheader-hsc.yaml"]
        files = [os.path.join(TESTDATA, f) for f in files]

        # Index the translated metadata
        index, okay, failed = index_files(files, None, 1, None, "translated")
        self.assertEqual(set(files), set(okay))
        self.assertEqual(failed, [])

        self.assertIn("__COMMON__", index)
        self.assertIn("instrument", index["__COMMON__"])

        # Write the index and check we can read it back.
        with tempfile.NamedTemporaryFile(suffix=".json", mode="w+") as temp:
            print(json.dumps(index), file=temp)
            # Flush before read_index reopens the file by name, otherwise
            # the JSON may still be sitting in the write buffer.
            temp.flush()
            externally_processed = read_index(temp.name)

        # Convert to an ObservationGroup. Filenames are now stored in the
        # corresponding ObservationInfo.
        obs_group = process_index_data(index)
        self.assertIsInstance(obs_group, ObservationGroup)
        self.assertEqual(len(obs_group), 2)
        self.assertEqual(obs_group[0].instrument, "HSC")
        # Compare as sets so the check does not depend on entry order.
        self.assertEqual({obs_group[0].filename, obs_group[1].filename}, set(files))
        self.assertEqual(externally_processed, obs_group)

        # With force_metadata=True the result is indexed by file name and
        # by key rather than accessed through attributes.
        metadata = process_index_data(index, force_metadata=True)
        self.assertEqual(len(metadata), 2)
        self.assertEqual(metadata[files[0]]["instrument"], "HSC")

        # Index the native FITS headers
        index, okay, failed = index_files(files, None, 1, None, "metadata")
        self.assertEqual(set(files), set(okay))
        self.assertEqual(failed, [])

        # Check that common entries have been factored out
        self.assertIn("__COMMON__", index)
        self.assertIn("TELESCOP", index["__COMMON__"])
        self.assertIn("INSTRUME", index[files[0]])
        self.assertNotIn("INSTRUME", index[files[1]])
        self.assertNotIn("TELESCOP", index[files[0]])

        # Convert back to a dict indexed by filename and check that
        # common has been put back properly.
        metadata = process_index_data(index)
        self.assertEqual(len(metadata), 2)
        self.assertEqual(metadata[files[0]]["INSTRUME"], "Hyper Suprime-Cam")
        self.assertEqual(metadata[files[0]]["TELESCOP"], index["__COMMON__"]["TELESCOP"])
        self.assertEqual(metadata[files[1]]["TELESCOP"], index["__COMMON__"]["TELESCOP"])

    def test_file_reading(self):
        """Test the low-level file reader.

        Exercises `read_file_info` with each content mode and content type
        used below, compares against a JSON sidecar, round-trips JSON output
        through `process_sidecar_data`, and checks the failure paths of
        `read_file_info`, `read_sidecar` and `read_index`.
        """
        # First with a real header (but YAML)
        file = os.path.join(TESTDATA, "fitsheader-hsc-HSCA04090107.yaml")
        info = read_file_info(file, 1, None, "metadata", content_type="simple")
        self.assertEqual(info["PROP-ID"], "o15426")

        # With metadata sidecar.
        json_file = os.path.splitext(file)[0] + ".json"
        json_info = read_sidecar(json_file)
        # Need to remove the COMMENT fields to avoid confusion between
        # PropertyList and the fallback code with multiple entries.
        json_info.pop("COMMENT", None)
        dict_info = dict(info)  # it may be a PropertyList
        dict_info.pop("COMMENT", None)
        self.assertEqual(json_info, dict_info)

        # Translated content, requested in each of the three content types.
        info = read_file_info(file, 1, None, "translated", content_type="native")
        self.assertIsInstance(info, ObservationInfo)
        self.assertEqual(info.instrument, "HSC")

        info = read_file_info(file, 1, None, "translated", content_type="simple")
        self.assertIsInstance(info, dict)
        self.assertEqual(info["instrument"], "HSC")

        json_str = read_file_info(file, 1, None, "translated", content_type="json")
        self.assertIsInstance(json_str, str)
        info = json.loads(json_str)
        self.assertEqual(info["instrument"], "HSC")

        # The decoded JSON of translated content can be processed as
        # sidecar data.
        processed = process_sidecar_data(info)
        self.assertIsInstance(processed, ObservationInfo)
        self.assertEqual(processed.instrument, "HSC")

        processed = process_sidecar_data(info, force_metadata=True)
        self.assertIsInstance(processed, dict)
        self.assertEqual(processed["instrument"], "HSC")

        # Same round trip for the untranslated header.
        json_str = read_file_info(file, 1, None, "metadata", content_type="json")
        self.assertIsInstance(json_str, str)
        info = json.loads(json_str)
        self.assertEqual(info["PROP-ID"], "o15426")

        processed = process_sidecar_data(info)
        self.assertEqual(processed["PROP-ID"], info["PROP-ID"])

        # Read a small fits file
        fits_file = os.path.join(TESTDATA, "small.fits")
        info = read_file_info(fits_file, 0, None, "metadata", content_type="native")
        self.assertEqual(info["FILTER"], "r")

        # The fits file won't translate
        with self.assertRaises(ValueError):
            read_file_info(fits_file, 0, None, "obsInfo")

        with self.assertRaises(ValueError):
            read_file_info(file, 1, None, "unknown")

        with self.assertRaises(FileNotFoundError):
            read_file_info("notthere.not", 1)

        # With print_trace given, the missing file is expected to be logged
        # instead of raised. Only the log output is inspected here.
        with self.assertLogs(level=logging.WARNING) as cm:
            read_file_info("notthere.not", 1, print_trace=False)

        self.assertIn("Unable to open file notthere.not", "\n".join(cm.output))

        # Now read a file that can not be translated and should trigger
        # different errors
        bad_file = os.path.join(TESTDATA, "corrections", "SCUBA_test-20000101_00002.yaml")

        with self.assertLogs(level="DEBUG") as cm:
            with self.assertRaises(ValueError):
                read_file_info(bad_file, 1)
        self.assertIn("Unable to determine translator class", "\n".join(cm.output))

        # print_trace=False: expect a single line naming the exception on
        # the supplied stream.
        with io.StringIO() as out:
            read_file_info(bad_file, 1, print_trace=False, outstream=out)
            out.seek(0)
            lines = out.readlines()
            self.assertEqual(len(lines), 1)
            self.assertIn("ValueError", lines[0])

        # print_trace=True: expect multi-line output that ends with the
        # exception.
        with io.StringIO() as out:
            read_file_info(bad_file, 1, print_trace=True, outstream=out)
            out.seek(0)
            lines = out.readlines()
            self.assertGreater(len(lines), 4)
            self.assertIn("ValueError", lines[-1])

        # A sidecar file that is not a dict.
        not_dict = os.path.join(TESTDATA, "bad-sidecar.json")
        with self.assertRaises(ValueError):
            read_sidecar(not_dict)

        with self.assertRaises(ValueError):
            read_index(not_dict)

        # index file that is not JSON.
        with self.assertRaises(ValueError):
            read_index(bad_file)

    def test_obs_info_sidecar(self):
        """Test reading of older files with missing content.

        The JSON sidecar next to the YAML header must read back as an
        `ObservationInfo` equal to the one translated from the header.
        """
        # First with a real header (but YAML)
        file = os.path.join(TESTDATA, "fitsheader-hsc.yaml")
        info = read_file_info(file, 1, None, "translated", content_type="native")
        self.assertIsInstance(info, ObservationInfo)
        self.assertEqual(info.instrument, "HSC")

        # With translated metadata sidecar that lacks the group_counter_start.
        json_file = os.path.splitext(file)[0] + ".json"
        json_info = read_sidecar(json_file)
        self.assertIsInstance(json_info, ObservationInfo)
        self.assertEqual(json_info, info)

202 

203 

# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()