Coverage for tests/test_query_relations.py: 24%

53 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-02-14 02:05 -0800

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22import difflib 

23import os 

24import os.path 

25import re 

26import unittest 

27 

28from lsst.daf.butler.registry import MissingSpatialOverlapError, Registry, RegistryConfig, queries 

29from lsst.daf.butler.transfers import YamlRepoImportBackend 

30 

# Absolute path of the directory containing this test module; test data files
# are located relative to it.
TESTDIR = os.path.abspath(os.path.dirname(__file__))

32 

33 

class TestQueryRelationsTests(unittest.TestCase):
    """Tests for registry queries that check that the generated relation tree
    matches expectations.

    These tests are somewhat fragile - there are multiple valid relation trees
    for most registry queries, just as there are multiple valid SQL queries,
    and since we check the relation tree via string comparisons we are
    also sensitive to irrelevant things like column ordering.  But these
    differences are deterministic, and checking the relation trees instead of
    the query results puts a much smaller burden on test-data creation and
    inspection (as well as making tests go faster), making it much easier to
    test many combinations of arguments.

    Note that daf_relation provides good test coverage of the process of going
    from relation trees to SQL.
    """

    @classmethod
    def setUpClass(cls) -> None:
        """Create an in-memory SQLite registry, import the test data, and
        record one mutually-consistent value per dimension as class
        attributes for use by the tests.
        """
        config = RegistryConfig()
        config["db"] = "sqlite://"
        cls.registry = Registry.createFromConfig(config)
        # We need just enough test data to have valid dimension records for
        # all of the dimensions we're concerned with, and we want to pick
        # values for each dimension that correspond to a spatiotemporal
        # overlap.  Without that, we'd be fighting optimizations built into the
        # query system that simplify things as soon as it can spot that there
        # will be no overall results.
        data_file = os.path.normpath(os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset.yaml"))
        # Use an explicit encoding so that reading the YAML file does not
        # depend on the platform's locale settings.
        with open(data_file, "r", encoding="utf-8") as stream:
            backend = YamlRepoImportBackend(stream, cls.registry)
            # Register and load while the stream is still open.
            backend.register()
            backend.load(datastore=None)
        assert (
            cls.registry.dimensions.commonSkyPix.name == "htm7"
        ), "If this changes, update the skypix levels below to have one below and one above."
        cls.htm7 = 222340
        cls.htm11 = 56919188
        cls.instrument = "HSC"
        cls.skymap = "hsc_rings_v1"
        cls.visit = 404
        cls.tract = 9615
        cls.detector = 0
        cls.patch = 14
        cls.data_id = cls.registry.expandDataId(
            htm7=cls.htm7,
            htm11=cls.htm11,
            instrument=cls.instrument,
            skymap=cls.skymap,
            visit=cls.visit,
            tract=cls.tract,
            detector=cls.detector,
            patch=cls.patch,
        )
        # band and physical_filter are looked up from the expanded data ID
        # rather than being hard-coded here.
        cls.band = cls.data_id["band"]
        cls.physical_filter = cls.data_id["physical_filter"]

    def assert_relation_str(
        self,
        expected: str,
        *results: queries.DataCoordinateQueryResults
        | queries.DimensionRecordQueryResults
        | queries.ParentDatasetQueryResults,
    ) -> None:
        """A specialized test assert that checks that one or more registry
        queries have relation trees that match the given string.

        Parameters
        ----------
        expected : `str`
            Expected relation tree, corresponding to
            ``lsst.daf.relation.Relation.__str__`` (which is much more concise
            and readable than the `repr` version, once you get used to it).
            Any newlines and indentation will be stripped.
        *results
            Query result objects returned by queryDataIds,
            queryDimensionRecords, or queryDatasets.

        Raises
        ------
        AssertionError
            Raised (via `unittest.TestCase.fail`) for the first result whose
            relation string differs from the normalized ``expected`` string;
            the message includes a `difflib` comparison of the two.
        """
        # Drop newlines and leading/trailing space.
        expected = expected.replace("\n", " ").strip()
        # Drop duplicate spaces (i.e. indentation).
        expected = re.sub(r" \s+", " ", expected)
        # Drop spaces next to parentheses and square brackets.
        expected = re.sub(r"\s*(\[|\(|\)|\])\s*", r"\1", expected)
        differ = difflib.Differ()
        for n, result in enumerate(results):
            result_str = str(result._query.relation)
            if expected != result_str:
                message_lines = [f"Unexpected relation string for query {n}:"]
                message_lines.extend(differ.compare([expected], [result_str]))
                self.fail("\n".join(message_lines))

    def test_spatial_constraints(self) -> None:
        """Test query constraints from data IDs and WHERE clauses that imply a
        spatial region.
        """
        # The same full data ID expressed as a 'where' string; shared by the
        # queries below that check that keyword-argument data IDs and 'where'
        # expressions yield the same relation tree.
        data_id_where = (
            f"band={self.band!r} "
            f"and instrument={self.instrument!r} "
            f"and detector={self.detector!r} "
            f"and physical_filter={self.physical_filter!r} "
            f"and visit={self.visit!r}"
        )
        # Region of the HTM11 pixel, as it is formatted into the expected
        # relation strings for queries constrained by the non-common skypix
        # dimension.
        htm11_region = self.registry.dimensions["htm11"].pixelization.pixel(self.htm11)
        # Constrain one set of regular spatial dimensions from another.
        # This needs post-query filtering in the iteration engine.
        self.assert_relation_str(
            f"""
            Π[band, patch, skymap, tract](
                σ[regions_overlap(patch.region, visit_detector_region.region)](
                    →[iteration](
                        select(
                            Π[band, patch, patch.region, skymap, tract, visit_detector_region.region](
                                σ[
                                    band={self.band!r}
                                    and instrument={self.instrument!r}
                                    and detector={self.detector!r}
                                    and physical_filter={self.physical_filter!r}
                                    and visit={self.visit!r}
                                ](
                                    patch_htm7_overlap
                                    ⋈ visit_detector_region_htm7_overlap
                                    ⋈ physical_filter
                                    ⋈ patch
                                    ⋈ visit
                                    ⋈ visit_detector_region
                                )
                            )
                        )
                    )
                )
            )
            """,
            self.registry.queryDataIds(
                ["patch", "band"], instrument=self.instrument, visit=self.visit, detector=self.detector
            ),
            self.registry.queryDataIds(["patch", "band"], where=data_id_where),
        )
        # Constrain the special common skypix dimension from a regular
        # dimension.  This does not need any post-query filtering.
        self.assert_relation_str(
            # It would be better if this query didn't join in visit and
            # physical_filter - it does that to ensure all implied dimension
            # relationships are satisfied in the results, but the dimensions
            # implied by visit are not present in the results and play no role
            # in the constraints.  But it'd be hard to fix that and any fix
            # would be very rarely exercised.
            f"""
            select(
                Π[htm7](
                    σ[
                        band={self.band!r}
                        and instrument={self.instrument!r}
                        and detector={self.detector!r}
                        and physical_filter={self.physical_filter!r}
                        and visit={self.visit!r}
                    ](
                        visit_detector_region_htm7_overlap
                        ⋈ physical_filter
                        ⋈ visit
                    )
                )
            )
            """,
            self.registry.queryDataIds(
                ["htm7"], instrument=self.instrument, visit=self.visit, detector=self.detector
            ),
            # For regular dimension constraints we can also support having the
            # data ID expressed as a 'where' expression.  The query would also
            # have the same behavior with only visit and detector specified
            # in the 'where' string, but it'd change the expected string.
            self.registry.queryDataIds(["htm7"], where=data_id_where),
        )
        # We can't constrain any other skypix system spatially, because we
        # don't have overlap rows for those in the database.  But in the future
        # we might be able to fake it with an iteration-engine spatial join, or
        # utilize explicitly-materialized overlaps.
        with self.assertRaises(MissingSpatialOverlapError):
            self.registry.queryDataIds(
                ["htm11"],
                instrument=self.instrument,
                visit=self.visit,
                detector=self.detector,
            )
        # Constrain a regular spatial dimension (patch) from a skypix
        # dimension other than the common one.  In general this requires
        # post-query filtering to get only the patches that overlap the skypix
        # pixel.  We could special-case skypix dimensions that are coarser than
        # the common dimension and part of the same system to simplify both the
        # SQL query and avoid post-query filtering, but we don't at present.
        self.assert_relation_str(
            f"""
            Π[patch, skymap, tract](
                σ[
                    regions_overlap(
                        patch.region,
                        {htm11_region}
                    )
                ](
                    →[iteration](
                        select(
                            Π[patch, patch.region, skymap, tract](
                                σ[htm7={self.htm7!r}](
                                    patch_htm7_overlap ⋈ patch
                                )
                            )
                        )
                    )
                )
            )
            """,
            self.registry.queryDataIds(["patch"], htm11=self.htm11),
        )
        # Constrain a regular spatial dimension (patch) from the common
        # skypix dimension.  This does not require post-query filtering.
        self.assert_relation_str(
            f"""
            select(
                Π[patch, skymap, tract](
                    σ[htm7={self.htm7!r}](
                        patch_htm7_overlap
                    )
                )
            )
            """,
            self.registry.queryDataIds(["patch"], htm7=self.htm7),
        )
        # Constrain a regular dimension (detector) via a different dimension
        # (visit) that combine together to define a more fine-grained region,
        # and also constrain via a skypix dimension other than the common one.
        # Once again we could special-case this for skypix dimensions that are
        # coarser than the common dimension in the same system, but we don't.
        self.assert_relation_str(
            # This query also doesn't need visit or physical_filter joined in,
            # but we can live with that.
            f"""
            Π[detector, instrument](
                σ[
                    regions_overlap(
                        visit_detector_region.region,
                        {htm11_region}
                    )
                ](
                    →[iteration](
                        select(
                            Π[detector, instrument, visit_detector_region.region](
                                σ[
                                    band={self.band!r}
                                    and instrument={self.instrument!r}
                                    and physical_filter={self.physical_filter!r}
                                    and visit={self.visit!r}
                                    and htm7={self.htm7!r}
                                ](
                                    visit_detector_region_htm7_overlap
                                    ⋈ physical_filter
                                    ⋈ visit
                                    ⋈ visit_detector_region
                                )
                            )
                        )
                    )
                )
            )
            """,
            self.registry.queryDataIds(
                ["detector"], visit=self.visit, instrument=self.instrument, htm11=self.htm11
            ),
        )
        # Constrain a regular dimension (detector) via a different dimension
        # (visit) that combine together to define a more fine-grained region,
        # and also constrain via the common-skypix system.
        self.assert_relation_str(
            # This query also doesn't need visit or physical_filter joined in,
            # but we can live with that.
            f"""
            select(
                Π[detector, instrument](
                    σ[
                        band={self.band!r}
                        and htm7={self.htm7!r}
                        and instrument={self.instrument!r}
                        and physical_filter={self.physical_filter!r}
                        and visit={self.visit!r}
                    ](
                        visit_detector_region_htm7_overlap
                        ⋈ physical_filter
                        ⋈ visit
                    )
                )
            )
            """,
            self.registry.queryDataIds(
                ["detector"], visit=self.visit, instrument=self.instrument, htm7=self.htm7
            ),
        )

344 

345 

# Run the tests in this module when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()