Coverage for tests/test_query_relations.py: 25%

50 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-12-01 11:00 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28import difflib 

29import os 

30import os.path 

31import re 

32import unittest 

33 

34from lsst.daf.butler.registry import MissingSpatialOverlapError, RegistryConfig, _RegistryFactory, queries 

35from lsst.daf.butler.transfers import YamlRepoImportBackend 

36 

# Absolute path of the directory containing this test module; test data
# files are located relative to it.
TESTDIR = os.path.dirname(os.path.abspath(__file__))

38 

39 

class TestQueryRelationsTests(unittest.TestCase):
    """Tests for registry queries that check that the generated relation tree
    matches expectations.

    These tests are somewhat fragile - there are multiple valid relation trees
    for most registry queries, just as there are multiple valid SQL queries,
    and since we check the relation tree via string comparisons we are
    also sensitive to irrelevant things like column ordering.  But these
    differences are deterministic, and checking the relation trees instead of
    the query results puts a much smaller burden on test-data creation and
    inspection (as well as making tests go faster), making it much easier to
    test many combinations of arguments.

    Note that daf_relation provides good test coverage of the process of going
    from relation trees to SQL.
    """

    @classmethod
    def setUpClass(cls) -> None:
        """Create the shared registry, load the test data into it, and record
        the dimension values used by the tests as class attributes.
        """
        config = RegistryConfig()
        config["db"] = "sqlite://"
        cls.registry = _RegistryFactory(config).create_from_config()
        # We need just enough test data to have valid dimension records for
        # all of the dimensions we're concerned with, and we want to pick
        # values for each dimension that correspond to a spatiotemporal
        # overlap.  Without that, we'd be fighting optimizations built into the
        # query system that simplify things as soon as it can spot that there
        # will be no overall results.
        data_file = os.path.normpath(os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset.yaml"))
        with open(data_file) as stream:
            backend = YamlRepoImportBackend(stream, cls.registry)
        backend.register()
        backend.load(datastore=None)
        assert (
            cls.registry.dimensions.commonSkyPix.name == "htm7"
        ), "If this changes, update the skypix levels below to have one below and one above."
        cls.htm7 = 222340
        cls.htm11 = 56919188
        cls.instrument = "HSC"
        cls.skymap = "hsc_rings_v1"
        cls.visit = 404
        cls.tract = 9615
        cls.detector = 0
        cls.patch = 14
        cls.data_id = cls.registry.expandDataId(
            htm7=cls.htm7,
            htm11=cls.htm11,
            instrument=cls.instrument,
            skymap=cls.skymap,
            visit=cls.visit,
            tract=cls.tract,
            detector=cls.detector,
            patch=cls.patch,
        )
        # band and physical_filter are read back from the expanded data ID
        # rather than hard-coded.
        cls.band = cls.data_id["band"]
        cls.physical_filter = cls.data_id["physical_filter"]

    def assert_relation_str(
        self,
        expected: str,
        *results: queries.DataCoordinateQueryResults
        | queries.DimensionRecordQueryResults
        | queries.ParentDatasetQueryResults,
    ) -> None:
        """Assert that one or more registry queries have relation trees that
        match the given string.

        Parameters
        ----------
        expected : `str`
            Expected relation tree, corresponding to
            ``lsst.daf.relation.Relation.__str__`` (which is much more concise
            and readable than the `repr` version, once you get used to it).
            Any newlines and indentation will be stripped.
        *results
            Query result objects returned by queryDataIds,
            queryDimensionRecords, or queryDatasets.

        Raises
        ------
        AssertionError
            Raised for the first result whose relation string differs from
            the normalized ``expected`` string; the message includes a diff.
        """
        # Drop newlines and leading/trailing space.
        expected = expected.replace("\n", " ").strip()
        # Drop duplicate spaces (i.e. indentation).
        expected = re.sub(r" \s+", " ", expected)
        # Drop spaces next to parentheses and square brackets.
        expected = re.sub(r"\s*(\[|\(|\)|\])\s*", r"\1", expected)
        differ = difflib.Differ()
        for n, result in enumerate(results):
            result_str = str(result._query.relation)
            if expected != result_str:
                message_lines = [f"Unexpected relation string for query {n}:"]
                message_lines.extend(
                    differ.compare(
                        [expected],
                        [result_str],
                    )
                )
                raise AssertionError("\n".join(message_lines))

    def test_spatial_constraints(self) -> None:
        """Test query constraints from data IDs and WHERE clauses that imply a
        spatial region.
        """
        # Constrain one set of regular spatial dimensions from another.
        # This needs post-query filtering in the iteration engine.
        self.assert_relation_str(
            f"""
            Π[band, patch, skymap, tract](
                σ[regions_overlap(patch.region, visit_detector_region.region)](
                    →[iteration](
                        select(
                            Π[band, patch, patch.region, skymap, tract, visit_detector_region.region](
                                σ[
                                    instrument={self.instrument!r}
                                    and detector={self.detector!r}
                                    and visit={self.visit!r}
                                    and band={self.band!r}
                                    and physical_filter={self.physical_filter!r}
                                ](
                                    patch_htm7_overlap
                                    ⋈ visit_detector_region_htm7_overlap
                                    ⋈ physical_filter
                                    ⋈ patch
                                    ⋈ visit
                                    ⋈ visit_detector_region
                                )
                            )
                        )
                    )
                )
            )
            """,
            self.registry.queryDataIds(
                ["patch", "band"], instrument=self.instrument, visit=self.visit, detector=self.detector
            ),
            self.registry.queryDataIds(
                ["patch", "band"],
                # Adjacent string literals are concatenated with no separator,
                # so every fragment must end with a space to keep the terms
                # of the expression apart.
                where=(
                    f"instrument={self.instrument!r} "
                    f"and detector={self.detector!r} "
                    f"and visit={self.visit!r} "
                    f"and band={self.band!r} "
                    f"and physical_filter={self.physical_filter!r} "
                ),
            ),
        )
        # Constrain the special common skypix dimension from a regular
        # dimension.  This does not need any post-query filtering.
        self.assert_relation_str(
            # It would be better if this query didn't join in visit and
            # physical_filter - it does that to ensure all implied dimension
            # relationships are satisfied in the results, but the dimensions
            # implied by visit are not present in the results and play no role
            # in the constraints.  But it'd be hard to fix that and any fix
            # would be very rarely exercised.
            f"""
            select(
                Π[htm7](
                    σ[
                        instrument={self.instrument!r}
                        and detector={self.detector!r}
                        and visit={self.visit!r}
                        and band={self.band!r}
                        and physical_filter={self.physical_filter!r}
                    ](
                        visit_detector_region_htm7_overlap
                        ⋈ physical_filter
                        ⋈ visit
                    )
                )
            )
            """,
            self.registry.queryDataIds(
                ["htm7"], instrument=self.instrument, visit=self.visit, detector=self.detector
            ),
            # For regular dimension constraints we can also support having the
            # data ID expressed as a 'where' expression.  The query would also
            # have the same behavior with only visit and detector specified
            # in the 'where' string, but it'd change the expected string.
            self.registry.queryDataIds(
                ["htm7"],
                # As above, each fragment ends with a space so that the
                # concatenated expression keeps its terms separated.
                where=(
                    f"instrument={self.instrument!r} "
                    f"and detector={self.detector!r} "
                    f"and visit={self.visit!r} "
                    f"and band={self.band!r} "
                    f"and physical_filter={self.physical_filter!r} "
                ),
            ),
        )
        # We can't constrain any other skypix system spatially, because we
        # don't have overlap rows for those in the database.  But in the future
        # we might be able to fake it with an iteration-engine spatial join, or
        # utilize explicitly-materialized overlaps.
        with self.assertRaises(MissingSpatialOverlapError):
            self.registry.queryDataIds(
                ["htm11"],
                instrument=self.instrument,
                visit=self.visit,
                detector=self.detector,
            )
        # Constrain a regular spatial dimension (patch) from a non-common
        # skypix dimension.  In general this requires post-query
        # filtering to get only the patches that overlap the skypix pixel.  We
        # could special-case skypix dimensions that are coarser than the common
        # dimension and part of the same system to simplify both the SQL query
        # and avoid post-query filtering, but we don't at present.
        self.assert_relation_str(
            f"""
            Π[patch, skymap, tract](
                σ[
                    regions_overlap(
                        patch.region,
                        {self.registry.dimensions["htm11"].pixelization.pixel(self.htm11)}
                    )
                ](
                    →[iteration](
                        select(
                            Π[patch, patch.region, skymap, tract](
                                σ[htm7={self.htm7!r}](
                                    patch_htm7_overlap ⋈ patch
                                )
                            )
                        )
                    )
                )
            )
            """,
            self.registry.queryDataIds(["patch"], htm11=self.htm11),
        )
        # Constrain a regular spatial dimension (patch) from the common
        # skypix dimension.  This does not require post-query filtering.
        self.assert_relation_str(
            f"""
            select(
                Π[patch, skymap, tract](
                    σ[htm7={self.htm7!r}](
                        patch_htm7_overlap
                    )
                )
            )
            """,
            self.registry.queryDataIds(["patch"], htm7=self.htm7),
        )
        # Constrain a regular dimension (detector) via a different dimension
        # (visit) that combine together to define a more fine-grained region,
        # and also constrain via a skypix dimension other than the common one.
        # Once again we could special-case this for skypix dimensions that are
        # coarser than the common dimension in the same system, but we don't.
        self.assert_relation_str(
            # This query also doesn't need visit or physical_filter joined in,
            # but we can live with that.
            f"""
            Π[detector, instrument](
                σ[
                    regions_overlap(
                        visit_detector_region.region,
                        {self.registry.dimensions["htm11"].pixelization.pixel(self.htm11)}
                    )
                ](
                    →[iteration](
                        select(
                            Π[detector, instrument, visit_detector_region.region](
                                σ[
                                    instrument={self.instrument!r}
                                    and visit={self.visit!r}
                                    and band={self.band!r}
                                    and physical_filter={self.physical_filter!r}
                                    and htm7={self.htm7!r}
                                ](
                                    visit_detector_region_htm7_overlap
                                    ⋈ physical_filter
                                    ⋈ visit
                                    ⋈ visit_detector_region
                                )
                            )
                        )
                    )
                )
            )
            """,
            self.registry.queryDataIds(
                ["detector"], visit=self.visit, instrument=self.instrument, htm11=self.htm11
            ),
        )
        # Constrain a regular dimension (detector) via a different dimension
        # (visit) that combine together to define a more fine-grained region,
        # and also constrain via the common-skypix system.
        self.assert_relation_str(
            # This query also doesn't need visit or physical_filter joined in,
            # but we can live with that.
            f"""
            select(
                Π[detector, instrument](
                    σ[
                        htm7={self.htm7!r}
                        and instrument={self.instrument!r}
                        and visit={self.visit!r}
                        and band={self.band!r}
                        and physical_filter={self.physical_filter!r}
                    ](
                        visit_detector_region_htm7_overlap
                        ⋈ physical_filter
                        ⋈ visit
                    )
                )
            )
            """,
            self.registry.queryDataIds(
                ["detector"], visit=self.visit, instrument=self.instrument, htm7=self.htm7
            ),
        )

350 

351 

# Run the tests in this module when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()