Coverage for tests/test_query_relations.py: 26%

51 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-12-08 10:56 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28import difflib 

29import os 

30import os.path 

31import re 

32import unittest 

33 

34from lsst.daf.butler import DataCoordinateQueryResults, DimensionRecordQueryResults, ParentDatasetQueryResults 

35from lsst.daf.butler.registry import MissingSpatialOverlapError, RegistryConfig, _RegistryFactory 

36from lsst.daf.butler.transfers import YamlRepoImportBackend 

37 

# Absolute path of the directory containing this test module; test data files
# are located relative to it.
TESTDIR = os.path.dirname(os.path.abspath(__file__))

39 

40 

class TestQueryRelationsTests(unittest.TestCase):
    """Tests for registry queries that check that the generated relation tree
    matches expectations.

    These tests are somewhat fragile - there are multiple valid relation trees
    for most registry queries, just as there are multiple valid SQL queries,
    and since we check the relation tree via string comparisons we are
    also sensitive to irrelevant things like column ordering.  But these
    differences are deterministic, and checking the relation trees instead of
    the query results puts a much smaller burden on test-data creation and
    inspection (as well as making tests go faster), making it much easier to
    test many combinations of arguments.

    Note that daf_relation provides good test coverage of the process of going
    from relation trees to SQL.
    """

    @classmethod
    def setUpClass(cls) -> None:
        """Create a registry shared by all tests and populate it.

        The registry is configured with the ``"sqlite://"`` database URL and
        filled from the ``hsc-rc2-subset.yaml`` export file.  A set of
        dimension values and the corresponding expanded data ID are stored as
        class attributes for use in the tests.
        """
        config = RegistryConfig()
        config["db"] = "sqlite://"
        cls.registry = _RegistryFactory(config).create_from_config()
        # We need just enough test data to have valid dimension records for
        # all of the dimensions we're concerned with, and we want to pick
        # values for each dimension that correspond to a spatiotemporal
        # overlap.  Without that, we'd be fighting optimizations built into the
        # query system that simplify things as soon as it can spot that there
        # will be no overall results.
        data_file = os.path.normpath(os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset.yaml"))
        with open(data_file) as stream:
            # The stream is kept open until the import has completed so this
            # works whether or not the backend reads the file eagerly.
            backend = YamlRepoImportBackend(stream, cls.registry)
            backend.register()
            backend.load(datastore=None)
        assert (
            cls.registry.dimensions.commonSkyPix.name == "htm7"
        ), "If this changes, update the skypix levels below to have one below and one above."
        cls.htm7 = 222340
        cls.htm11 = 56919188
        cls.instrument = "HSC"
        cls.skymap = "hsc_rings_v1"
        cls.visit = 404
        cls.tract = 9615
        cls.detector = 0
        cls.patch = 14
        cls.data_id = cls.registry.expandDataId(
            htm7=cls.htm7,
            htm11=cls.htm11,
            instrument=cls.instrument,
            skymap=cls.skymap,
            visit=cls.visit,
            tract=cls.tract,
            detector=cls.detector,
            patch=cls.patch,
        )
        # band and physical_filter are not given above; they are read back
        # from the expanded data ID.
        cls.band = cls.data_id["band"]
        cls.physical_filter = cls.data_id["physical_filter"]

    def assert_relation_str(
        self,
        expected: str,
        *results: DataCoordinateQueryResults | DimensionRecordQueryResults | ParentDatasetQueryResults,
    ) -> None:
        """Assert that one or more registry queries have relation trees that
        match the given string.

        Parameters
        ----------
        expected : `str`
            Expected relation tree, corresponding to
            ``lsst.daf.relation.Relation.__str__`` (which is much more concise
            and readable than the `repr` version, once you get used to it).
            Any newlines and indentation will be stripped.
        *results
            Query result objects returned by queryDataIds,
            queryDimensionRecords, or queryDatasets.

        Raises
        ------
        AssertionError
            Raised for the first query whose relation string differs from the
            normalized ``expected`` string; the message includes a
            `difflib` comparison of the two strings.
        """
        # Only the expected string is normalized; the relation string of each
        # result is compared exactly as produced by ``str``.
        # Drop newlines and leading/trailing space.
        expected = expected.replace("\n", " ").strip()
        # Drop duplicate spaces (i.e. indentation).
        expected = re.sub(r" \s+", " ", expected)
        # Drop spaces next to parentheses and square brackets.
        expected = re.sub(r"\s*(\[|\(|\)|\])\s*", r"\1", expected)
        differ = difflib.Differ()
        for n, result in enumerate(results):
            result_str = str(result._query.relation)
            if expected != result_str:
                message_lines = [f"Unexpected relation string for query {n}:"]
                message_lines.extend(
                    differ.compare(
                        [expected],
                        [result_str],
                    )
                )
                raise AssertionError("\n".join(message_lines))

    def test_spatial_constraints(self) -> None:
        """Test query constraints from data IDs and WHERE clauses that imply a
        spatial region.
        """
        # Constrain one set of regular spatial dimensions from another.
        # This needs post-query filtering in the iteration engine.
        self.assert_relation_str(
            f"""
            Π[band, patch, skymap, tract](
                σ[regions_overlap(patch.region, visit_detector_region.region)](
                    →[iteration](
                        select(
                            Π[band, patch, patch.region, skymap, tract, visit_detector_region.region](
                                σ[
                                    instrument={self.instrument!r}
                                    and detector={self.detector!r}
                                    and visit={self.visit!r}
                                    and band={self.band!r}
                                    and physical_filter={self.physical_filter!r}
                                ](
                                    patch_htm7_overlap
                                    ⋈ visit_detector_region_htm7_overlap
                                    ⋈ physical_filter
                                    ⋈ patch
                                    ⋈ visit
                                    ⋈ visit_detector_region
                                )
                            )
                        )
                    )
                )
            )
            """,
            self.registry.queryDataIds(
                ["patch", "band"], instrument=self.instrument, visit=self.visit, detector=self.detector
            ),
            self.registry.queryDataIds(
                ["patch", "band"],
                where=(
                    f"instrument={self.instrument!r} "
                    f"and detector={self.detector!r} "
                    # Trailing space keeps the visit value separated from the
                    # following "and" once the fragments are concatenated.
                    f"and visit={self.visit!r} "
                    f"and band={self.band!r} "
                    f"and physical_filter={self.physical_filter!r} "
                ),
            ),
        )
        # Constrain the special common skypix dimension from a regular
        # dimension.  This does not need any post-query filtering.
        self.assert_relation_str(
            # It would be better if this query didn't join in visit and
            # physical_filter - it does that to ensure all implied dimension
            # relationships are satisfied in the results, but the dimensions
            # implied by visit are not present in the results and play no role
            # in the constraints.  But it'd be hard to fix that and any fix
            # would be very rarely exercised.
            f"""
            select(
                Π[htm7](
                    σ[
                        instrument={self.instrument!r}
                        and detector={self.detector!r}
                        and visit={self.visit!r}
                        and band={self.band!r}
                        and physical_filter={self.physical_filter!r}
                    ](
                        visit_detector_region_htm7_overlap
                        ⋈ physical_filter
                        ⋈ visit
                    )
                )
            )
            """,
            self.registry.queryDataIds(
                ["htm7"], instrument=self.instrument, visit=self.visit, detector=self.detector
            ),
            # For regular dimension constraints we can also support having the
            # data ID expressed as a 'where' expression.  The query would also
            # have the same behavior with only visit and detector specified
            # in the 'where' string, but it'd change the expected string.
            self.registry.queryDataIds(
                ["htm7"],
                where=(
                    f"instrument={self.instrument!r} "
                    f"and detector={self.detector!r} "
                    # Trailing space keeps the visit value separated from the
                    # following "and" once the fragments are concatenated.
                    f"and visit={self.visit!r} "
                    f"and band={self.band!r} "
                    f"and physical_filter={self.physical_filter!r} "
                ),
            ),
        )
        # We can't constrain any other skypix system spatially, because we
        # don't have overlap rows for those in the database.  But in the future
        # we might be able to fake it with an iteration-engine spatial join, or
        # utilize explicitly-materialized overlaps.
        with self.assertRaises(MissingSpatialOverlapError):
            self.registry.queryDataIds(
                ["htm11"],
                instrument=self.instrument,
                visit=self.visit,
                detector=self.detector,
            )
        # Constrain a regular spatial dimension (patch) from a non-common
        # skypix dimension.  In general this requires post-query
        # filtering to get only the patches that overlap the skypix pixel.  We
        # could special-case skypix dimensions that are coarser than the common
        # dimension and part of the same system to simplify both the SQL query
        # and avoid post-query filtering, but we don't at present.
        self.assert_relation_str(
            f"""
            Π[patch, skymap, tract](
                σ[
                    regions_overlap(
                        patch.region,
                        {self.registry.dimensions["htm11"].pixelization.pixel(self.htm11)}
                    )
                ](
                    →[iteration](
                        select(
                            Π[patch, patch.region, skymap, tract](
                                σ[htm7={self.htm7!r}](
                                    patch_htm7_overlap ⋈ patch
                                )
                            )
                        )
                    )
                )
            )
            """,
            self.registry.queryDataIds(["patch"], htm11=self.htm11),
        )
        # Constrain a regular spatial dimension (patch) from the common
        # skypix dimension.  This does not require post-query filtering.
        self.assert_relation_str(
            f"""
            select(
                Π[patch, skymap, tract](
                    σ[htm7={self.htm7!r}](
                        patch_htm7_overlap
                    )
                )
            )
            """,
            self.registry.queryDataIds(["patch"], htm7=self.htm7),
        )
        # Constrain a regular dimension (detector) via a different dimension
        # (visit) that combine together to define a more fine-grained region,
        # and also constrain via a skypix dimension other than the common one.
        # Once again we could special-case this for skypix dimensions that are
        # coarser than the common dimension in the same system, but we don't.
        self.assert_relation_str(
            # This query also doesn't need visit or physical_filter joined in,
            # but we can live with that.
            f"""
            Π[detector, instrument](
                σ[
                    regions_overlap(
                        visit_detector_region.region,
                        {self.registry.dimensions["htm11"].pixelization.pixel(self.htm11)}
                    )
                ](
                    →[iteration](
                        select(
                            Π[detector, instrument, visit_detector_region.region](
                                σ[
                                    instrument={self.instrument!r}
                                    and visit={self.visit!r}
                                    and band={self.band!r}
                                    and physical_filter={self.physical_filter!r}
                                    and htm7={self.htm7!r}
                                ](
                                    visit_detector_region_htm7_overlap
                                    ⋈ physical_filter
                                    ⋈ visit
                                    ⋈ visit_detector_region
                                )
                            )
                        )
                    )
                )
            )
            """,
            self.registry.queryDataIds(
                ["detector"], visit=self.visit, instrument=self.instrument, htm11=self.htm11
            ),
        )
        # Constrain a regular dimension (detector) via a different dimension
        # (visit) that combine together to define a more fine-grained region,
        # and also constrain via the common-skypix system.
        self.assert_relation_str(
            # This query also doesn't need visit or physical_filter joined in,
            # but we can live with that.
            f"""
            select(
                Π[detector, instrument](
                    σ[
                        htm7={self.htm7!r}
                        and instrument={self.instrument!r}
                        and visit={self.visit!r}
                        and band={self.band!r}
                        and physical_filter={self.physical_filter!r}
                    ](
                        visit_detector_region_htm7_overlap
                        ⋈ physical_filter
                        ⋈ visit
                    )
                )
            )
            """,
            self.registry.queryDataIds(
                ["detector"], visit=self.visit, instrument=self.instrument, htm7=self.htm7
            ),
        )

349 

350 

if __name__ == "__main__":
    # Allow this test module to be executed directly as a script.
    unittest.main()